ドキュメントを更新し、型注釈を追加してコードの可読性とメンテナンス性を向上。各モジュールの使用例や依存関係を明示化し、エラーハンドリングを改善。

2025-10-09 17:35:55 +09:00
parent b26129af68
commit 690a2f081b
5 changed files with 276 additions and 60 deletions
--- a/src-python/models/transcription/transcription_whisper.py
+++ b/src-python/models/transcription/transcription_whisper.py
@@ -1,6 +1,17 @@
+"""Helpers for downloading and loading Whisper (faster-whisper) models.
+
+This module exposes small utilities used by the transcription subsystem:
+- downloadFile: stream-download a file with optional progress callback
+- checkWhisperWeight: quick local availability check
+- downloadWhisperWeight: download model artifacts from HF hub
+- getWhisperModel: construct and return a WhisperModel instance
+
+The helpers are best-effort: downloadFile swallows errors and checkWhisperWeight returns False on failure, while getWhisperModel raises, so callers must handle each case.
+"""
+
 from os import path as os_path, makedirs as os_makedirs
 from requests import get as requests_get
-from typing import Callable
+from typing import Callable, Optional
 import huggingface_hub
 from faster_whisper import WhisperModel
 import logging
@@ -30,24 +41,36 @@ _FILENAMES = [
    "vocabulary.json",
 ]

-def downloadFile(url, path, func=None):
+def downloadFile(url: str, path: str, func: Optional[Callable[[float], None]] = None) -> None:
+    """Download a file from `url` to `path`.
+
+    Args:
+        url: remote URL to download from
+        path: local filepath to write
+        func: optional callback(progress: float) called with a 0.0-1.0 progress; not called when content-length is missing or zero
+    """
    try:
        res = requests_get(url, stream=True)
        res.raise_for_status()
        file_size = int(res.headers.get('content-length', 0))
        total_chunk = 0
        with open(os_path.join(path), 'wb') as file:
-            for chunk in res.iter_content(chunk_size=1024*2000):
+            for chunk in res.iter_content(chunk_size=1024 * 2000):
                file.write(chunk)
-                if isinstance(func, Callable):
+                if callable(func) and file_size:
                    total_chunk += len(chunk)
-                    func(total_chunk/file_size)
+                    func(total_chunk / file_size)
    except Exception:
+        # Best-effort: any error is swallowed and a partial file may remain; callers must re-check the result
        pass

-def checkWhisperWeight(root, weight_type):
+def checkWhisperWeight(root: str, weight_type: str) -> bool:
+    """Return True if a Whisper model for `weight_type` is loadable from disk.
+
+    This attempts to construct a local `WhisperModel` with local_files_only=True
+    to verify required files exist and are compatible.
+    """
    path = os_path.join(root, "weights", "whisper", weight_type)
-    result = False
    try:
        WhisperModel(
            path,
@@ -58,23 +81,47 @@ def checkWhisperWeight(root, weight_type):
            num_workers=1,
            local_files_only=True,
        )
-        result = True
+        return True
    except Exception:
-        pass
-    return result
+        return False

-def downloadWhisperWeight(root, weight_type, callback=None, end_callback=None):
+def downloadWhisperWeight(
+    root: str,
+    weight_type: str,
+    callback: Optional[Callable[[float], None]] = None,
+    end_callback: Optional[Callable[[], None]] = None,
+) -> None:
+    """Ensure Whisper weight files are present locally; download them if missing.
+
+    Args:
+        root: project root where `weights/whisper` lives
+        weight_type: key from `_MODELS` (e.g. "tiny", "base")
+        callback: progress callback for the main model file
+        end_callback: called once at the end, even if nothing had to be downloaded
+    """
    path = os_path.join(root, "weights", "whisper", weight_type)
    os_makedirs(path, exist_ok=True)
-    if checkWhisperWeight(root, weight_type) is False:
+    if not checkWhisperWeight(root, weight_type):
        for filename in _FILENAMES:
            file_path = os_path.join(path, filename)
            url = huggingface_hub.hf_hub_url(_MODELS[weight_type], filename)
            downloadFile(url, file_path, func=callback if filename == "model.bin" else None)
-    if isinstance(end_callback, Callable):
+    if callable(end_callback):
        end_callback()

-def getWhisperModel(root, weight_type, device="cpu", device_index=0, compute_type="auto"):
+def getWhisperModel(
+    root: str,
+    weight_type: str,
+    device: str = "cpu",
+    device_index: int = 0,
+    compute_type: str = "auto",
+) -> WhisperModel:
+    """Return a `WhisperModel` instance loaded from local weights.
+
+    Raises:
+        ValueError: when VRAM shortage is detected (wrapped from RuntimeError)
+        Exception: other loading errors are propagated.
+    """
    path = os_path.join(root, "weights", "whisper", weight_type)
    if compute_type == "auto":
        compute_type = getBestComputeType(device, device_index)
@@ -90,11 +137,10 @@ def getWhisperModel(root, weight_type, device="cpu", device_index=0, compute_typ
        )
        return model
    except RuntimeError as e:
-        # VRAM不足エラーの検出
+        # Detect VRAM out-of-memory-like errors and raise a clear ValueError
        error_message = str(e)
        if "CUDA out of memory" in error_message or "CUBLAS_STATUS_ALLOC_FAILED" in error_message:
            raise ValueError("VRAM_OUT_OF_MEMORY", error_message)
-        # その他のエラーは通常通り再送出
        raise

 if __name__ == "__main__":