From eca5e31429daeb1da5ac241afc3127a78f5a130e Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Thu, 9 Oct 2025 18:53:42 +0900 Subject: [PATCH] =?UTF-8?q?torch=E3=81=A8ctranslate2=E3=81=AE=E3=82=A4?= =?UTF-8?q?=E3=83=B3=E3=83=9D=E3=83=BC=E3=83=88=E3=82=92=E3=82=AC=E3=83=BC?= =?UTF-8?q?=E3=83=89=E3=81=97=E3=80=81=E5=AE=89=E5=85=A8=E3=81=AA=E3=83=87?= =?UTF-8?q?=E3=83=95=E3=82=A9=E3=83=AB=E3=83=88=E3=82=92=E6=8F=90=E4=BE=9B?= =?UTF-8?q?=E3=80=82=E5=9E=8B=E6=B3=A8=E9=87=88=E3=81=A8docstring=E3=82=92?= =?UTF-8?q?=E8=BF=BD=E5=8A=A0=E3=81=97=E3=81=A6=E5=8F=AF=E8=AA=AD=E6=80=A7?= =?UTF-8?q?=E3=82=92=E5=90=91=E4=B8=8A=E3=80=82=E3=83=AD=E3=82=B0=E8=A8=AD?= =?UTF-8?q?=E5=AE=9A=E3=81=AE=E9=87=8D=E8=A4=87=E3=83=8F=E3=83=B3=E3=83=89?= =?UTF-8?q?=E3=83=A9=E8=BF=BD=E5=8A=A0=E3=82=92=E9=98=B2=E3=81=90=E3=83=81?= =?UTF-8?q?=E3=82=A7=E3=83=83=E3=82=AF=E3=82=92=E5=B0=8E=E5=85=A5=E3=80=82?= =?UTF-8?q?encodeBase64=E3=81=AF=E3=83=87=E3=82=B3=E3=83=BC=E3=83=89?= =?UTF-8?q?=E5=A4=B1=E6=95=97=E6=99=82=E3=81=AB=E7=A9=BA=E8=BE=9E=E6=9B=B8?= =?UTF-8?q?=E3=82=92=E8=BF=94=E3=81=99=E3=82=88=E3=81=86=E3=81=AB=E5=A4=89?= =?UTF-8?q?=E6=9B=B4=E3=80=82getComputeDeviceList=E3=81=AFGPU=E6=83=85?= =?UTF-8?q?=E5=A0=B1=E5=8F=96=E5=BE=97=E5=A4=B1=E6=95=97=E6=99=82=E3=81=AB?= =?UTF-8?q?CPU=E6=83=85=E5=A0=B1=E3=82=92=E8=BF=94=E3=81=99=E3=82=88?= =?UTF-8?q?=E3=81=86=E3=81=AB=E4=BE=8B=E5=A4=96=E4=BF=9D=E8=AD=B7=E3=82=92?= =?UTF-8?q?=E8=BF=BD=E5=8A=A0=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src-python/docs/modules/utils.md | 58 ++++++++++ src-python/utils.py | 177 ++++++++++++++++++++----------- 2 files changed, 176 insertions(+), 59 deletions(-) diff --git a/src-python/docs/modules/utils.md b/src-python/docs/modules/utils.md index 7a7b2289..9b7d4e71 100644 --- a/src-python/docs/modules/utils.md +++ b/src-python/docs/modules/utils.md @@ -1,3 +1,61 @@ +## utils モジュール(src-python/utils.py) + +このドキュメントは `src-python/utils.py` に対する最近のリファクタ内容、公開 API、利用上の注意点、テスト方法をまとめたものです。 + +### 概要 +- `utils.py` はプロジェクト全体で使われる汎用ユーティリティ群を提供します。主な内容: + - ネットワーク接続チェック (`isConnectedNetwork`) + - ソケットの空きポート確認 (`isAvailableWebSocketServer`) + - IP アドレス検証 (`isValidIpAddress`) + - 計算デバイス一覧取得 (`getComputeDeviceList` / `getBestComputeType`) + - Base64 デコード (JSON) (`encodeBase64`) + - ロガー設定/ログ出力ヘルパー (`setupLogger`, `printLog`, `printResponse`, `errorLogging`) + +### 今回のリファクタ(要点) +- Optional 依存へのフォールバック: `torch` と `ctranslate2` が存在しない環境でも動作するよう、import をガードし、安全なデフォルトを返す実装にしました。 +- 型注釈と docstring を追加して可読性を向上させました。 +- ログ設定の重複ハンドラ追加を防ぐチェックを導入しました。 +- `encodeBase64` はデコード失敗時に例外を投げず空辞書を返すように(安全側)変更しました。 +- `getComputeDeviceList` は GPU 情報取得で失敗しても CPU 情報を返すように例外保護を行いました。 + +### 重要な利用上の注意(breaking/behavior changes) +- Optional 依存 + - `torch` が無い環境では GPU 情報は取得できません(`getComputeDeviceList` は CPU エントリのみ返します)。 + - `ctranslate2` の `get_supported_compute_types` が無い場合は空リストを返します。 + → 環境に依存する挙動を想定して、呼び出し側は存在チェックやフォールバックを実装してください。 + +- `encodeBase64` の挙動 + - 不正な base64/JSON を入力した場合、例外を投げず `{}` を返します。既存コードが例外を期待している場合は注意してください。 + +- `isAvailableWebSocketServer` の仕様 + - 指定した host:port に対して bind が成功すれば True を返します(「使用中かどうか」を判定する用途と逆の意味合いになることがあるため注意)。 + +- ロギング + - `setupLogger` は同じログファイルに対するハンドラを重複して追加しません。`errorLogging()` はログ書き込みに失敗した場合でも最後に trace を stdout に出力するフォールバックがあります。 + +### API 使い方(短い例) + +```python +from utils import getComputeDeviceList, encodeBase64, printResponse + +devices = getComputeDeviceList() +print(devices) + +obj = encodeBase64('eyAia2V5IjogInZhbHVlIiB9') # -> {'key': 'value'} + +printResponse(200, '/health', {'status': 'ok'}) +``` + +### テスト方針 +- optional 依存の違いを扱うため、ユニットテストは `torch` と `ctranslate2` をモックして行うことを推奨します。 +- 例: `getComputeDeviceList()` は GPU がない環境でも CPU のエントリを返すことを確認するテスト。 + +### トラブルシュート +- ログファイルの書き込みエラー: 権限やディスク容量を確認してください。`error.log` と `process.log` の存在と権限をチェックします。 +- `getComputeDeviceList()` が空しか返さない場合、`torch` または `ctranslate2` のインストールを確認してください。 + +### 変更履歴 +- 2025-10-09: 型注釈・docstring 追加、optional import ガード、ロギング堅牢化。 # utils.py — 関数一覧と使用例 目的: 共通ユーティリティ(ログ、JSON 出力、ネットワーク/ポート検査、デバイス/計算タイプ列挙、バリデーション等)を提供します。 diff --git a/src-python/utils.py b/src-python/utils.py index fe4faa90..1e250e87 100644 --- a/src-python/utils.py +++ b/src-python/utils.py @@ -1,12 +1,22 @@ import base64 -from typing import Any, List, Dict +from typing import Any, List, Dict, Optional import json import traceback import logging from logging.handlers import RotatingFileHandler -import torch -from ctranslate2 import get_supported_compute_types +try: + import torch +except Exception: + torch = None # type: ignore + +try: + from ctranslate2 import get_supported_compute_types +except Exception: + # Fallback: if ctranslate2 is not installed, provide a safe stub. + def get_supported_compute_types(device: str, device_index: int) -> List[str]: + return [] + import requests import ipaddress import socket @@ -47,32 +57,32 @@ def validateDictStructure(data: dict, structure: dict) -> bool: return True def isConnectedNetwork(url="http://www.google.com", timeout=3) -> bool: + """Quick network connectivity check by requesting `url`. + + Returns True when a 200 response is returned within `timeout` seconds. + """ try: response = requests.get(url, timeout=timeout) return response.status_code == 200 except requests.RequestException: return False -def isAvailableWebSocketServer(host:str, port:int) -> bool: - """WebSocketサーバーのポートが使用中かどうかを確認する""" - response = True +def isAvailableWebSocketServer(host: str, port: int) -> bool: + """Return True if the given host/port appear available for binding. + + Note: This attempts to bind a TCP socket to the address. If bind + succeeds the function returns True (meaning the address was available). + """ try: with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as chk: - try: - # SO_REUSEADDRを設定してソケットの再利用を許可 - chk.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - chk.bind((host, port)) - # シャットダウン前にリッスン状態にする必要はない - chk.close() - except Exception: - response = False + chk.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + chk.bind((host, port)) + return True except Exception: - errorLogging() - response = False - - return response + return False def isValidIpAddress(ip_address: str) -> bool: + """Return True if `ip_address` is a valid IPv4/IPv6 address.""" try: ipaddress.ip_address(ip_address) return True @@ -80,7 +90,12 @@ def isValidIpAddress(ip_address: str) -> bool: return False def getComputeDeviceList() -> List[Dict[str, Any]]: - compute_types = [ + """Return a list of available compute devices and supported compute types. + + The returned list contains dicts describing CPU and (if available) + CUDA devices. This function is defensive to missing optional packages. + """ + compute_types: List[Dict[str, Any]] = [ { "device": "cpu", "device_index": 0, @@ -89,32 +104,47 @@ def getComputeDeviceList() -> List[Dict[str, Any]]: } ] - if torch.cuda.is_available(): - for device_index in range(torch.cuda.device_count()): - gpu_device_name = torch.cuda.get_device_name(device_index) - gpu_compute_types = ["auto"] + list(get_supported_compute_types("cuda", device_index)) + try: + if torch is not None and hasattr(torch, "cuda") and torch.cuda.is_available(): + for device_index in range(torch.cuda.device_count()): + gpu_device_name = torch.cuda.get_device_name(device_index) + gpu_compute_types = ["auto"] + list(get_supported_compute_types("cuda", device_index)) - # デバイスごとの計算タイプの制限 - if "GTX" in gpu_device_name: - unsupported_types = {"int8_bfloat16", "bfloat16", "float16", "int8"} - gpu_compute_types = [t for t in gpu_compute_types if t not in unsupported_types] - elif not any(keyword in gpu_device_name for keyword in ["RTX", "Tesla", "A100", "Quadro"]): - gpu_compute_types = ["float32"] + # デバイスごとの計算タイプの制限 + if "GTX" in gpu_device_name: + unsupported_types = {"int8_bfloat16", "bfloat16", "float16", "int8"} + gpu_compute_types = [t for t in gpu_compute_types if t not in unsupported_types] + elif not any(keyword in gpu_device_name for keyword in ["RTX", "Tesla", "A100", "Quadro"]): + gpu_compute_types = ["float32"] - compute_types.append( - { - "device": "cuda", - "device_index": device_index, - "device_name": gpu_device_name, - "compute_types": gpu_compute_types, - } - ) + compute_types.append( + { + "device": "cuda", + "device_index": device_index, + "device_name": gpu_device_name, + "compute_types": gpu_compute_types, + } + ) + except Exception: + # If querying GPU devices fails, return at least the CPU entry + errorLogging() return compute_types def getBestComputeType(device: str, device_index: int) -> str: - compute_types = set(get_supported_compute_types(device, device_index)) - device_name = "cpu" if device == "cpu" else torch.cuda.get_device_name(device_index) + """Pick the best available compute type for a device. + + Falls back to "float32" when no preferred type is available. + """ + try: + compute_types = set(get_supported_compute_types(device, device_index)) + except Exception: + compute_types = set() + + try: + device_name = "cpu" if device == "cpu" else (torch.cuda.get_device_name(device_index) if torch is not None else "") + except Exception: + device_name = "" # デバイスごとの優先計算タイプ preferred_types = { @@ -141,14 +171,26 @@ def getBestComputeType(device: str, device_index: int) -> str: return "float32" -def encodeBase64(data:str) -> dict: - return json.loads(base64.b64decode(data).decode('utf-8')) +def encodeBase64(data: str) -> Dict[str, Any]: + """Decode a base64-encoded JSON string and return the parsed object. -def removeLog(): - with open('process.log', 'w', encoding="utf-8") as f: - f.write("") + Returns an empty dict on failure. + """ + try: + return json.loads(base64.b64decode(data).decode('utf-8')) + except Exception: + errorLogging() + return {} -def setupLogger(name, log_file, level=logging.INFO): +def removeLog() -> None: + """Truncate the process log file (process.log) if present.""" + try: + with open('process.log', 'w', encoding="utf-8") as f: + f.write("") + except Exception: + errorLogging() + +def setupLogger(name: str, log_file: str, level: int = logging.INFO) -> logging.Logger: """ 特定の名前とログファイルを持つロガーを設定します。 """ @@ -174,13 +216,17 @@ def setupLogger(name, log_file, level=logging.INFO): formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') file_handler.setFormatter(formatter) - # ロガーにハンドラーを追加 - logger.addHandler(file_handler) + # ロガーにハンドラーを追加(重複追加を避ける) + if not any(isinstance(h, RotatingFileHandler) and getattr(h, 'baseFilename', None) == getattr(file_handler, 'baseFilename', None) for h in logger.handlers): + logger.addHandler(file_handler) return logger -process_logger = None -def printLog(log:str, data:Any=None) -> None: +process_logger: Optional[logging.Logger] = None + + +def printLog(log: str, data: Any = None) -> None: + """Log and print a structured process log message.""" global process_logger if process_logger is None: process_logger = setupLogger("process", "process.log", logging.INFO) @@ -194,7 +240,11 @@ def printLog(log:str, data:Any=None) -> None: serialized = json.dumps(response) print(serialized, flush=True) -def printResponse(status:int, endpoint:str, result:Any=None) -> None: +def printResponse(status: int, endpoint: str, result: Any = None) -> None: + """Log and print a structured response object. + + If JSON serialization fails, record the error and emit a generic error payload. + """ global process_logger if process_logger is None: process_logger = setupLogger("process", "process.log", logging.INFO) @@ -208,28 +258,37 @@ def printResponse(status:int, endpoint:str, result:Any=None) -> None: try: serialized_response = json.dumps(response) - except OSError as e: - errorLogging() # Log the full traceback of the OSError - process_logger.error(f"Problematic response object before json.dumps: {response}") - process_logger.error(f"OSError during json.dumps: {e}") - # Optionally, print a generic error JSON to stdout if needed, or re-raise - # For now, we'll print a simple error message to stdout as a fallback + except Exception as e: + errorLogging() # Log the full traceback of the exception + try: + process_logger.error(f"Problematic response object before json.dumps: {response}") + process_logger.error(f"Exception during json.dumps: {e}") + except Exception: + pass + # Fallback generic error payload error_json = json.dumps({ "status": 500, "endpoint": endpoint, - "result": {"error": "Failed to serialize response due to OSError", "details": str(e)} + "result": {"error": "Failed to serialize response", "details": str(e)}, }) print(error_json, flush=True) else: print(serialized_response, flush=True) -error_logger = None +error_logger: Optional[logging.Logger] = None + + def errorLogging() -> None: + """Log the current exception traceback to the error logger.""" global error_logger if error_logger is None: error_logger = setupLogger("error", "error.log", logging.ERROR) - error_logger.error(traceback.format_exc()) + try: + error_logger.error(traceback.format_exc()) + except Exception: + # As a last resort, print the traceback to stdout + print(traceback.format_exc(), flush=True) if __name__ == "__main__": print(getComputeDeviceList()) \ No newline at end of file