From 7a6ed850b5dfebc64daa74410872ce7c77851fb7 Mon Sep 17 00:00:00 2001 From: Sakamoto Shiina <68018796+ShiinaSakamoto@users.noreply.github.com> Date: Sat, 30 Aug 2025 21:13:15 +0900 Subject: [PATCH 01/92] [Chore] Localization: Add quotations. (Asked english localization from this edited version.) --- locales/en.yml | 20 ++++++++++---------- locales/ja.yml | 22 +++++++++++----------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/locales/en.yml b/locales/en.yml index 065d01f6..0b7405e9 100644 --- a/locales/en.yml +++ b/locales/en.yml @@ -244,7 +244,7 @@ config_page: title: "Settings" original: "Original" translated: "Translated" - for_multi_translation: For Multi-Translation + for_multi_translation: "For Multi-Translation" send_message_format: label: "Message Format (Send)" desc: "You can change the decoration of the message you want to send." @@ -294,16 +294,16 @@ config_page: save_success: "Settings have been saved." plugin_notifications: - downloading: Downloading the plugin. - downloaded_success: Downloaded successfully. - downloaded_error: Download failed. + downloading: "Downloading the plugin." + downloaded_success: "Downloaded successfully." + downloaded_error: "Download failed." - updating: Updating the plugin. - updated_success: Updated successfully. - updated_error: Update failed. + updating: "Updating the plugin." + updated_success: "Updated successfully." + updated_error: "Update failed." - disabled_out_of_support: The plugin has been disabled. It's not supported on this VRCT version. + disabled_out_of_support: "The plugin has been disabled. It's not supported on this VRCT version." disabled_due_to_an_error: "An error was detected while running the plugin. Please report this to the plugin developer." - is_enabled: The plugin has enabled. - is_disabled: The plugin has disabled. \ No newline at end of file + is_enabled: "The plugin has enabled." + is_disabled: "The plugin has disabled." 
\ No newline at end of file diff --git a/locales/ja.yml b/locales/ja.yml index 978a5b29..d3da554e 100644 --- a/locales/ja.yml +++ b/locales/ja.yml @@ -243,7 +243,7 @@ config_page: title: "設定" original: "原文" translated: "翻訳" - for_multi_translation: 多言語翻訳用 + for_multi_translation: "多言語翻訳用" send_message_format: label: メッセージフォーマット(送信) desc: VRChatで相手に実際に見えるフォーマットを変更できます。 @@ -293,16 +293,16 @@ config_page: save_success: "設定を保存しました。" plugin_notifications: - downloading: プラグインをダウンロード中。 - downloaded_success: プラグインのダウンロードが完了しました。 - downloaded_error: プラグインのダウンロードに失敗しました。 + downloading: "プラグインをダウンロード中。" + downloaded_success: "プラグインのダウンロードが完了しました。" + downloaded_error: "プラグインのダウンロードに失敗しました。" - updating: プラグインをアップデート中。 - updated_success: プラグインのアップデートが完了しました。 - updated_error: プラグインのアップデートに失敗しました。 + updating: "プラグインをアップデート中。" + updated_success: "プラグインのアップデートが完了しました。" + updated_error: "プラグインのアップデートに失敗しました。" - disabled_out_of_support: 現在のバージョンとの互換性がありません。プラグインを無効にしました。 - disabled_due_to_an_error: プラグイン実行中にエラーを検知しました。プラグイン開発者に報告してください。 + disabled_out_of_support: "現在のバージョンとの互換性がありません。プラグインを無効にしました。" + disabled_due_to_an_error: "プラグイン実行中にエラーを検知しました。プラグイン開発者に報告してください。" - is_enabled: プラグインを有効にしました。 - is_disabled: プラグインを無効にしました。 \ No newline at end of file + is_enabled: "プラグインを有効にしました。" + is_disabled: "プラグインを無効にしました。" \ No newline at end of file From e7ae70add0942552835dbc0159a1f7d89e1d6890 Mon Sep 17 00:00:00 2001 From: Sakamoto Shiina <68018796+ShiinaSakamoto@users.noreply.github.com> Date: Sat, 30 Aug 2025 22:45:09 +0900 Subject: [PATCH 02/92] [Update] Localization: Add and adjust keys for each language. (Asked korean localization from this edited version.) 
--- locales/en.yml | 1 - locales/ko.yml | 65 ++++++++++++++++++++++++++++++++++++++++++++- locales/zh-Hans.yml | 65 ++++++++++++++++++++++++++++++++++++++++++++- locales/zh-Hant.yml | 65 ++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 192 insertions(+), 4 deletions(-) diff --git a/locales/en.yml b/locales/en.yml index 0b7405e9..f311ced3 100644 --- a/locales/en.yml +++ b/locales/en.yml @@ -231,7 +231,6 @@ config_page: send_received_message_to_vrc: label: "Send Received Message To VRChat" desc: "Send the message you received from the speaker's voice to VRChat's chatbox." - message_format_common: example_view: title: "Preview" diff --git a/locales/ko.yml b/locales/ko.yml index 39df6d4a..58684102 100644 --- a/locales/ko.yml +++ b/locales/ko.yml @@ -24,6 +24,9 @@ common_error: invalid_value_speaker_phrase_timeout: "0 이상에서 '{{speaker_record_timeout_label}}'보다 작을 수 없습니다." invalid_value_speaker_max_phrase: "0 이상의 숫자만 설정할 수 있습니다." +common_warning: + unable_to_use_osc_query: + main_page: translation: "번역" transcription_send: "음성인식 (마이크)" @@ -79,6 +82,7 @@ config_page: transcription: "음성인식" others: "기타" hotkeys: + plugins: advanced_settings: "고급 설정" device: @@ -115,6 +119,9 @@ config_page: hide: "숨김 (Enter 키를 사용하여 전송)" show: "표시" show_and_disable_enter_key: "표시 (Enter 키 전송 비활성화)" + show_resend_button: + label: + desc: font_family: label: "폰트" ui_language: @@ -204,6 +211,7 @@ config_page: others: section_label_sounds: + section_label_message_formats: auto_clear_the_message_box: label: "챗박스 자동 삭제" send_only_translated_messages: @@ -223,6 +231,25 @@ config_page: send_received_message_to_vrc: label: desc: + message_format_common: + example_view: + title: + original_translated: + original_translated_multi: + translated_only_multi: + translated_only: + original_only: + settings: + title: + original: + translated: + for_multi_translation: + send_message_format: + label: + desc: + received_message_format: + label: + desc: hotkeys: toggle_vrct_visibility: @@ -234,6 
+261,18 @@ config_page: toggle_transcription_receive: label: + plugins: + downloaded_version: + latest_version: + available_after_updating: + unavailable_downloaded: + no_latest_info: + using_latest_version: + available_latest_version: + unavailable_latest_version: + available_in_latest_vrct_version: + unavailable_not_downloaded: + advanced_settings: osc_ip_address: label: "OSC IP 주소" @@ -242,4 +281,28 @@ config_page: open_config_filepath: label: "설정 파일 열기" switch_compute_device: - label: \ No newline at end of file + label: + enable_websocket: + label: + websocket_host: + label: + websocket_port: + label: + + notifications: + save_success: + +plugin_notifications: + downloading: + downloaded_success: + downloaded_error: + + updating: + updated_success: + updated_error: + + disabled_out_of_support: + disabled_due_to_an_error: + + is_enabled: + is_disabled: \ No newline at end of file diff --git a/locales/zh-Hans.yml b/locales/zh-Hans.yml index dd1dafc7..3e2ec51f 100644 --- a/locales/zh-Hans.yml +++ b/locales/zh-Hans.yml @@ -24,6 +24,9 @@ common_error: invalid_value_speaker_phrase_timeout: "转录间隔时间大于0秒且不能小于「{{speaker_record_timeout_label}}」" invalid_value_speaker_max_phrase: "数值应为 0 以上" +common_warning: + unable_to_use_osc_query: + main_page: translation: "翻译" transcription_send: "你的语音转文字" @@ -79,6 +82,7 @@ config_page: transcription: "转录" others: "其他" hotkeys: + plugins: advanced_settings: "高级设置" device: @@ -115,6 +119,9 @@ config_page: hide: "隐藏 (可使用回车发送信息)" show: "显示" show_and_disable_enter_key: "显示,并且停用‘回车发送信息’" + show_resend_button: + label: + desc: font_family: label: "字体" ui_language: @@ -204,6 +211,7 @@ config_page: others: section_label_sounds: + section_label_message_formats: auto_clear_the_message_box: label: "发言后自动清空chatbox" send_only_translated_messages: @@ -223,6 +231,25 @@ config_page: send_received_message_to_vrc: label: desc: + message_format_common: + example_view: + title: + original_translated: + original_translated_multi: + translated_only_multi: 
+ translated_only: + original_only: + settings: + title: + original: + translated: + for_multi_translation: + send_message_format: + label: + desc: + received_message_format: + label: + desc: hotkeys: toggle_vrct_visibility: @@ -234,6 +261,18 @@ config_page: toggle_transcription_receive: label: + plugins: + downloaded_version: + latest_version: + available_after_updating: + unavailable_downloaded: + no_latest_info: + using_latest_version: + available_latest_version: + unavailable_latest_version: + available_in_latest_vrct_version: + unavailable_not_downloaded: + advanced_settings: osc_ip_address: label: "OSC IP 地址" @@ -242,4 +281,28 @@ config_page: open_config_filepath: label: "打开设置文件" switch_compute_device: - label: \ No newline at end of file + label: + enable_websocket: + label: + websocket_host: + label: + websocket_port: + label: + + notifications: + save_success: + +plugin_notifications: + downloading: + downloaded_success: + downloaded_error: + + updating: + updated_success: + updated_error: + + disabled_out_of_support: + disabled_due_to_an_error: + + is_enabled: + is_disabled: \ No newline at end of file diff --git a/locales/zh-Hant.yml b/locales/zh-Hant.yml index 08210a7d..f091fd03 100644 --- a/locales/zh-Hant.yml +++ b/locales/zh-Hant.yml @@ -24,6 +24,9 @@ common_error: invalid_value_speaker_phrase_timeout: "不能小於「{{speaker_record_timeout_label}}」,應為 0 或更高。" invalid_value_speaker_max_phrase: "可以設置 0 或更高的數值。" +common_warning: + unable_to_use_osc_query: + main_page: translation: "翻譯" transcription_send: "麥克風轉文字" @@ -79,6 +82,7 @@ config_page: transcription: "轉錄" others: "其他" hotkeys: + plugins: advanced_settings: "進階設定" device: @@ -115,6 +119,9 @@ config_page: hide: "隱藏(使用 Enter 鍵發送)" show: "顯示" show_and_disable_enter_key: "顯示並停用 Enter 鍵發送" + show_resend_button: + label: + desc: font_family: label: "字型" ui_language: @@ -204,6 +211,7 @@ config_page: others: section_label_sounds: + section_label_message_formats: auto_clear_the_message_box: label: "自動清除 
Chatbox" send_only_translated_messages: @@ -223,6 +231,25 @@ config_page: send_received_message_to_vrc: label: desc: + message_format_common: + example_view: + title: + original_translated: + original_translated_multi: + translated_only_multi: + translated_only: + original_only: + settings: + title: + original: + translated: + for_multi_translation: + send_message_format: + label: + desc: + received_message_format: + label: + desc: hotkeys: toggle_vrct_visibility: @@ -234,6 +261,18 @@ config_page: toggle_transcription_receive: label: + plugins: + downloaded_version: + latest_version: + available_after_updating: + unavailable_downloaded: + no_latest_info: + using_latest_version: + available_latest_version: + unavailable_latest_version: + available_in_latest_vrct_version: + unavailable_not_downloaded: + advanced_settings: osc_ip_address: label: "OSC IP 位址" @@ -242,4 +281,28 @@ config_page: open_config_filepath: label: "打開設定文件" switch_compute_device: - label: \ No newline at end of file + label: + enable_websocket: + label: + websocket_host: + label: + websocket_port: + label: + + notifications: + save_success: + +plugin_notifications: + downloading: + downloaded_success: + downloaded_error: + + updating: + updated_success: + updated_error: + + disabled_out_of_support: + disabled_due_to_an_error: + + is_enabled: + is_disabled: \ No newline at end of file From 8867d180d22a214def354a08307661522439c346 Mon Sep 17 00:00:00 2001 From: Soumt Date: Wed, 3 Sep 2025 20:03:41 +0900 Subject: [PATCH 03/92] [Update] Localization: Updated Korean localization --- locales/ko.yml | 216 ++++++++++++++++++++++++------------------------- 1 file changed, 108 insertions(+), 108 deletions(-) diff --git a/locales/ko.yml b/locales/ko.yml index 58684102..2294e974 100644 --- a/locales/ko.yml +++ b/locales/ko.yml @@ -9,11 +9,11 @@ common: common_error: no_device_mic: "마이크 디바이스를 찾지 못했습니다." no_device_speaker: "스피커 디바이스를 찾지 못했습니다." 
- threshold_invalid_value: - failed_download_weight_ctranslate2: - failed_download_weight_whisper: - translation_limit: - deepl_auth_key_invalid_length: + threshold_invalid_value: "{{min}}부터 {{max}}까지의 숫자로 설정할 수 있습니다." + failed_download_weight_ctranslate2: "CTranslate2 모델 다운로드에 실패했습니다." + failed_download_weight_whisper: "Whisper 모델 다운로드에 실패했습니다." + translation_limit: "번역 엔진 사용 제한에 도달했거나 일시적으로 이용 제한이 걸렸습니다." + deepl_auth_key_invalid_length: "DeepL 인증 키의 문자 수가 잘못되었습니다." deepl_auth_key_failed_authentication: "인증키가 잘못되었거나 API 사용 제한이 상한에 도달했습니다." invalid_value_mic_record_timeout: "0 이상에서 '{{mic_phrase_timeout_label}}'보다 클 수 없습니다." @@ -25,7 +25,7 @@ common_error: invalid_value_speaker_max_phrase: "0 이상의 숫자만 설정할 수 있습니다." common_warning: - unable_to_use_osc_query: + unable_to_use_osc_query: "OSC IP 주소 설정으로 인해 OSC 데이터 수신이 불가능하므로, 아래 기능이 자동으로 비활성화되었습니다." main_page: translation: "번역" @@ -42,7 +42,7 @@ main_page: translator_label_default: "기본값" translator_selector: - is_selected_same_language: + is_selected_same_language: "「{{your_language}}」와 「{{target_language}}」에 동일한 언어가 선택되어 있으므로, 「{{ctranslate2}}」만 사용할 수 있습니다." message_log: all: "전체" @@ -50,7 +50,7 @@ main_page: received: "수신" system: "시스템" - resend_button_on_hover_desc: + resend_button_on_hover_desc: "길게 눌러서 보내기" state_text_enabled: "Enabled" state_text_disabled: "Disabled" @@ -63,33 +63,33 @@ main_page: updating: "업데이트 중..." update_modal: - cpu_desc: - cuda_desc: - cuda_compare_cpu_desc: - cuda_disk_space_desc: - close_modal: - download_latest_and_restart: - is_latest_version_already: - is_current_compute_device: + cpu_desc: "처리 장치로 CPU만을 사용합니다." + cuda_desc: "처리 장치로 CPU와 NVIDIA GPU를 선택할 수 있습니다." + cuda_compare_cpu_desc: "GPU 선택 시, CPU에 비해 처리가 빠릅니다." + cuda_disk_space_desc: "약 {{size}}의 디스크 용량이 필요합니다." + close_modal: "닫기" + download_latest_and_restart: "최신 버전이 다운로드되면 앱이 자동으로 재시작됩니다." + is_latest_version_already: "이미 최신 버전을 사용 중입니다." 
+ is_current_compute_device: "현재 사용 중인 버전" config_page: version: "버전 {{version}}" - model_download_button_label: + model_download_button_label: "다운로드" side_menu_labels: - device: + device: "장치" appearance: "모양" translation: "번역" transcription: "음성인식" others: "기타" - hotkeys: - plugins: + hotkeys: "단축키" + plugins: "플러그인" advanced_settings: "고급 설정" device: - check_volume: - label_auto_select: + check_volume: "음량 확인" + label_auto_select: "자동 선택" label_host: "호스트/드라이버" - label_device: + label_device: "장치" mic_host_device: label: "마이크 장치" mic_dynamic_energy_threshold: @@ -120,8 +120,8 @@ config_page: show: "표시" show_and_disable_enter_key: "표시 (Enter 키 전송 비활성화)" show_resend_button: - label: - desc: + label: "재전송 버튼 표시" + desc: "보낸 메시지 로그에 마우스를 올리면 재전송 버튼이 표시됩니다. 클릭하면 편집 모드로 전환되며, 길게 누르면 재전송됩니다." font_family: label: "폰트" ui_language: @@ -134,19 +134,19 @@ config_page: small: "일반 모델 ({{capacity}})" large: "정밀 모델 ({{capacity}})" ctranslate2_compute_device: - label: + label: "AI 번역 {{ctranslate2}} 처리 장치" deepl_auth_key: label: "DeepL 인증키" desc: "사용시 메인화면에 있는 {{translator}}를 DeepL_API로 변경해 주세요.\n지원하지 않는 언어도 있습니다." open_auth_key_webpage: "DeepL 계정 페이지 열기" - save: - edit: + save: "저장" + edit: "편집" auth_key_success: "인증키 갱신이 완료되었습니다." transcription: section_label_mic: "마이크" section_label_speaker: "스피커" - section_label_transcription_engines: + section_label_transcription_engines: "음성 인식 엔진" mic_record_timeout: label: "최대 무음 시간" desc: "무음을 감지하고 설정된 시간(초)만큼의 시간이 지나면 음성 입력이 종료된 것으로 판단합니다." @@ -171,47 +171,47 @@ config_page: label: "최대 입력 절(phrases) 수" desc: "식된 단어 수의 하한값으로, 이 수치를 초과하는 경우에만 결과를 로그에 표시합니다." select_transcription_engine: - label: + label: "음성 인식에 사용할 엔진" whisper_weight_type: label: "Whisper 모델 타입" - desc: + desc: "용량이 큰 모델일수록 정확도는 높지만, 그만큼 CPU나 GPU를 많이 차지합니다. ※ 특히 medium보다 용량이 큰 모델은 CPU/GPU 성능에 따라 사용 자체가 어려울 수 있습니다." 
model_template: "{{model_name}} 모델 ({{capacity}})" recommended_model_template: "{{model_name}} 모델 ({{capacity}}) (권장)" whisper_compute_device: - label: + label: "Whisper에서 사용할 처리 장치" vr: - single_line: - multi_lines: - overlay_enable: - restore_default_settings: - position: - rotation: - x_position: - y_position: - z_position: - x_rotation: - y_rotation: - z_rotation: + single_line: "한 줄" + multi_lines: "여러 줄" + overlay_enable: "활성화" + restore_default_settings: "초기값으로 되돌리기" + position: "위치" + rotation: "회전" + x_position: "X축 (좌우)" + y_position: "Y축 (상하)" + z_position: "Z축 (앞뒤)" + x_rotation: "X축 회전" + y_rotation: "Y축 회전" + z_rotation: "Z축 회전" sample_text_button: - start: - stop: - sample_text: - opacity: - ui_scaling: - display_duration: - fadeout_duration: - common_settings: - tracker: - hmd: - left_hand: - right_hand: + start: "샘플 텍스트를 오버레이에 전송" + stop: "전송 중지" + sample_text: "샘플 텍스트" + opacity: "투명도" + ui_scaling: "UI 크기 조정" + display_duration: "표시 시간" + fadeout_duration: "페이드 아웃 시간" + common_settings: "공통 설정" + tracker: "표시할 트래커의 위치" + hmd: "HMD" + left_hand: "왼손" + right_hand: "오른손" overlay_show_only_translated_messages: - label: + label: "번역된 메시지만 표시" others: - section_label_sounds: - section_label_message_formats: + section_label_sounds: "사운드" + section_label_message_formats: "메시지 형식" auto_clear_the_message_box: label: "챗박스 자동 삭제" send_only_translated_messages: @@ -220,58 +220,58 @@ config_page: label: "대화 로그 자동 저장" desc: "logs 폴더에 텍스트 파일로 로그가 저장됩니다." vrc_mic_mute_sync: - label: - desc: + label: "VRC 마이크 음소거 동기화" + desc: "VRChat의 마이크가 음소거 상태인 동안에는 메시지를 VRChat에 전송하지 않습니다. ※ 약간의 지연이 발생할 수 있습니다. 또한 눌러서 말하기(Push-To-Talk) 기능은 지원되지 않습니다." send_message_to_vrc: label: "VRChat에 메시지 전송" desc: "VRChat에 메시지를 보내지 않고 사용할 수 있는 방법이 있지만 지원되지 않습니다. VRChat에 메시지를 보내려면 이 기능을 활성화하세요." notification_vrc_sfx: - label: - desc: + label: "채팅 전송 시 채팅창 알림음 재생" + desc: "이 기능을 비활성화하면 다른 사람이 들을 수 있는 채팅창 알림음을 울리지 않고 조용히 전송합니다." 
send_received_message_to_vrc: - label: - desc: + label: "수신한 메시지를 VRChat에 전송" + desc: "스피커에서 인식된 내용을 텍스트로 변환한 메시지가 VRChat에 전송됩니다." message_format_common: example_view: - title: - original_translated: - original_translated_multi: - translated_only_multi: - translated_only: - original_only: + title: "미리 보기" + original_translated: "원문 + 번역문" + original_translated_multi: "원문 + 번역문 (다국어)" + translated_only_multi: "번역문만 (다국어)" + translated_only: "번역문만" + original_only: "원문만" settings: - title: - original: - translated: - for_multi_translation: + title: "설정" + original: "원문" + translated: "\n번역문" + for_multi_translation: "다국어 번역용" send_message_format: - label: - desc: + label: "메시지 형식 (송신)" + desc: "VRChat에서 상대방에게 실제로 보이는 형식을 변경할 수 있습니다." received_message_format: - label: - desc: + label: "메시지 형식 (Speaker2Chatbox)" + desc: "현재로서는 Speaker2Chatbox로 전송할 때의 표시용으로 사용됩니다." hotkeys: toggle_vrct_visibility: - label: + label: "VRCT 최소화/활성화 전환" toggle_translation: - label: + label: "「{{translation}}」 켜기/끄기 전환" toggle_transcription_send: - label: + label: "「{{transcription_send}}」 켜기/끄기 전환" toggle_transcription_receive: - label: + label: "「{{transcription_receive}}」 켜기/끄기 전환" plugins: - downloaded_version: - latest_version: - available_after_updating: - unavailable_downloaded: - no_latest_info: - using_latest_version: - available_latest_version: - unavailable_latest_version: - available_in_latest_vrct_version: - unavailable_not_downloaded: + downloaded_version: "다운로드된 버전: {{downloaded_version}}" + latest_version: "최신 버전: {{latest_version}}" + available_after_updating: "최신 버전으로 업데이트 후 이용 가능" + unavailable_downloaded: "현재 사용 중인 VRCT 버전과의 호환성 문제로 인해 이용 불가" + no_latest_info: "최신 정보를 가져올 수 없습니다" + using_latest_version: "최신 버전을 사용 중" + available_latest_version: "최신 버전을 이용 가능" + unavailable_latest_version: "최신 버전은 현재 이용 불가" + available_in_latest_vrct_version: "VRCT 최신 버전에서 이용 가능" + unavailable_not_downloaded: "현재 이용 불가" advanced_settings: osc_ip_address: @@ -281,28 +281,28 @@ config_page: 
open_config_filepath: label: "설정 파일 열기" switch_compute_device: - label: + label: "VRCT CPU/GPU 버전 전환" enable_websocket: - label: + label: "WebSocket 서버 활성화" websocket_host: - label: + label: "WebSocket 호스트" websocket_port: - label: + label: "WebSocket 포트" notifications: - save_success: + save_success: "설정을 저장했습니다." plugin_notifications: - downloading: - downloaded_success: - downloaded_error: + downloading: "플러그인을 다운로드 중입니다." + downloaded_success: "플러그인 다운로드가 완료되었습니다." + downloaded_error: "플러그인 다운로드에 실패했습니다." - updating: - updated_success: - updated_error: + updating: "플러그인을 업데이트 중입니다." + updated_success: "플러그인 업데이트가 완료되었습니다." + updated_error: "플러그인 업데이트에 실패했습니다." - disabled_out_of_support: - disabled_due_to_an_error: + disabled_out_of_support: "현재 버전과 호환되지 않습니다. 플러그인을 비활성화했습니다." + disabled_due_to_an_error: "플러그인 실행 중 오류를 감지했습니다. 플러그인 개발자에게 보고해 주세요." - is_enabled: - is_disabled: \ No newline at end of file + is_enabled: "플러그인을 활성화했습니다." + is_disabled: "플러그인을 비활성화했습니다." From 9c2123fe31b4980a87ecf5ecdbe413a5d7e0df69 Mon Sep 17 00:00:00 2001 From: Soumt Date: Wed, 3 Sep 2025 20:38:06 +0900 Subject: [PATCH 04/92] [Chore] Localization: Address minor Korean localization review feedback --- locales/ko.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/locales/ko.yml b/locales/ko.yml index 2294e974..bdbf118e 100644 --- a/locales/ko.yml +++ b/locales/ko.yml @@ -42,7 +42,7 @@ main_page: translator_label_default: "기본값" translator_selector: - is_selected_same_language: "「{{your_language}}」와 「{{target_language}}」에 동일한 언어가 선택되어 있으므로, 「{{ctranslate2}}」만 사용할 수 있습니다." + is_selected_same_language: "'{{your_language}}'와 '{{target_language}}'에 동일한 언어가 선택되어 있으므로, 「{{ctranslate2}}」만 사용할 수 있습니다." message_log: all: "전체" @@ -174,7 +174,7 @@ config_page: label: "음성 인식에 사용할 엔진" whisper_weight_type: label: "Whisper 모델 타입" - desc: "용량이 큰 모델일수록 정확도는 높지만, 그만큼 CPU나 GPU를 많이 차지합니다. ※ 특히 medium보다 용량이 큰 모델은 CPU/GPU 성능에 따라 사용 자체가 어려울 수 있습니다." 
+ desc: "용량이 큰 모델일수록 정확도는 높지만, 그만큼 CPU나 GPU를 많이 차지합니다. * 특히 medium보다 용량이 큰 모델은 CPU/GPU 성능에 따라 사용 자체가 어려울 수 있습니다." model_template: "{{model_name}} 모델 ({{capacity}})" recommended_model_template: "{{model_name}} 모델 ({{capacity}}) (권장)" whisper_compute_device: @@ -221,7 +221,7 @@ config_page: desc: "logs 폴더에 텍스트 파일로 로그가 저장됩니다." vrc_mic_mute_sync: label: "VRC 마이크 음소거 동기화" - desc: "VRChat의 마이크가 음소거 상태인 동안에는 메시지를 VRChat에 전송하지 않습니다. ※ 약간의 지연이 발생할 수 있습니다. 또한 눌러서 말하기(Push-To-Talk) 기능은 지원되지 않습니다." + desc: "VRChat의 마이크가 음소거 상태인 동안에는 메시지를 VRChat에 전송하지 않습니다. * 약간의 지연이 발생할 수 있습니다. 또한 눌러서 말하기(Push-To-Talk) 기능은 지원되지 않습니다." send_message_to_vrc: label: "VRChat에 메시지 전송" desc: "VRChat에 메시지를 보내지 않고 사용할 수 있는 방법이 있지만 지원되지 않습니다. VRChat에 메시지를 보내려면 이 기능을 활성화하세요." @@ -255,11 +255,11 @@ config_page: toggle_vrct_visibility: label: "VRCT 최소화/활성화 전환" toggle_translation: - label: "「{{translation}}」 켜기/끄기 전환" + label: "'{{translation}}' 켜기/끄기 전환" toggle_transcription_send: - label: "「{{transcription_send}}」 켜기/끄기 전환" + label: "'{{transcription_send}}' 켜기/끄기 전환" toggle_transcription_receive: - label: "「{{transcription_receive}}」 켜기/끄기 전환" + label: "'{{transcription_receive}}' 켜기/끄기 전환" plugins: downloaded_version: "다운로드된 버전: {{downloaded_version}}" From c9dbd13a3a05f5fbe6631ce8a7693471487db05a Mon Sep 17 00:00:00 2001 From: Sakamoto Shiina <68018796+ShiinaSakamoto@users.noreply.github.com> Date: Thu, 4 Sep 2025 13:49:49 +0900 Subject: [PATCH 05/92] [Chore] Adjust new line codes. 
--- locales/ko.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/locales/ko.yml b/locales/ko.yml index bdbf118e..52594be3 100644 --- a/locales/ko.yml +++ b/locales/ko.yml @@ -194,7 +194,7 @@ config_page: y_rotation: "Y축 회전" z_rotation: "Z축 회전" sample_text_button: - start: "샘플 텍스트를 오버레이에 전송" + start: "샘플 텍스트를\n오버레이에 전송" stop: "전송 중지" sample_text: "샘플 텍스트" opacity: "투명도" @@ -242,7 +242,7 @@ config_page: settings: title: "설정" original: "원문" - translated: "\n번역문" + translated: "번역문" for_multi_translation: "다국어 번역용" send_message_format: label: "메시지 형식 (송신)" From cad0f796fa4dab21691fa7f0971b5c180bdcf242 Mon Sep 17 00:00:00 2001 From: Sakamoto Shiina <68018796+ShiinaSakamoto@users.noreply.github.com> Date: Fri, 5 Sep 2025 10:44:23 +0900 Subject: [PATCH 06/92] [Update] Transcription: Add UI. The user can config 'mic/speaker avg logprob' and 'mic/speaker no speech prob' that is related with Whisper's parameter. --- .../setting_box/appearance/Appearance.jsx | 3 + .../transcription/Transcription.jsx | 195 ++++++++++++++++++ .../configs/transcription/useTranscription.js | 99 +++++++++ src-ui/logics/useReceiveRoutes.js | 10 + src-ui/store.js | 5 + 5 files changed, 312 insertions(+) diff --git a/src-ui/app/config_page/setting_section/setting_box/appearance/Appearance.jsx b/src-ui/app/config_page/setting_section/setting_box/appearance/Appearance.jsx index 1f2f9eb4..cc7e730f 100644 --- a/src-ui/app/config_page/setting_section/setting_box/appearance/Appearance.jsx +++ b/src-ui/app/config_page/setting_section/setting_box/appearance/Appearance.jsx @@ -69,6 +69,7 @@ const UiScalingContainer = () => { asyncUpdateBreakPoint(); }, [currentUiScaling.data]); + // [Duplicated] const createMarks = (min, max) => { const marks = []; for (let value = min; value <= max; value += 10) { @@ -111,6 +112,7 @@ export const MessageLogUiScalingContainer = () => { setUiMessageLogUiScaling(currentMessageLogUiScaling.data); }, [currentMessageLogUiScaling.data]); + // [Duplicated] 
const createMarks = (min, max) => { const marks = []; for (let value = min; value <= max; value += 10) { @@ -207,6 +209,7 @@ const TransparencyContainer = () => { setUiTransparency(currentTransparency.data); }, [currentTransparency.data]); + // [Duplicated] const createMarks = (min, max) => { const marks = []; for (let value = min; value <= max; value += 10) { diff --git a/src-ui/app/config_page/setting_section/setting_box/transcription/Transcription.jsx b/src-ui/app/config_page/setting_section/setting_box/transcription/Transcription.jsx index 279c1d9d..7416c970 100644 --- a/src-ui/app/config_page/setting_section/setting_box/transcription/Transcription.jsx +++ b/src-ui/app/config_page/setting_section/setting_box/transcription/Transcription.jsx @@ -1,3 +1,4 @@ +import { useEffect, useState } from "react"; import { useI18n } from "@useI18n"; import styles from "./Transcription.module.scss"; import { updateLabelsById, genNumObjArray } from "@utils"; @@ -12,6 +13,7 @@ import { RadioButtonContainer, DropdownMenuContainer, ComputeDeviceContainer, + SliderContainer, } from "../_templates/Templates"; import { @@ -24,6 +26,7 @@ export const Transcription = () => { + ); }; @@ -353,4 +356,196 @@ const findKeyByDeviceValue = (devices, target_value) => { } } return null; +}; + + + + + +const Advanced_Container = () => { + const { t } = useI18n(); + return ( +
+ + {/* */} + + + + +
+ ); + + +}; + +export const MicAvgLogprobContainer = () => { + const { t } = useI18n(); + const { currentMicAvgLogprob, setMicAvgLogprob } = useTranscription(); + const [ui_mic_avg_logprob, setUiMicAvgLogprob] = useState(currentMicAvgLogprob.data); + + const onchangeFunction = (value) => { + setUiMicAvgLogprob(value); + }; + const onchangeCommittedFunction = (value) => { + setMicAvgLogprob(value); + }; + useEffect(() => { + setUiMicAvgLogprob(currentMicAvgLogprob.data); + }, [currentMicAvgLogprob.data]); + + // [Duplicated] + const createMarks = (min, max) => { + const marks = []; + for (let value = min; value <= max; value += 0.2) { + value = parseFloat(value.toFixed(1)); + marks.push({ value, label: `${value}` }); + } + return marks; + }; + + const marks = createMarks(-2, 0); + + return ( + + ); +}; + +export const MicNoSpeechProbContainer = () => { + const { t } = useI18n(); + const { currentMicNoSpeechProb, setMicNoSpeechProb } = useTranscription(); + const [ui_mic_no_speech_prob, setUiMicNoSpeechProb] = useState(currentMicNoSpeechProb.data); + + const onchangeFunction = (value) => { + setUiMicNoSpeechProb(value); + }; + const onchangeCommittedFunction = (value) => { + setMicNoSpeechProb(value); + }; + useEffect(() => { + setUiMicNoSpeechProb(currentMicNoSpeechProb.data); + }, [currentMicNoSpeechProb.data]); + + // [Duplicated] + const createMarks = (min, max) => { + const marks = []; + for (let value = min; value <= max; value += 0.1) { + value = parseFloat(value.toFixed(1)); + marks.push({ value, label: `${value}` }); + } + return marks; + }; + + const marks = createMarks(0, 1); + + return ( + + ); +}; + +export const SpeakerAvgLogprobContainer = () => { + const { t } = useI18n(); + const { currentSpeakerAvgLogprob, setSpeakerAvgLogprob } = useTranscription(); + const [ui_speaker_avg_logprob, setUiSpeakerAvgLogprob] = useState(currentSpeakerAvgLogprob.data); + + const onchangeFunction = (value) => { + setUiSpeakerAvgLogprob(value); + }; + const 
onchangeCommittedFunction = (value) => { + setSpeakerAvgLogprob(value); + }; + useEffect(() => { + setUiSpeakerAvgLogprob(currentSpeakerAvgLogprob.data); + }, [currentSpeakerAvgLogprob.data]); + + // [Duplicated] + const createMarks = (min, max) => { + const marks = []; + for (let value = min; value <= max; value += 0.2) { + value = parseFloat(value.toFixed(1)); + marks.push({ value, label: `${value}` }); + } + return marks; + }; + + const marks = createMarks(-2, 0); + + return ( + + ); +}; + +export const SpeakerNoSpeechProbContainer = () => { + const { t } = useI18n(); + const { currentSpeakerNoSpeechProb, setSpeakerNoSpeechProb } = useTranscription(); + const [ui_speaker_no_speech_prob, setUiSpeakerNoSpeechProb] = useState(currentSpeakerNoSpeechProb.data); + + const onchangeFunction = (value) => { + setUiSpeakerNoSpeechProb(value); + }; + const onchangeCommittedFunction = (value) => { + setSpeakerNoSpeechProb(value); + }; + useEffect(() => { + setUiSpeakerNoSpeechProb(currentSpeakerNoSpeechProb.data); + }, [currentSpeakerNoSpeechProb.data]); + + // [Duplicated] + const createMarks = (min, max) => { + const marks = []; + for (let value = min; value <= max; value += 0.1) { + value = parseFloat(value.toFixed(1)); + marks.push({ value, label: `${value}` }); + } + return marks; + }; + + const marks = createMarks(0, 1); + + return ( + + ); }; \ No newline at end of file diff --git a/src-ui/logics/configs/transcription/useTranscription.js b/src-ui/logics/configs/transcription/useTranscription.js index 147aa9e6..294b0473 100644 --- a/src-ui/logics/configs/transcription/useTranscription.js +++ b/src-ui/logics/configs/transcription/useTranscription.js @@ -14,6 +14,11 @@ import { useStore_SelectedWhisperWeightType, useStore_WhisperWeightTypeStatus, + + useStore_MicAvgLogprob, + useStore_MicNoSpeechProb, + useStore_SpeakerAvgLogprob, + useStore_SpeakerNoSpeechProb, } from "@store"; import { useStdoutToPython } from "@useStdoutToPython"; import { transformToIndexedArray } 
from "@utils"; @@ -41,6 +46,13 @@ export const useTranscription = () => { const { currentSelectableWhisperComputeDeviceList, updateSelectableWhisperComputeDeviceList, pendingSelectableWhisperComputeDeviceList } = useStore_SelectableWhisperComputeDeviceList(); const { currentSelectedWhisperComputeDevice, updateSelectedWhisperComputeDevice, pendingSelectedWhisperComputeDevice } = useStore_SelectedWhisperComputeDevice(); + // Advanced Settings + const { currentMicAvgLogprob, updateMicAvgLogprob, pendingMicAvgLogprob } = useStore_MicAvgLogprob(); + const { currentMicNoSpeechProb, updateMicNoSpeechProb, pendingMicNoSpeechProb } = useStore_MicNoSpeechProb(); + const { currentSpeakerAvgLogprob, updateSpeakerAvgLogprob, pendingSpeakerAvgLogprob } = useStore_SpeakerAvgLogprob(); + const { currentSpeakerNoSpeechProb, updateSpeakerNoSpeechProb, pendingSpeakerNoSpeechProb } = useStore_SpeakerNoSpeechProb(); + + // Mic const getMicRecordTimeout = () => { pendingMicRecordTimeout(); @@ -276,6 +288,67 @@ export const useTranscription = () => { showNotification_SaveSuccess(); }; + // Advanced (Mic Avg Logprob) + const getMicAvgLogprob = () => { + pendingMicAvgLogprob(); + asyncStdoutToPython("/get/data/mic_avg_logprob"); + }; + + const setMicAvgLogprob = (selected_mic_avg_logprob) => { + pendingMicAvgLogprob(); + asyncStdoutToPython("/set/data/mic_avg_logprob", selected_mic_avg_logprob); + }; + + const setSuccessMicAvgLogprob = (selected_mic_avg_logprob) => { + updateMicAvgLogprob(selected_mic_avg_logprob); + showNotification_SaveSuccess(); + }; + // Advanced (Mic No Speech Prob) + const getMicNoSpeechProb = () => { + pendingMicNoSpeechProb(); + asyncStdoutToPython("/get/data/mic_no_speech_prob"); + }; + + const setMicNoSpeechProb = (selected_mic_no_speech_prob) => { + pendingMicNoSpeechProb(); + asyncStdoutToPython("/set/data/mic_no_speech_prob", selected_mic_no_speech_prob); + }; + + const setSuccessMicNoSpeechProb = (selected_mic_no_speech_prob) => { + 
updateMicNoSpeechProb(selected_mic_no_speech_prob); + showNotification_SaveSuccess(); + }; + // Advanced (Speaker Avg Logprob) + const getSpeakerAvgLogprob = () => { + pendingSpeakerAvgLogprob(); + asyncStdoutToPython("/get/data/speaker_avg_logprob"); + }; + + const setSpeakerAvgLogprob = (selected_speaker_avg_logprob) => { + pendingSpeakerAvgLogprob(); + asyncStdoutToPython("/set/data/speaker_avg_logprob", selected_speaker_avg_logprob); + }; + + const setSuccessSpeakerAvgLogprob = (selected_speaker_avg_logprob) => { + updateSpeakerAvgLogprob(selected_speaker_avg_logprob); + showNotification_SaveSuccess(); + }; + // Advanced (Speaker No Speech Prob) + const getSpeakerNoSpeechProb = () => { + pendingSpeakerNoSpeechProb(); + asyncStdoutToPython("/get/data/speaker_no_speech_prob"); + }; + + const setSpeakerNoSpeechProb = (selected_speaker_no_speech_prob) => { + pendingSpeakerNoSpeechProb(); + asyncStdoutToPython("/set/data/speaker_no_speech_prob", selected_speaker_no_speech_prob); + }; + + const setSuccessSpeakerNoSpeechProb = (selected_speaker_no_speech_prob) => { + updateSpeakerNoSpeechProb(selected_speaker_no_speech_prob); + showNotification_SaveSuccess(); + }; + return { // Mic currentMicRecordTimeout, @@ -353,5 +426,31 @@ export const useTranscription = () => { updateSelectedWhisperComputeDevice, setSelectedWhisperComputeDevice, setSuccessSelectedWhisperComputeDevice, + + // Advanced + // Mic Avg Logprob + currentMicAvgLogprob, + getMicAvgLogprob, + updateMicAvgLogprob, + setMicAvgLogprob, + setSuccessMicAvgLogprob, + // Mic No Speech Prob + currentMicNoSpeechProb, + getMicNoSpeechProb, + updateMicNoSpeechProb, + setMicNoSpeechProb, + setSuccessMicNoSpeechProb, + // Speaker Avg Logprob + currentSpeakerAvgLogprob, + getSpeakerAvgLogprob, + updateSpeakerAvgLogprob, + setSpeakerAvgLogprob, + setSuccessSpeakerAvgLogprob, + // Speaker No Speech Prob + currentSpeakerNoSpeechProb, + getSpeakerNoSpeechProb, + updateSpeakerNoSpeechProb, + setSpeakerNoSpeechProb, + 
setSuccessSpeakerNoSpeechProb, }; }; \ No newline at end of file diff --git a/src-ui/logics/useReceiveRoutes.js b/src-ui/logics/useReceiveRoutes.js index c9380612..c36b5fa7 100644 --- a/src-ui/logics/useReceiveRoutes.js +++ b/src-ui/logics/useReceiveRoutes.js @@ -224,6 +224,16 @@ export const ROUTE_META_LIST = [ { endpoint: "/get/data/selected_transcription_compute_device", ns: configs, hook_name: "useTranscription", method_name: "updateSelectedWhisperComputeDevice" }, { endpoint: "/set/data/selected_transcription_compute_device", ns: configs, hook_name: "useTranscription", method_name: "setSuccessSelectedWhisperComputeDevice" }, + // Transcription (Advanced) + { endpoint: "/get/data/mic_avg_logprob", ns: configs, hook_name: "useTranscription", method_name: "updateMicAvgLogprob" }, + { endpoint: "/set/data/mic_avg_logprob", ns: configs, hook_name: "useTranscription", method_name: "setSuccessMicAvgLogprob" }, + { endpoint: "/get/data/mic_no_speech_prob", ns: configs, hook_name: "useTranscription", method_name: "updateMicNoSpeechProb" }, + { endpoint: "/set/data/mic_no_speech_prob", ns: configs, hook_name: "useTranscription", method_name: "setSuccessMicNoSpeechProb" }, + { endpoint: "/get/data/speaker_avg_logprob", ns: configs, hook_name: "useTranscription", method_name: "updateSpeakerAvgLogprob" }, + { endpoint: "/set/data/speaker_avg_logprob", ns: configs, hook_name: "useTranscription", method_name: "setSuccessSpeakerAvgLogprob" }, + { endpoint: "/get/data/speaker_no_speech_prob", ns: configs, hook_name: "useTranscription", method_name: "updateSpeakerNoSpeechProb" }, + { endpoint: "/set/data/speaker_no_speech_prob", ns: configs, hook_name: "useTranscription", method_name: "setSuccessSpeakerNoSpeechProb" }, + // VR { endpoint: "/get/data/overlay_small_log", ns: configs, hook_name: "useVr", method_name: "updateIsEnabledOverlaySmallLog" }, { endpoint: "/set/enable/overlay_small_log", ns: configs, hook_name: "useVr", method_name: "setSuccessIsEnabledOverlaySmallLog" }, 
diff --git a/src-ui/store.js b/src-ui/store.js index 9241ad41..e36aad46 100644 --- a/src-ui/store.js +++ b/src-ui/store.js @@ -238,6 +238,11 @@ export const { atomInstance: Atom_SelectedTranscriptionEngine, useHook: useStore export const { atomInstance: Atom_SelectableWhisperComputeDeviceList, useHook: useStore_SelectableWhisperComputeDeviceList } = createAtomWithHook({}, "SelectableWhisperComputeDeviceList"); export const { atomInstance: Atom_SelectedWhisperComputeDevice, useHook: useStore_SelectedWhisperComputeDevice } = createAtomWithHook("", "SelectedWhisperComputeDevice"); +export const { atomInstance: Atom_MicAvgLogprob, useHook: useStore_MicAvgLogprob } = createAtomWithHook(-0.8, "MicAvgLogprob"); +export const { atomInstance: Atom_MicNoSpeechProb, useHook: useStore_MicNoSpeechProb } = createAtomWithHook(0.6, "MicNoSpeechProb"); +export const { atomInstance: Atom_SpeakerAvgLogprob, useHook: useStore_SpeakerAvgLogprob } = createAtomWithHook(-0.8, "SpeakerAvgLogprob"); +export const { atomInstance: Atom_SpeakerNoSpeechProb, useHook: useStore_SpeakerNoSpeechProb } = createAtomWithHook(0.6, "SpeakerNoSpeechProb"); + // VR export const { atomInstance: Atom_OverlaySmallLogSettings, useHook: useStore_OverlaySmallLogSettings } = createAtomWithHook({ From 7a0913928ea642c2604e7805e3426ae1697eafca Mon Sep 17 00:00:00 2001 From: Sakamoto Shiina <68018796+ShiinaSakamoto@users.noreply.github.com> Date: Fri, 5 Sep 2025 11:45:29 +0900 Subject: [PATCH 07/92] [Update] Localization: Unify the new line code to LF --- locales/ko.yml | 2 +- locales/zh-Hans.yml | 614 ++++++++++++++++++++++---------------------- 2 files changed, 308 insertions(+), 308 deletions(-) diff --git a/locales/ko.yml b/locales/ko.yml index 52594be3..d2e171ac 100644 --- a/locales/ko.yml +++ b/locales/ko.yml @@ -305,4 +305,4 @@ plugin_notifications: disabled_due_to_an_error: "플러그인 실행 중 오류를 감지했습니다. 플러그인 개발자에게 보고해 주세요." is_enabled: "플러그인을 활성화했습니다." - is_disabled: "플러그인을 비활성화했습니다." 
+ is_disabled: "플러그인을 비활성화했습니다." \ No newline at end of file diff --git a/locales/zh-Hans.yml b/locales/zh-Hans.yml index 3e2ec51f..71a75390 100644 --- a/locales/zh-Hans.yml +++ b/locales/zh-Hans.yml @@ -1,308 +1,308 @@ -# ================================= -# IMPORTANT: -# Please read 'readme_first.txt' before making any changes. -# ================================= - -common: - go_back_button_label: "返回" - -common_error: - no_device_mic: # 未检测到他人语音 ? - no_device_speaker: # 未检测到他人语音 ? - threshold_invalid_value: # 数值应为 {{min}} 至 {{max}} 之间。 ? 设定的数值从 {{min}} 到 {{max}} ? - failed_download_weight_ctranslate2: - failed_download_weight_whisper: - translation_limit: - deepl_auth_key_invalid_length: - deepl_auth_key_failed_authentication: "授权密匙错误或已达API使用上限" - - invalid_value_mic_record_timeout: "数值应为 0 至 「{{mic_phrase_timeout_label}}」" - invalid_value_mic_phrase_timeout: "转录间隔时间大于0秒且不能小于「{{mic_record_timeout_label}}」" - invalid_value_mic_max_phrase: "数值应为 0 以上" - - invalid_value_speaker_record_timeout: "数值应为 0 至 「{{speaker_phrase_timeout_label}}」" - invalid_value_speaker_phrase_timeout: "转录间隔时间大于0秒且不能小于「{{speaker_record_timeout_label}}」" - invalid_value_speaker_max_phrase: "数值应为 0 以上" - -common_warning: - unable_to_use_osc_query: - -main_page: - translation: "翻译" - transcription_send: "你的语音转文字" - transcription_receive: "他人语音转文字" - foreground: "顶层显示" - - language_settings: "语言设定" - your_language: "你的语言" - translate_each_other_label: "双向翻译" - swap_button_label: "互换" - target_language: "目标语言" - translator: "翻译器" - translator_label_default: "默认" - - translator_selector: - is_selected_same_language: - - message_log: - all: "全部" - sent: "发送" - received: "接受" - system: "系统" - - resend_button_on_hover_desc: - - state_text_enabled: "启用" - state_text_disabled: "停用" - - language_selector: - title_your_language: "你的语言" - title_target_language: "目标语言" - - update_available: "有新版本可供使用!" - updating: "更新中..." 
- -update_modal: - cpu_desc: - cuda_desc: - cuda_compare_cpu_desc: - cuda_disk_space_desc: - close_modal: - download_latest_and_restart: - is_latest_version_already: - is_current_compute_device: - -config_page: - version: "版本 {{version}}" - model_download_button_label: - side_menu_labels: - device: - appearance: "外观" - translation: "翻译" - transcription: "转录" - others: "其他" - hotkeys: - plugins: - advanced_settings: "高级设置" - - device: - check_volume: "Check Volume" - label_auto_select: "Auto Select" - label_host: "Host/Driver" - label_device: "Device" - mic_host_device: - label: "麦克风 (设备)" - mic_dynamic_energy_threshold: - label_for_automatic: "麦克风输入阈值(当前设置:自动)" - desc_for_automatic: "自动调整麦克风输入阈值" - label_for_manual: "麦克风输入阈值(当前设置:手动)" - desc_for_manual: "使用滑杆手动确定麦克风输入灵敏度。按下麦克风图标输入语音,并在监控音量的同时调节灵敏度。" - speaker_device: - label: "他人语音 (设备)" - speaker_dynamic_energy_threshold: - label_for_automatic: "他人语音接收阈值(当前设置:自动)" - desc_for_automatic: "自动调节他人语音接收阈值" - label_for_manual: "他人语音接收阈值(当前设置:手动)" - desc_for_manual: "使用滑杆手动调整他人语音接收阈值.在按下耳机按钮时,请根据实际听到的声音调整该大小" - - appearance: - transparency: - label: "透明度" - desc: "更改主视窗透明度" - ui_size: - label: "界面大小" - textbox_ui_size: - label: "文本框字体大小" - desc: "你可以根据用户界面大小调整文本框中使用的字体大小。" - send_message_button_type: - label: "发送信息按钮" - hide: "隐藏 (可使用回车发送信息)" - show: "显示" - show_and_disable_enter_key: "显示,并且停用‘回车发送信息’" - show_resend_button: - label: - desc: - font_family: - label: "字体" - ui_language: - label: "界面语言" - - translation: - ctranslate2_weight_type: - label: "选择离线翻译模型" - desc: "可以选择用于离线翻译的翻译模型" - small: "普通模型 ({{capacity}})" - large: "高精度模型 ({{capacity}})" - ctranslate2_compute_device: - label: - deepl_auth_key: - label: "DeepL 授权密匙" - desc: "在使用的时候,使用时请在主屏幕上通过 DeepL_API 选择 {{translator}}\n※某些语言可能不支持" - open_auth_key_webpage: "打开DeepL账号页面" - save: - edit: - auth_key_success: "授权密匙认证完成。" - - transcription: - section_label_mic: "你的麦克风" - section_label_speaker: "他人声音" - section_label_transcription_engines: - mic_record_timeout: - 
label: "语音输入结束后的静音时间" - desc: "当检测到静音并经过设定的秒数后,语音输入即被视为完成。" - mic_phrase_timeout: - label: "转录间隔" - desc: "在经过设定的时间后执行转录" - mic_max_phrase: - label: "麦克风发送时的最小单词数" - desc: "转录字数的下限,只有超过这个数字,才会记录翻译结果并发送到VRC" - mic_word_filter: - label: "单词过滤器" - desc: "检测出被记录的单词时,不会发送这段话\n如要添加多个单词,可以用逗号来分割\n※不会记录重复的单词" - add_button_label: "添加" - count_desc: "现在被记录的单词数: {{count}}" - speaker_record_timeout: - label: "语音接收结束后的静音时间" - desc: "当检测到静音并经过设定的秒数后,语音接收即被视为完成。" - speaker_phrase_timeout: - label: "转录间隔" - desc: "在经过设定的时间后执行转录" - speaker_max_phrase: - label: "语音接收时的最小单词数" - desc: "转录字数的下限,只有超过这个数字,才会记录转录结果" - select_transcription_engine: - label: - whisper_weight_type: - label: "选择某个Whisper模型" - desc: - model_template: "{{model_name}} 模型 ({{capacity}})" - recommended_model_template: "{{model_name}} 模型 ({{capacity}}) (推荐)" - whisper_compute_device: - label: - - vr: - single_line: - multi_lines: - overlay_enable: - restore_default_settings: "恢复默认设置" - position: - rotation: - x_position: "X轴(左右)" - y_position: "Y轴(上下)" - z_position: "Z轴(前后)" - x_rotation: "X轴旋转" - y_rotation: "Y轴旋转" - z_rotation: "Z轴旋转" - sample_text_button: - start: - stop: - sample_text: - opacity: "透明度" - ui_scaling: "大小" - display_duration: "显示持续时间" - fadeout_duration: "渐隐持续时间" - common_settings: - tracker: - hmd: - left_hand: - right_hand: - overlay_show_only_translated_messages: - label: - - others: - section_label_sounds: - section_label_message_formats: - auto_clear_the_message_box: - label: "发言后自动清空chatbox" - send_only_translated_messages: - label: "只发送翻译后的信息" - auto_export_message_logs: - label: "自动导出聊天记录" - desc: "以文本文件的形式在logs文件夹中保存。" - vrc_mic_mute_sync: - label: "与VRC中的麦克风静音同步" - desc: "当VRChat的麦克风处于静音时,不在VRChat中发送信息\n※存在少许延迟且不支持按键发言." - send_message_to_vrc: - label: "发送信息至VRChat" - desc: "不发送信息至VRChat的情况下也能使用它,但该功能现在并未完成.在想要发送信息时,请不要忘记打开这个功能." 
- notification_vrc_sfx: - label: - desc: - send_received_message_to_vrc: - label: - desc: - message_format_common: - example_view: - title: - original_translated: - original_translated_multi: - translated_only_multi: - translated_only: - original_only: - settings: - title: - original: - translated: - for_multi_translation: - send_message_format: - label: - desc: - received_message_format: - label: - desc: - - hotkeys: - toggle_vrct_visibility: - label: - toggle_translation: - label: - toggle_transcription_send: - label: - toggle_transcription_receive: - label: - - plugins: - downloaded_version: - latest_version: - available_after_updating: - unavailable_downloaded: - no_latest_info: - using_latest_version: - available_latest_version: - unavailable_latest_version: - available_in_latest_vrct_version: - unavailable_not_downloaded: - - advanced_settings: - osc_ip_address: - label: "OSC IP 地址" - osc_port: - label: "OSC 端口" - open_config_filepath: - label: "打开设置文件" - switch_compute_device: - label: - enable_websocket: - label: - websocket_host: - label: - websocket_port: - label: - - notifications: - save_success: - -plugin_notifications: - downloading: - downloaded_success: - downloaded_error: - - updating: - updated_success: - updated_error: - - disabled_out_of_support: - disabled_due_to_an_error: - - is_enabled: +# ================================= +# IMPORTANT: +# Please read 'readme_first.txt' before making any changes. +# ================================= + +common: + go_back_button_label: "返回" + +common_error: + no_device_mic: # 未检测到他人语音 ? + no_device_speaker: # 未检测到他人语音 ? + threshold_invalid_value: # 数值应为 {{min}} 至 {{max}} 之间。 ? 设定的数值从 {{min}} 到 {{max}} ? 
+ failed_download_weight_ctranslate2: + failed_download_weight_whisper: + translation_limit: + deepl_auth_key_invalid_length: + deepl_auth_key_failed_authentication: "授权密匙错误或已达API使用上限" + + invalid_value_mic_record_timeout: "数值应为 0 至 「{{mic_phrase_timeout_label}}」" + invalid_value_mic_phrase_timeout: "转录间隔时间大于0秒且不能小于「{{mic_record_timeout_label}}」" + invalid_value_mic_max_phrase: "数值应为 0 以上" + + invalid_value_speaker_record_timeout: "数值应为 0 至 「{{speaker_phrase_timeout_label}}」" + invalid_value_speaker_phrase_timeout: "转录间隔时间大于0秒且不能小于「{{speaker_record_timeout_label}}」" + invalid_value_speaker_max_phrase: "数值应为 0 以上" + +common_warning: + unable_to_use_osc_query: + +main_page: + translation: "翻译" + transcription_send: "你的语音转文字" + transcription_receive: "他人语音转文字" + foreground: "顶层显示" + + language_settings: "语言设定" + your_language: "你的语言" + translate_each_other_label: "双向翻译" + swap_button_label: "互换" + target_language: "目标语言" + translator: "翻译器" + translator_label_default: "默认" + + translator_selector: + is_selected_same_language: + + message_log: + all: "全部" + sent: "发送" + received: "接受" + system: "系统" + + resend_button_on_hover_desc: + + state_text_enabled: "启用" + state_text_disabled: "停用" + + language_selector: + title_your_language: "你的语言" + title_target_language: "目标语言" + + update_available: "有新版本可供使用!" + updating: "更新中..." 
+ +update_modal: + cpu_desc: + cuda_desc: + cuda_compare_cpu_desc: + cuda_disk_space_desc: + close_modal: + download_latest_and_restart: + is_latest_version_already: + is_current_compute_device: + +config_page: + version: "版本 {{version}}" + model_download_button_label: + side_menu_labels: + device: + appearance: "外观" + translation: "翻译" + transcription: "转录" + others: "其他" + hotkeys: + plugins: + advanced_settings: "高级设置" + + device: + check_volume: "Check Volume" + label_auto_select: "Auto Select" + label_host: "Host/Driver" + label_device: "Device" + mic_host_device: + label: "麦克风 (设备)" + mic_dynamic_energy_threshold: + label_for_automatic: "麦克风输入阈值(当前设置:自动)" + desc_for_automatic: "自动调整麦克风输入阈值" + label_for_manual: "麦克风输入阈值(当前设置:手动)" + desc_for_manual: "使用滑杆手动确定麦克风输入灵敏度。按下麦克风图标输入语音,并在监控音量的同时调节灵敏度。" + speaker_device: + label: "他人语音 (设备)" + speaker_dynamic_energy_threshold: + label_for_automatic: "他人语音接收阈值(当前设置:自动)" + desc_for_automatic: "自动调节他人语音接收阈值" + label_for_manual: "他人语音接收阈值(当前设置:手动)" + desc_for_manual: "使用滑杆手动调整他人语音接收阈值.在按下耳机按钮时,请根据实际听到的声音调整该大小" + + appearance: + transparency: + label: "透明度" + desc: "更改主视窗透明度" + ui_size: + label: "界面大小" + textbox_ui_size: + label: "文本框字体大小" + desc: "你可以根据用户界面大小调整文本框中使用的字体大小。" + send_message_button_type: + label: "发送信息按钮" + hide: "隐藏 (可使用回车发送信息)" + show: "显示" + show_and_disable_enter_key: "显示,并且停用‘回车发送信息’" + show_resend_button: + label: + desc: + font_family: + label: "字体" + ui_language: + label: "界面语言" + + translation: + ctranslate2_weight_type: + label: "选择离线翻译模型" + desc: "可以选择用于离线翻译的翻译模型" + small: "普通模型 ({{capacity}})" + large: "高精度模型 ({{capacity}})" + ctranslate2_compute_device: + label: + deepl_auth_key: + label: "DeepL 授权密匙" + desc: "在使用的时候,使用时请在主屏幕上通过 DeepL_API 选择 {{translator}}\n※某些语言可能不支持" + open_auth_key_webpage: "打开DeepL账号页面" + save: + edit: + auth_key_success: "授权密匙认证完成。" + + transcription: + section_label_mic: "你的麦克风" + section_label_speaker: "他人声音" + section_label_transcription_engines: + mic_record_timeout: + 
label: "语音输入结束后的静音时间" + desc: "当检测到静音并经过设定的秒数后,语音输入即被视为完成。" + mic_phrase_timeout: + label: "转录间隔" + desc: "在经过设定的时间后执行转录" + mic_max_phrase: + label: "麦克风发送时的最小单词数" + desc: "转录字数的下限,只有超过这个数字,才会记录翻译结果并发送到VRC" + mic_word_filter: + label: "单词过滤器" + desc: "检测出被记录的单词时,不会发送这段话\n如要添加多个单词,可以用逗号来分割\n※不会记录重复的单词" + add_button_label: "添加" + count_desc: "现在被记录的单词数: {{count}}" + speaker_record_timeout: + label: "语音接收结束后的静音时间" + desc: "当检测到静音并经过设定的秒数后,语音接收即被视为完成。" + speaker_phrase_timeout: + label: "转录间隔" + desc: "在经过设定的时间后执行转录" + speaker_max_phrase: + label: "语音接收时的最小单词数" + desc: "转录字数的下限,只有超过这个数字,才会记录转录结果" + select_transcription_engine: + label: + whisper_weight_type: + label: "选择某个Whisper模型" + desc: + model_template: "{{model_name}} 模型 ({{capacity}})" + recommended_model_template: "{{model_name}} 模型 ({{capacity}}) (推荐)" + whisper_compute_device: + label: + + vr: + single_line: + multi_lines: + overlay_enable: + restore_default_settings: "恢复默认设置" + position: + rotation: + x_position: "X轴(左右)" + y_position: "Y轴(上下)" + z_position: "Z轴(前后)" + x_rotation: "X轴旋转" + y_rotation: "Y轴旋转" + z_rotation: "Z轴旋转" + sample_text_button: + start: + stop: + sample_text: + opacity: "透明度" + ui_scaling: "大小" + display_duration: "显示持续时间" + fadeout_duration: "渐隐持续时间" + common_settings: + tracker: + hmd: + left_hand: + right_hand: + overlay_show_only_translated_messages: + label: + + others: + section_label_sounds: + section_label_message_formats: + auto_clear_the_message_box: + label: "发言后自动清空chatbox" + send_only_translated_messages: + label: "只发送翻译后的信息" + auto_export_message_logs: + label: "自动导出聊天记录" + desc: "以文本文件的形式在logs文件夹中保存。" + vrc_mic_mute_sync: + label: "与VRC中的麦克风静音同步" + desc: "当VRChat的麦克风处于静音时,不在VRChat中发送信息\n※存在少许延迟且不支持按键发言." + send_message_to_vrc: + label: "发送信息至VRChat" + desc: "不发送信息至VRChat的情况下也能使用它,但该功能现在并未完成.在想要发送信息时,请不要忘记打开这个功能." 
+ notification_vrc_sfx: + label: + desc: + send_received_message_to_vrc: + label: + desc: + message_format_common: + example_view: + title: + original_translated: + original_translated_multi: + translated_only_multi: + translated_only: + original_only: + settings: + title: + original: + translated: + for_multi_translation: + send_message_format: + label: + desc: + received_message_format: + label: + desc: + + hotkeys: + toggle_vrct_visibility: + label: + toggle_translation: + label: + toggle_transcription_send: + label: + toggle_transcription_receive: + label: + + plugins: + downloaded_version: + latest_version: + available_after_updating: + unavailable_downloaded: + no_latest_info: + using_latest_version: + available_latest_version: + unavailable_latest_version: + available_in_latest_vrct_version: + unavailable_not_downloaded: + + advanced_settings: + osc_ip_address: + label: "OSC IP 地址" + osc_port: + label: "OSC 端口" + open_config_filepath: + label: "打开设置文件" + switch_compute_device: + label: + enable_websocket: + label: + websocket_host: + label: + websocket_port: + label: + + notifications: + save_success: + +plugin_notifications: + downloading: + downloaded_success: + downloaded_error: + + updating: + updated_success: + updated_error: + + disabled_out_of_support: + disabled_due_to_an_error: + + is_enabled: is_disabled: \ No newline at end of file From 2efba7247e97e47b6767ac974ae541281fca89af Mon Sep 17 00:00:00 2001 From: Sakamoto Shiina <68018796+ShiinaSakamoto@users.noreply.github.com> Date: Thu, 11 Sep 2025 13:32:32 +0900 Subject: [PATCH 08/92] [Update] Localization: Update English localization. --- locales/en.yml | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/locales/en.yml b/locales/en.yml index f311ced3..1faf9639 100644 --- a/locales/en.yml +++ b/locales/en.yml @@ -25,7 +25,7 @@ common_error: invalid_value_speaker_max_phrase: "You can set a number equal to or greater than 0." 
common_warning: - unable_to_use_osc_query: "The functions below have been automatically disabled because receiving OSC data is not possible due to OSC IP Address settings." + unable_to_use_osc_query: "The following functions have been automatically disabled due to the OSC IP Address settings preventing OSC data from being received." main_page: translation: "Translation" @@ -234,22 +234,22 @@ config_page: message_format_common: example_view: title: "Preview" - original_translated: "Original + Translated" - original_translated_multi: "Original + Translated (Multi-Translation)" - translated_only_multi: "Translated Only (Multi-Translation)" - translated_only: "Translated Only" + original_translated: "Original + Translation" + original_translated_multi: "Original + Translation (Multi-language)" + translated_only_multi: "Translation Only (Multi-language)" + translated_only: "Translation Only" original_only: "Original Only" settings: title: "Settings" original: "Original" - translated: "Translated" + translated: "Translation" for_multi_translation: "For Multi-Translation" send_message_format: label: "Message Format (Send)" - desc: "You can change the decoration of the message you want to send." + desc: "This allows you to change the format of the message that others see in VRChat." received_message_format: label: "Message Format (Speaker2Chatbox)" - desc: "It will be used in Speaker2Chatbox for now." + desc: "Currently, it is used in Speaker2Chatbox." 
hotkeys: toggle_vrct_visibility: @@ -265,7 +265,7 @@ config_page: downloaded_version: "Downloaded version: {{downloaded_version}}" latest_version: "Latest version: {{latest_version}}" available_after_updating: "Available after updating to the latest version" - unavailable_downloaded: "Currently unavailable due to incompatibility with the VRCT version in use" + unavailable_downloaded: "Currently Unavailable - Incompatible with the VRCT version in use" no_latest_info: "Unable to retrieve the latest information" using_latest_version: "Using the latest version" available_latest_version: "Latest version available" @@ -304,5 +304,5 @@ plugin_notifications: disabled_out_of_support: "The plugin has been disabled. It's not supported on this VRCT version." disabled_due_to_an_error: "An error was detected while running the plugin. Please report this to the plugin developer." - is_enabled: "The plugin has enabled." - is_disabled: "The plugin has disabled." \ No newline at end of file + is_enabled: "The plugin enabled." + is_disabled: "The plugin disabled." \ No newline at end of file From 9096d05e65f62a5a333099d844bee6d3d473b4d7 Mon Sep 17 00:00:00 2001 From: Sakamoto Shiina <68018796+ShiinaSakamoto@users.noreply.github.com> Date: Thu, 11 Sep 2025 13:51:14 +0900 Subject: [PATCH 09/92] [Update] Update 'readme_first.txt'. --- locales/readme_first.txt | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/locales/readme_first.txt b/locales/readme_first.txt index 02323f8f..6d87d7d0 100644 --- a/locales/readme_first.txt +++ b/locales/readme_first.txt @@ -1 +1,13 @@ -Thank you for considering translating VRCT's UI. However, please refrain from making any changes at this time. I am currently organizing the files, including reordering, adding, and removing elements, and some parts may change frequently until the UI becomes stable. (Note: This message was updated in February 2025.) 
\ No newline at end of file +Thank you for considering contributing to VRCT's UI translations. +If you want to edit a language: +- If it already exists: edit the corresponding [xx].yml file. +- If it doesn’t exist: please open a GitHub issue or contact us through another channel. (If you know how to add a new language yourself, feel free to do so! Sorry for the complicated structure...) + +Languages currently supported: +- en: English +- ja: 日本語 +- ko: 한국어 +- zh-Hant: 繁體中文 +- zh-Hans: 简体中文 + +Last updated: 2025/09 \ No newline at end of file From 684d9c3283cf5c3afaf4842338be1d0f520186fa Mon Sep 17 00:00:00 2001 From: Sakamoto Shiina <68018796+ShiinaSakamoto@users.noreply.github.com> Date: Thu, 11 Sep 2025 17:00:15 +0900 Subject: [PATCH 10/92] [Update] Version 3.2.1 -> 3.2.2 --- src-python/config.py | 2 +- src-tauri/tauri.conf.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src-python/config.py b/src-python/config.py index 76638fbe..c544727d 100644 --- a/src-python/config.py +++ b/src-python/config.py @@ -1020,7 +1020,7 @@ class Config: def init_config(self): # Read Only - self._VERSION = "3.2.1" + self._VERSION = "3.2.2" if getattr(sys, 'frozen', False): self._PATH_LOCAL = os_path.dirname(sys.executable) else: diff --git a/src-tauri/tauri.conf.json b/src-tauri/tauri.conf.json index 898312d6..9e2402ef 100644 --- a/src-tauri/tauri.conf.json +++ b/src-tauri/tauri.conf.json @@ -1,7 +1,7 @@ { "$schema": "https://schema.tauri.app/config/2", "productName": "VRCT", - "version": "3.2.1", + "version": "3.2.2", "identifier": "com.vrct.app", "build": { "beforeDevCommand": "", From 06e1e73018d599381f674b5ee9c08488f21c4b28 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Thu, 11 Sep 2025 18:05:50 +0900 Subject: [PATCH 11/92] [Update] Change Hugging Face CLI commands to use new syntax and specify version for huggingface_hub installation --- .github/workflows/release.yml | 8 ++++---- 1 file changed, 4 
insertions(+), 4 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index ff2362a1..f38375ef 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -55,18 +55,18 @@ jobs: - name: Upload to Hugging Face Hub env: HF_TOKEN: ${{ secrets.HF_TOKEN }} - shell: pwsh # Explicitly use PowerShell for file operations + shell: pwsh run: | - pip install huggingface_hub + pip install huggingface_hub==0.34.4 $UPLOAD_DIR = "./hf_upload_temp" New-Item -ItemType Directory -Force -Path $UPLOAD_DIR Copy-Item -Path ./VRCT.zip -Destination "$UPLOAD_DIR/VRCT.zip" Copy-Item -Path ./VRCT_cuda.zip -Destination "$UPLOAD_DIR/VRCT_cuda.zip" - huggingface-cli upload ms-software/VRCT $UPLOAD_DIR . --repo-type model --commit-message "👍️[Update] ${{ env.VERSION }} Release" + hf upload --repo-type model --commit-message "👍️[Update] ${{ env.VERSION }} Release" ms-software/VRCT $UPLOAD_DIR . - huggingface-cli tag ms-software/VRCT ${{ github.ref_name }} --repo-type model --message "Release ${{ github.ref_name }}" + hf repo tag ms-software/VRCT --repo-type model --message "Release ${{ github.ref_name }}" ${{ github.ref_name }} Remove-Item -Recurse -Force $UPLOAD_DIR From bbfc61b3351289dda809b96f2905cded5c447311 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Thu, 11 Sep 2025 19:05:26 +0900 Subject: [PATCH 12/92] [Update] Fix Hugging Face CLI commands for tagging and uploading model --- .github/workflows/release.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index f38375ef..d0e7eac0 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -64,9 +64,9 @@ jobs: Copy-Item -Path ./VRCT.zip -Destination "$UPLOAD_DIR/VRCT.zip" Copy-Item -Path ./VRCT_cuda.zip -Destination "$UPLOAD_DIR/VRCT_cuda.zip" - hf upload --repo-type model --commit-message "👍️[Update] ${{ env.VERSION }} Release" 
ms-software/VRCT $UPLOAD_DIR . + hf upload --repo-type model --commit-message "👍️[Update] ${{ env.VERSION }} Release" ms-software/VRCT $UPLOAD_DIR - hf repo tag ms-software/VRCT --repo-type model --message "Release ${{ github.ref_name }}" ${{ github.ref_name }} + hf repo tag create ms-software/VRCT ${{ github.ref_name }} --repo-type model --message "Release ${{ github.ref_name }}" Remove-Item -Recurse -Force $UPLOAD_DIR From 5a9a13146c8864dcccca1bbe82bb9391459e4bdd Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Fri, 12 Sep 2025 16:06:12 +0900 Subject: [PATCH 13/92] [Update] Refactor transliteration function to accept parameters for hiragana and romaji conversion --- src-python/controller.py | 24 ++++++++++++++++++------ src-python/model.py | 20 ++++++++++++++------ 2 files changed, 32 insertions(+), 12 deletions(-) diff --git a/src-python/controller.py b/src-python/controller.py index c34abaf8..28c30734 100644 --- a/src-python/controller.py +++ b/src-python/controller.py @@ -298,9 +298,13 @@ class Controller: # その他のエラーは通常通り処理 raise - if config.CONVERT_MESSAGE_TO_ROMAJI is True or config.CONVERT_MESSAGE_TO_HIRAGANA is True: + if config.CONVERT_MESSAGE_TO_HIRAGANA is True or config.CONVERT_MESSAGE_TO_ROMAJI is True: if config.SELECTED_TARGET_LANGUAGES[config.SELECTED_TAB_NO]["1"]["language"] == "Japanese": - transliteration = model.convertMessageToTransliteration(translation[0]) + transliteration = model.convertMessageToTransliteration( + translation[0], + hiragana=config.CONVERT_MESSAGE_TO_HIRAGANA, + romaji=config.CONVERT_MESSAGE_TO_ROMAJI + ) if config.ENABLE_TRANSCRIPTION_SEND is True: if config.SEND_MESSAGE_TO_VRC is True: @@ -425,9 +429,13 @@ class Controller: # その他のエラーは通常通り処理 raise - if config.CONVERT_MESSAGE_TO_ROMAJI is True or config.CONVERT_MESSAGE_TO_HIRAGANA is True: + if config.CONVERT_MESSAGE_TO_HIRAGANA is True or config.CONVERT_MESSAGE_TO_ROMAJI is True: if 
config.SELECTED_TARGET_LANGUAGES[config.SELECTED_TAB_NO]["1"]["language"] == "Japanese": - transliteration = model.convertMessageToTransliteration(message) + transliteration = model.convertMessageToTransliteration( + message, + hiragana=config.CONVERT_MESSAGE_TO_HIRAGANA, + romaji=config.CONVERT_MESSAGE_TO_ROMAJI + ) if config.ENABLE_TRANSCRIPTION_RECEIVE is True: if config.OVERLAY_SMALL_LOG is True and model.overlay.initialized is True: @@ -571,9 +579,13 @@ class Controller: # その他のエラーは通常通り処理 raise - if config.CONVERT_MESSAGE_TO_ROMAJI is True or config.CONVERT_MESSAGE_TO_HIRAGANA is True: + if config.CONVERT_MESSAGE_TO_HIRAGANA is True or config.CONVERT_MESSAGE_TO_ROMAJI is True: if config.SELECTED_TARGET_LANGUAGES[config.SELECTED_TAB_NO]["1"]["language"] == "Japanese": - transliteration = model.convertMessageToTransliteration(translation[0]) + transliteration = model.convertMessageToTransliteration( + translation[0], + hiragana=config.CONVERT_MESSAGE_TO_HIRAGANA, + romaji=config.CONVERT_MESSAGE_TO_ROMAJI + ) # send OSC message if config.SEND_MESSAGE_TO_VRC is True: diff --git a/src-python/model.py b/src-python/model.py index 333f1394..f7f1462c 100644 --- a/src-python/model.py +++ b/src-python/model.py @@ -275,13 +275,21 @@ class Model: self.previous_receive_message = message return repeat_flag - def convertMessageToTransliteration(self, message: str) -> str: + def convertMessageToTransliteration(self, message: str, hiragana: bool=True, romaji: bool=True) -> str: + if hiragana is False and romaji is False: + return message + + keys_to_keep = {"orig"} + if hiragana: + keys_to_keep.add("hira") + if romaji: + keys_to_keep.add("hepburn") + data_list = self.kks.convert(message) - keys_to_keep = {"orig", "hira", "hepburn"} - filtered_list = [] - for item in data_list: - filtered_item = {key: value for key, value in item.items() if key in keys_to_keep} - filtered_list.append(filtered_item) + filtered_list = [ + {key: value for key, value in item.items() if key in 
keys_to_keep} + for item in data_list + ] return filtered_list def setOscIpAddress(self, ip_address): From 2efedfb57280b0597f3c628dbb05318c410d40c9 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Mon, 15 Sep 2025 05:00:24 +0900 Subject: [PATCH 14/92] [Add] Implement transliterator and katakana to Hepburn conversion functions --- .../transliterate/transliterate_japanese.py | 110 +++++++++ .../transliterate_kana_to_hepburn.py | 215 ++++++++++++++++++ 2 files changed, 325 insertions(+) create mode 100644 src-python/models/transliterate/transliterate_japanese.py create mode 100644 src-python/models/transliterate/transliterate_kana_to_hepburn.py diff --git a/src-python/models/transliterate/transliterate_japanese.py b/src-python/models/transliterate/transliterate_japanese.py new file mode 100644 index 00000000..70a68189 --- /dev/null +++ b/src-python/models/transliterate/transliterate_japanese.py @@ -0,0 +1,110 @@ +from sudachipy import tokenizer +from sudachipy import dictionary +try: + from .transliterate_kana_to_hepburn import katakana_to_hepburn +except ImportError: + from transliterate_kana_to_hepburn import katakana_to_hepburn + +class Transliterator: + def __init__(self): + self.tokenizer_obj = dictionary.Dictionary().create() + self.mode = tokenizer.Tokenizer.SplitMode.A + + @staticmethod + def is_kanji(ch: str) -> bool: + return '\u4e00' <= ch <= '\u9fff' + + @staticmethod + def kata_to_hira(text: str) -> str: + return "".join( + chr(ord(c) - 0x60) if 'ァ' <= c <= 'ン' else c + for c in text + ) + + @staticmethod + def split_kanji_okurigana(surface: str, reading_kana: str): + """ + 1語の表層形(surface)と読み(reading_kana)を + [ {"orig":..., "kana":..., "hira":..., "hepburn":...}, ... 
] に分割 + """ + result = [] + + # 表層を「漢字ブロック」と「非漢字ブロック」に分割 + buf = "" + prev_is_kanji = None + blocks = [] + for ch in surface: + now_is_kanji = Transliterator.is_kanji(ch) + if prev_is_kanji is None or now_is_kanji == prev_is_kanji: + buf += ch + else: + blocks.append((prev_is_kanji, buf)) + buf = ch + prev_is_kanji = now_is_kanji + if buf: + blocks.append((prev_is_kanji, buf)) + + # 読みを分配 + kana_left = reading_kana + for is_kan, part in blocks: + if is_kan: + # 仮ルール:残りの読みのうち、送り仮名分を除いた前半を充てる + # ex. "美しい"(うつくしい): 漢字=美, 残り送り仮名=しい + okuri_len = len(blocks[-1][1]) if not blocks[-1][0] else 0 + kana_for_kan = kana_left[:-okuri_len] if okuri_len else kana_left + result.append( + { + "orig": part, + "kana": kana_for_kan, + } + ) + kana_left = kana_left[len(kana_for_kan):] + else: + # 送り仮名部分 → そのまま残りを割り当てる + kana_for_okuri = kana_left + result.append( + { + "orig": part, + "kana": kana_for_okuri, + } + ) + kana_left = "" + + return result + + def analyze(self, text: str, use_macron: bool = True): + tokens = self.tokenizer_obj.tokenize(text, self.mode) + + results = [] + for t in tokens: + surface = t.surface() + parts = self.split_kanji_okurigana(surface, t.reading_form()) + for p in parts: + results.append({ + "orig": p["orig"], + "kana": p["kana"], + "hira": self.kata_to_hira(p["kana"]), + "hepburn": katakana_to_hepburn(p["kana"], use_macron=use_macron) + }) + return results + +# --- テスト --- +if __name__ == "__main__": + test_cases = [ + "美しい花を見る", + "東京に行く", + "漢字とカタカナの混在", + "パーティーに行く", + "コンピューターを使う", + "シェアハウスに住む", + "ヴァイオリンを弾く", + "ギュウニュウを飲む", + "ニューヨークに行く", + "ラーメンを食べる", + "チョコレートが好き", + "SessionIDを取得する", + ] + + transliterator = Transliterator() + for case in test_cases: + print(transliterator.analyze(case)) \ No newline at end of file diff --git a/src-python/models/transliterate/transliterate_kana_to_hepburn.py b/src-python/models/transliterate/transliterate_kana_to_hepburn.py new file mode 100644 index 00000000..e7ba04c2 --- /dev/null +++ 
b/src-python/models/transliterate/transliterate_kana_to_hepburn.py @@ -0,0 +1,215 @@ +# katakana_to_hepburn.py +# カタカナ -> ヘボン式ローマ字(パッケージ不要) + +def katakana_to_hepburn(kata: str, use_macron: bool = True) -> str: + """ + カタカナ文字列をヘボン式ローマ字に変換する。 + use_macron=True のとき ā ī ū ē ō で長音を表現(マクロン)。 + use_macron=False のときは単純に連続母音を残す(例: ou, oo)。 + """ + # 基本音の対応(主要なカタカナ) + base = { + 'ア':'a','イ':'i','ウ':'u','エ':'e','オ':'o', + 'カ':'ka','キ':'ki','ク':'ku','ケ':'ke','コ':'ko', + 'サ':'sa','シ':'shi','ス':'su','セ':'se','ソ':'so', + 'タ':'ta','チ':'chi','ツ':'tsu','テ':'te','ト':'to', + 'ナ':'na','ニ':'ni','ヌ':'nu','ネ':'ne','ノ':'no', + 'ハ':'ha','ヒ':'hi','フ':'fu','ヘ':'he','ホ':'ho', + 'マ':'ma','ミ':'mi','ム':'mu','メ':'me','モ':'mo', + 'ヤ':'ya','ユ':'yu','ヨ':'yo', + 'ラ':'ra','リ':'ri','ル':'ru','レ':'re','ロ':'ro', + 'ワ':'wa','ヲ':'wo','ン':'n', + 'ガ':'ga','ギ':'gi','グ':'gu','ゲ':'ge','ゴ':'go', + 'ザ':'za','ジ':'ji','ズ':'zu','ゼ':'ze','ゾ':'zo', + 'ダ':'da','ヂ':'ji','ヅ':'zu','デ':'de','ド':'do', + 'バ':'ba','ビ':'bi','ブ':'bu','ベ':'be','ボ':'bo', + 'パ':'pa','ピ':'pi','プ':'pu','ペ':'pe','ポ':'po', + # 小書き(単独で使われることは少ないがマップしておく) + 'ァ':'a','ィ':'i','ゥ':'u','ェ':'e','ォ':'o', + 'ャ':'ya','ュ':'yu','ョ':'yo','ッ':'xtsu','ー':'-', + 'ヴ':'vu','シェ':'she' # 特殊は下で組合せで処理 + } + + # 拡張:子音 + 小ャユョ の組合せ(主要なもの) + digraphs = { + ('キ','ャ'):'kya', ('キ','ュ'):'kyu', ('キ','ョ'):'kyo', + ('ギ','ャ'):'gya', ('ギ','ュ'):'gyu', ('ギ','ョ'):'gyo', + ('シ','ャ'):'sha', ('シ','ュ'):'shu', ('シ','ョ'):'sho', + ('ジ','ャ'):'ja', ('ジ','ュ'):'ju', ('ジ','ョ'):'jo', + ('チ','ャ'):'cha', ('チ','ュ'):'chu', ('チ','ョ'):'cho', + ('ニ','ャ'):'nya', ('ニ','ュ'):'nyu', ('ニ','ョ'):'nyo', + ('ヒ','ャ'):'hya', ('ヒ','ュ'):'hyu', ('ヒ','ョ'):'hyo', + ('ビ','ャ'):'bya', ('ビ','ュ'):'byu', ('ビ','ョ'):'byo', + ('ピ','ャ'):'pya', ('ピ','ュ'):'pyu', ('ピ','ョ'):'pyo', + ('ミ','ャ'):'mya', ('ミ','ュ'):'myu', ('ミ','ョ'):'myo', + ('リ','ャ'):'rya', ('リ','ュ'):'ryu', ('リ','ョ'):'ryo', + # 外来音対応(ファ/フィ/チェ 等) + ('フ','ャ'):'fya', ('フ','ュ'):'fyu', ('フ','ョ'):'fyo', + ('ト','ゥ'):'tu', ('ド','ゥ'):'du', + # F-sounds (ファ フィ フェ フォ) + ('フ','ァ'):'fa', 
('フ','ィ'):'fi', ('フ','ェ'):'fe', ('フ','ォ'):'fo', + # シェ チェ ティ etc. + ('シ','ェ'):'she', ('チ','ェ'):'che', + ('テ','ィ'):'ti', ('ト','ゥ'):'tu', ('ド','ゥ'):'du', + ('ウ','ァ'):'wa', ('ウ','ィ'):'wi', ('ウ','ェ'):'we', ('ウ','ォ'):'wo', + # その他外来語によくある組合せ + ('ス','ィ'):'si', ('ズ','ィ'):'zi', ('ツ','ァ'):'tsa', ('ツ','ィ'):'tsi', ('ツ','ェ'):'tse', ('ツ','ォ'):'tso', + ('キ','ェ'):'kye', ('ギ','ェ'):'gye', + ('ヴ','ァ'):'va', ('ヴ','ィ'):'vi', ('ヴ','ェ'):'ve', ('ヴ','ォ'):'vo', ('ヴ','ュ'):'vyu' + } + + # 小文字一覧(ゃゅょぁぃぅぇぉ など) + small_kana = set(['ャ','ュ','ョ','ァ','ィ','ゥ','ェ','ォ','ヮ','ヵ','ヶ','ッ','ャ','ュ','ョ']) + + # マクロン変換マップ(連続母音 -> マクロン) + macron_map = { + 'aa':'ā','ii':'ī','uu':'ū','ee':'ē','oo':'ō', + # ou -> ō という扱いを多くのヘボン式はする(特に日本語由来の長音) + 'ou':'ō' + } + + # Helper: 次のローマ字の先頭子音を取り出す(促音処理用) + def initial_consonant(rom: str) -> str: + # romはローマ字(例 'shi','chi','ta') + # 子音は最初の母音直前までと考える(母音: a,i,u,e,o) + for i,ch in enumerate(rom): + if ch in 'aeiou': + return rom[:i] + return rom # 母音がないなら全部 + + # 変換メイン + res = [] + i = 0 + kata = kata.strip() + length = len(kata) + + while i < length: + ch = kata[i] + + # 促音(ッ):次の音の初めの子音を重ねる + if ch == 'ッ': + # lookahead + if i+1 < length: + # 先の1文字 or 合字を取り得る(小書きが続く可能性) + # まず合字優先で調べる + next_pair = None + if i+2 < length and (kata[i+1], kata[i+2]) in digraphs: + next_pair = digraphs[(kata[i+1], kata[i+2])] + elif kata[i+1] in base: + next_pair = base.get(kata[i+1]) + + if next_pair: + cons = initial_consonant(next_pair) + if cons == '': + # もし母音始まりなら促音は無視(稀) + pass + else: + # Hepburnでは "ch" の場合 "cch"(matcha)等の扱いになるように + # cons の先頭1文字を倍にするより、cons全体の先頭文字を重ねるのが一般的(例: 'shi' -> 'ssh' ? 
いい例は少ない) + # 実務上は先頭子音の最初の文字を重複する: + res.append(cons[0]) + # advance only the 促音 itself here; next loop handles next kana + i += 1 + continue + + # 長音符(ー):前の母音を伸ばす(マクロン処理は後でまとめて) + if ch == 'ー': + # append marker '-' to indicate prolong; we'll post-process + res.append('-') + i += 1 + continue + + # 合字(子 + 小ャュョ等) + if i+1 < length and (ch, kata[i+1]) in digraphs: + res.append(digraphs[(ch, kata[i+1])]) + i += 2 + continue + + # 小書きが前に独立して出てきた場合(通常は合字で処理されるが念のため) + if ch in small_kana and ch != 'ッ': + # 小書きを単独で英字に変換(例: 'ァ' -> 'a') + res.append(base.get(ch, '')) + i += 1 + continue + + # 普通のカタカナ + if ch in base: + res.append(base[ch]) + i += 1 + continue + + # 英数字や記号・ひらがななどはそのまま(変換対象外) + res.append(ch) + i += 1 + + # ここまでで res はローマ字パーツのリスト(長音は '-' でマーク) + raw = ''.join(res) + + # 撥音(ン)処理: n の前が b/p/m の場合 m にする + # ただし既に 'n' のまま次が母音や y の時は通常 n' を入れるべきだが簡易処理として n のまま保持。 + # 我々は 'n' の後に b/p/m が来たら 'm' に置換 + import re + raw = re.sub(r'n(?=[bmp])', 'm', raw) + + # 長音処理('-' マークを見て前の母音を伸ばす) + # raw 中の '-' を削って該当の母音を伸ばす + while '-' in raw: + idx = raw.find('-') + if idx == 0: + # 先頭に長音符が来るのはおかしいので削除 + raw = raw[:idx] + raw[idx+1:] + continue + # 前の文字が母音ならそれを重ねる + prev = raw[idx-1] + if prev in 'aiueo': + # 直前に既に vowel がある場合、後でマクロン処理に任せて母音を2つにする + raw = raw[:idx] + prev + raw[idx+1:] + else: + # 直前が子音なら何もして取り除く + raw = raw[:idx] + raw[idx+1:] + + # 小さな例外対応: 'ti' 等の表記は 'chi' と扱いたいが上述マップでカバー済み + # macron の適用(長音の正規化) + if use_macron: + # まず 'ou' を ō に(ただし語による例外はあるが、一般的ヘボンに合わせる) + # その前に 'oo' を 'ō' に(稀) + for pair, mac in macron_map.items(): + raw = raw.replace(pair, mac) + # else: leave as is (ou/oo/aa...) 
+ + # 仕上げ:小文字統一(ヘボンは小文字) + raw = raw.lower() + + # 最後に、n の後に母音または y が来る場合は「んあ->n'a」的扱いが必要だが + # シンプル実装では n の後に母音や y が来るときは n' を入れる(明瞭化) + # ただし多くの実例では省略されることも多いのでコメントアウトしておく + # raw = re.sub(r"n(?=[aiueoy])", "n'", raw) + + return raw + + +# --- テスト例 --- +if __name__ == "__main__": + tests = [ + "カタカナ", + "コンピューター", + "キャッチ", + "マッチャ", + "シェア", + "ジェット", + "ヴァイオリン", + "ホテル", + "スーパー", + "ギュウニュウ", + "パーティー", + "トウキョウ", # 東京(トウキョウ -> tōkyō) + "オーケー", + "ファイル", + "ニューヨーク", + "ラーメン", + "パン", + "チョコレート", + ] + + for s in tests: + print(s, "->", katakana_to_hepburn(s, use_macron=True)) \ No newline at end of file From 0d121ecaad4ab7e120dd2b6beb0e38838b3c7d15 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Mon, 15 Sep 2025 05:22:57 +0900 Subject: [PATCH 15/92] [Update] Enhance kanji and okurigana processing in Transliterator class --- .../transliterate/transliterate_japanese.py | 105 +++++++++++++++--- 1 file changed, 91 insertions(+), 14 deletions(-) diff --git a/src-python/models/transliterate/transliterate_japanese.py b/src-python/models/transliterate/transliterate_japanese.py index 70a68189..9472bdee 100644 --- a/src-python/models/transliterate/transliterate_japanese.py +++ b/src-python/models/transliterate/transliterate_japanese.py @@ -46,29 +46,51 @@ class Transliterator: # 読みを分配 kana_left = reading_kana - for is_kan, part in blocks: + for i, (is_kan, part) in enumerate(blocks): if is_kan: - # 仮ルール:残りの読みのうち、送り仮名分を除いた前半を充てる - # ex. 
"美しい"(うつくしい): 漢字=美, 残り送り仮名=しい - okuri_len = len(blocks[-1][1]) if not blocks[-1][0] else 0 - kana_for_kan = kana_left[:-okuri_len] if okuri_len else kana_left + # 漢字ブロックの処理 + if len(blocks) == 1: + # 単一ブロック(全て漢字)の場合 + kana_for_kan = kana_left + elif i == len(blocks) - 1: + # 最後のブロック(漢字)の場合 + kana_for_kan = kana_left + else: + # 中間の漢字ブロックの場合 + # 後続の非漢字ブロックの文字数を計算 + remaining_non_kanji = sum(len(p) for is_k, p in blocks[i+1:] if not is_k) + if remaining_non_kanji > 0 and len(kana_left) > remaining_non_kanji: + kana_for_kan = kana_left[:-remaining_non_kanji] + else: + # 漢字1文字あたり最低1文字の読みを割り当て + min_kana = len(part) + kana_for_kan = kana_left[:max(min_kana, len(kana_left) - remaining_non_kanji)] + + # 空の読みを避ける + if not kana_for_kan and kana_left: + kana_for_kan = kana_left[:1] + result.append( { "orig": part, "kana": kana_for_kan, + "hira": Transliterator.kata_to_hira(kana_for_kan), + "hepburn": katakana_to_hepburn(kana_for_kan, use_macron=True) } ) kana_left = kana_left[len(kana_for_kan):] else: - # 送り仮名部分 → そのまま残りを割り当てる - kana_for_okuri = kana_left + # 非漢字部分(送り仮名など) + kana_for_okuri = kana_left[:len(part)] result.append( { "orig": part, "kana": kana_for_okuri, + "hira": Transliterator.kata_to_hira(kana_for_okuri), + "hepburn": katakana_to_hepburn(kana_for_okuri, use_macron=True) } ) - kana_left = "" + kana_left = kana_left[len(kana_for_okuri):] return result @@ -78,14 +100,66 @@ class Transliterator: results = [] for t in tokens: surface = t.surface() - parts = self.split_kanji_okurigana(surface, t.reading_form()) - for p in parts: + reading = t.reading_form() + + # 単純に1文字ずつ処理 + if len(surface) == 1: + # 1文字の場合はそのまま results.append({ - "orig": p["orig"], - "kana": p["kana"], - "hira": self.kata_to_hira(p["kana"]), - "hepburn": katakana_to_hepburn(p["kana"], use_macron=use_macron) + "orig": surface, + "kana": reading, + "hira": self.kata_to_hira(reading), + "hepburn": katakana_to_hepburn(reading, use_macron=use_macron) }) + else: + # 複数文字の場合は文字種別で分割 + i = 0 + 
reading_pos = 0 + + while i < len(surface): + char = surface[i] + + if self.is_kanji(char): + # 漢字の場合、連続する漢字をまとめて処理 + kanji_block = "" + while i < len(surface) and self.is_kanji(surface[i]): + kanji_block += surface[i] + i += 1 + + # 漢字ブロックの読みを推定 + if i < len(surface): + # 後に文字がある場合、送り仮名を考慮 + remaining_chars = len(surface) - i + kanji_reading = reading[reading_pos:-remaining_chars] if remaining_chars > 0 else reading[reading_pos:] + else: + # 最後の漢字ブロックの場合 + kanji_reading = reading[reading_pos:] + + results.append({ + "orig": kanji_block, + "kana": kanji_reading, + "hira": self.kata_to_hira(kanji_reading), + "hepburn": katakana_to_hepburn(kanji_reading, use_macron=use_macron) + }) + reading_pos += len(kanji_reading) + else: + # 非漢字の場合 + non_kanji_block = "" + while i < len(surface) and not self.is_kanji(surface[i]): + non_kanji_block += surface[i] + i += 1 + + # 非漢字部分の読み(通常は文字数分) + non_kanji_reading = reading[reading_pos:reading_pos + len(non_kanji_block)] + + results.append({ + "orig": non_kanji_block, + "kana": non_kanji_reading, + "hira": self.kata_to_hira(non_kanji_reading), + "hepburn": katakana_to_hepburn(non_kanji_reading, use_macron=use_macron) + }) + reading_pos += len(non_kanji_reading) + return results # --- テスト --- @@ -103,6 +177,9 @@ if __name__ == "__main__": "ラーメンを食べる", "チョコレートが好き", "SessionIDを取得する", + "取り敢えず検索してみる", + "見知らぬ土地で冒険する", + "彼は優れたエンジニアです", ] transliterator = Transliterator() From d3c206fb26ded71ad6e731dc670498a4e8fab121 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Mon, 15 Sep 2025 05:33:30 +0900 Subject: [PATCH 16/92] [Add] Implement Transliterator class for kanji and okurigana analysis; update requirements and spec files --- backend.spec | 2 +- backend_cuda.spec | 2 +- requirements.txt | 4 +++- requirements_cuda.txt | 4 +++- src-python/model.py | 6 +++--- ...literate_japanese.py => transliterate_transliterator.py} | 0 6 files changed, 11 insertions(+), 7 deletions(-) rename 
src-python/models/transliterate/{transliterate_japanese.py => transliterate_transliterator.py} (100%) diff --git a/backend.spec b/backend.spec index 3933d942..c5c33da0 100644 --- a/backend.spec +++ b/backend.spec @@ -5,7 +5,7 @@ a = Analysis( ['src-python\\mainloop.py'], pathex=[], binaries=[], - datas=[('./fonts', 'fonts/'), ('.venv/Lib/site-packages/zeroconf', 'zeroconf/'), ('.venv/Lib/site-packages/openvr', 'openvr/'), ('.venv/Lib/site-packages/pykakasi', 'pykakasi/'), ('.venv/Lib/site-packages/faster_whisper', 'faster_whisper/'), ('.venv/Lib/site-packages/hf_xet', 'hf_xet/')], + datas=[('./fonts', 'fonts/'), ('.venv/Lib/site-packages/zeroconf', 'zeroconf/'), ('.venv/Lib/site-packages/openvr', 'openvr/'), ('.venv/Lib/site-packages/faster_whisper', 'faster_whisper/'), ('.venv/Lib/site-packages/hf_xet', 'hf_xet/')], hiddenimports=[], hookspath=[], hooksconfig={}, diff --git a/backend_cuda.spec b/backend_cuda.spec index 08ba5fd7..e9cb35c8 100644 --- a/backend_cuda.spec +++ b/backend_cuda.spec @@ -5,7 +5,7 @@ a = Analysis( ['src-python\\mainloop.py'], pathex=[], binaries=[], - datas=[('./fonts', 'fonts/'), ('.venv_cuda/Lib/site-packages/zeroconf', 'zeroconf/'), ('.venv_cuda/Lib/site-packages/openvr', 'openvr/'), ('.venv_cuda/Lib/site-packages/pykakasi', 'pykakasi/'), ('.venv_cuda/Lib/site-packages/faster_whisper', 'faster_whisper/'), ('.venv/Lib/site-packages/hf_xet', 'hf_xet/')], + datas=[('./fonts', 'fonts/'), ('.venv_cuda/Lib/site-packages/zeroconf', 'zeroconf/'), ('.venv_cuda/Lib/site-packages/openvr', 'openvr/'), ('.venv_cuda/Lib/site-packages/faster_whisper', 'faster_whisper/'), ('.venv/Lib/site-packages/hf_xet', 'hf_xet/')], hiddenimports=[], hookspath=[], hooksconfig={}, diff --git a/requirements.txt b/requirements.txt index 244a9238..11f1d694 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,12 +13,14 @@ sentencepiece==0.2.0 openvr==1.26.701 pydub==0.25.1 psutil==5.9.8 -pykakasi==2.3.0 pycaw==20240210 websockets==15.0.1 huggingface_hub==0.32.2 
hf-xet==1.1.2 setuptools==80.8.0 +SudachiPy==0.6.10 +SudachiDict-core==20250825 +SudachiDict-full==20250825 translators @ git+https://github.com/misyaguziya/translators@5.9.2.1 SpeechRecognition @ git+https://github.com/misyaguziya/custom_speech_recognition@3.10.4.1 tinyoscquery @ git+https://github.com/cyberkitsune/tinyoscquery@0.1.3 \ No newline at end of file diff --git a/requirements_cuda.txt b/requirements_cuda.txt index 4dadf192..cc0743df 100644 --- a/requirements_cuda.txt +++ b/requirements_cuda.txt @@ -14,12 +14,14 @@ sentencepiece==0.2.0 openvr==1.26.701 pydub==0.25.1 psutil==5.9.8 -pykakasi==2.3.0 pycaw==20240210 websockets==15.0.1 huggingface_hub==0.32.2 hf-xet==1.1.2 setuptools==80.8.0 +SudachiPy==0.6.10 +SudachiDict-core==20250825 +SudachiDict-full==20250825 translators @ git+https://github.com/misyaguziya/translators@5.9.2.1 SpeechRecognition @ git+https://github.com/misyaguziya/custom_speech_recognition@3.10.4.1 tinyoscquery @ git+https://github.com/cyberkitsune/tinyoscquery@0.1.3 \ No newline at end of file diff --git a/src-python/model.py b/src-python/model.py index f7f1462c..797f7bee 100644 --- a/src-python/model.py +++ b/src-python/model.py @@ -14,7 +14,6 @@ from typing import Callable from packaging.version import parse from flashtext import KeywordProcessor -from pykakasi import kakasi from device_manager import device_manager from config import config @@ -28,6 +27,7 @@ from models.translation.translation_languages import translation_lang from models.transcription.transcription_languages import transcription_lang from models.translation.translation_utils import checkCTranslate2Weight, downloadCTranslate2Weight, downloadCTranslate2Tokenizer from models.transcription.transcription_whisper import checkWhisperWeight, downloadWhisperWeight +from models.transliterate.transliterate_transliterator import Transliterator from models.overlay.overlay import Overlay from models.overlay.overlay_image import OverlayImage from models.watchdog.watchdog import 
Watchdog @@ -99,7 +99,7 @@ class Model: self.overlay_image = OverlayImage(config.PATH_LOCAL) self.mic_audio_queue = None self.mic_mute_status = None - self.kks = kakasi() + self.transliterator = Transliterator() self.watchdog = Watchdog(config.WATCHDOG_TIMEOUT, config.WATCHDOG_INTERVAL) self.osc_handler = OSCHandler(config.OSC_IP_ADDRESS, config.OSC_PORT) self.websocket_server = None @@ -285,7 +285,7 @@ class Model: if romaji: keys_to_keep.add("hepburn") - data_list = self.kks.convert(message) + data_list = self.transliterator.analyze(message, use_macron=False) filtered_list = [ {key: value for key, value in item.items() if key in keys_to_keep} for item in data_list diff --git a/src-python/models/transliterate/transliterate_japanese.py b/src-python/models/transliterate/transliterate_transliterator.py similarity index 100% rename from src-python/models/transliterate/transliterate_japanese.py rename to src-python/models/transliterate/transliterate_transliterator.py From 245855d0ca9d0ee316f0e183be800b2959404c17 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Wed, 17 Sep 2025 10:35:34 +0900 Subject: [PATCH 17/92] [Update] Add compute type management for CTranslate2 and Whisper models --- src-python/config.py | 38 ++++++++++++++++++- src-python/controller.py | 33 ++++++++++++++++ src-python/mainloop.py | 8 ++++ src-python/model.py | 12 ++++-- .../transcription_transcriber.py | 4 +- .../transcription/transcription_whisper.py | 5 ++- .../translation/translation_translator.py | 5 ++- src-python/utils.py | 5 ++- 8 files changed, 98 insertions(+), 12 deletions(-) diff --git a/src-python/config.py b/src-python/config.py index c544727d..1a605701 100644 --- a/src-python/config.py +++ b/src-python/config.py @@ -11,7 +11,7 @@ from models.translation.translation_languages import translation_lang from models.translation.translation_utils import ctranslate2_weights from models.transcription.transcription_languages import 
transcription_lang from models.transcription.transcription_whisper import _MODELS as whisper_models -from utils import errorLogging, validateDictStructure +from utils import errorLogging, validateDictStructure, getComputeTypeList json_serializable_vars = {} def json_serializable(var_name): @@ -135,6 +135,14 @@ class Config: def SELECTABLE_COMPUTE_DEVICE_LIST(self): return self._SELECTABLE_COMPUTE_DEVICE_LIST + @property + def SELECTABLE_CTRANSLATE2_COMPUTE_TYPE_LIST(self): + return self._SELECTABLE_CTRANSLATE2_COMPUTE_TYPE_LIST + + @property + def SELECTABLE_WHISPER_COMPUTE_TYPE_LIST(self): + return self._SELECTABLE_WHISPER_COMPUTE_TYPE_LIST + @property def SEND_MESSAGE_BUTTON_TYPE_LIST(self): return self._SEND_MESSAGE_BUTTON_TYPE_LIST @@ -814,6 +822,18 @@ class Config: self._CTRANSLATE2_WEIGHT_TYPE = value self.saveConfig(inspect.currentframe().f_code.co_name, value) + @property + @json_serializable('CTRANSLATE2_COMPUTE_TYPE') + def CTRANSLATE2_COMPUTE_TYPE(self): + return self._CTRANSLATE2_COMPUTE_TYPE + + @CTRANSLATE2_COMPUTE_TYPE.setter + def CTRANSLATE2_COMPUTE_TYPE(self, value): + if isinstance(value, str): + if value in self.SELECTABLE_CTRANSLATE2_COMPUTE_TYPE_LIST: + self._CTRANSLATE2_COMPUTE_TYPE = value + self.saveConfig(inspect.currentframe().f_code.co_name, value) + @property @json_serializable('WHISPER_WEIGHT_TYPE') def WHISPER_WEIGHT_TYPE(self): @@ -826,6 +846,18 @@ class Config: self._WHISPER_WEIGHT_TYPE = value self.saveConfig(inspect.currentframe().f_code.co_name, value) + @property + @json_serializable('WHISPER_COMPUTE_TYPE') + def WHISPER_COMPUTE_TYPE(self): + return self._WHISPER_COMPUTE_TYPE + + @WHISPER_COMPUTE_TYPE.setter + def WHISPER_COMPUTE_TYPE(self, value): + if isinstance(value, str): + if value in self.SELECTABLE_WHISPER_COMPUTE_TYPE_LIST: + self._WHISPER_COMPUTE_TYPE = value + self.saveConfig(inspect.currentframe().f_code.co_name, value) + @property @json_serializable('AUTO_CLEAR_MESSAGE_BOX') def AUTO_CLEAR_MESSAGE_BOX(self): @@ 
-1051,6 +1083,8 @@ class Config: for i in range(torch.cuda.device_count()): self._SELECTABLE_COMPUTE_DEVICE_LIST.append({"device":"cuda", "device_index": i, "device_name": torch.cuda.get_device_name(i)}) self._SELECTABLE_COMPUTE_DEVICE_LIST.append({"device":"cpu", "device_index": 0, "device_name": "cpu"}) + self._SELECTABLE_CTRANSLATE2_COMPUTE_TYPE_LIST = ["auto"] + getComputeTypeList() + self._SELECTABLE_WHISPER_COMPUTE_TYPE_LIST = ["auto"] + getComputeTypeList() self._SEND_MESSAGE_BUTTON_TYPE_LIST = ["show", "hide", "show_and_disable_enter_key"] self._SEND_MESSAGE_FORMAT_PARTS = { "message": { @@ -1189,7 +1223,9 @@ class Config: self._SELECTED_TRANSLATION_COMPUTE_DEVICE = copy.deepcopy(self.SELECTABLE_COMPUTE_DEVICE_LIST[0]) self._SELECTED_TRANSCRIPTION_COMPUTE_DEVICE = copy.deepcopy(self.SELECTABLE_COMPUTE_DEVICE_LIST[0]) self._CTRANSLATE2_WEIGHT_TYPE = "small" + self._CTRANSLATE2_COMPUTE_TYPE = "auto" self._WHISPER_WEIGHT_TYPE = "base" + self._WHISPER_COMPUTE_TYPE = "auto" self._AUTO_CLEAR_MESSAGE_BOX = True self._SEND_ONLY_TRANSLATED_MESSAGES = False self._OVERLAY_SMALL_LOG = False diff --git a/src-python/controller.py b/src-python/controller.py index c34abaf8..afa9d266 100644 --- a/src-python/controller.py +++ b/src-python/controller.py @@ -652,6 +652,14 @@ class Controller: def getComputeDeviceList(*args, **kwargs) -> dict: return {"status":200, "result":config.SELECTABLE_COMPUTE_DEVICE_LIST} + @staticmethod + def getCTranslate2ComputeTypeList(*args, **kwargs) -> dict: + return {"status":200, "result":config.SELECTABLE_CTRANSLATE2_COMPUTE_TYPE_LIST} + + @staticmethod + def getWhisperComputeTypeList(*args, **kwargs) -> dict: + return {"status":200, "result":config.SELECTABLE_WHISPER_COMPUTE_TYPE_LIST} + @staticmethod def getSelectedTranslationComputeDevice(*args, **kwargs) -> dict: return {"status":200, "result":config.SELECTED_TRANSLATION_COMPUTE_DEVICE} @@ -1447,6 +1455,22 @@ class Controller: th_callback.join() return {"status":200, 
"result":config.CTRANSLATE2_WEIGHT_TYPE} + @staticmethod + def getCtranslateComputeType(*args, **kwargs) -> dict: + return {"status":200, "result":config.CTRANSLATE2_COMPUTE_TYPE} + + @staticmethod + def setCtranslateComputeType(data, *args, **kwargs) -> dict: + config.CTRANSLATE2_COMPUTE_TYPE = str(data) + if model.checkTranslatorCTranslate2ModelWeight(config.CTRANSLATE2_WEIGHT_TYPE): + def callback(): + model.changeTranslatorCTranslate2Model() + th_callback = Thread(target=callback) + th_callback.daemon = True + th_callback.start() + th_callback.join() + return {"status":200, "result":config.CTRANSLATE2_COMPUTE_TYPE} + @staticmethod def getWhisperWeightType(*args, **kwargs) -> dict: return {"status":200, "result":config.WHISPER_WEIGHT_TYPE} @@ -1456,6 +1480,15 @@ class Controller: config.WHISPER_WEIGHT_TYPE = str(data) return {"status":200, "result": config.WHISPER_WEIGHT_TYPE} + @staticmethod + def getWhisperComputeType(*args, **kwargs) -> dict: + return {"status":200, "result":config.WHISPER_COMPUTE_TYPE} + + @staticmethod + def setWhisperComputeType(data, *args, **kwargs) -> dict: + config.WHISPER_COMPUTE_TYPE = str(data) + return {"status":200, "result":config.WHISPER_COMPUTE_TYPE} + @staticmethod def getSendMessageFormatParts(*args, **kwargs) -> dict: return {"status":200, "result":config.SEND_MESSAGE_FORMAT_PARTS} diff --git a/src-python/mainloop.py b/src-python/mainloop.py index 0010b98a..2ad6e078 100644 --- a/src-python/mainloop.py +++ b/src-python/mainloop.py @@ -162,6 +162,9 @@ mapping = { "/get/data/ctranslate2_weight_type": {"status": True, "variable":controller.getCtranslate2WeightType}, "/set/data/ctranslate2_weight_type": {"status": True, "variable":controller.setCtranslate2WeightType}, + "/get/data/ctranslate2_compute_type": {"status": True, "variable":controller.getCtranslateComputeType}, + "/set/data/ctranslate2_compute_type": {"status": True, "variable":controller.setCtranslateComputeType}, + "/run/download_ctranslate2_weight": {"status": True, 
"variable":controller.downloadCtranslate2Weight}, "/get/data/deepl_auth_key": {"status": False, "variable":controller.getDeepLAuthKey}, @@ -261,8 +264,13 @@ mapping = { "/set/disable/check_speaker_threshold": {"status": True, "variable":controller.setDisableCheckSpeakerThreshold}, "/get/data/selectable_whisper_weight_type_dict": {"status": True, "variable":controller.getSelectableWhisperWeightTypeDict}, + "/get/data/whisper_weight_type": {"status": True, "variable":controller.getWhisperWeightType}, "/set/data/whisper_weight_type": {"status": True, "variable":controller.setWhisperWeightType}, + + "/get/data/whisper_compute_type": {"status": True, "variable":controller.getWhisperComputeType}, + "/set/data/whisper_compute_type": {"status": True, "variable":controller.setWhisperComputeType}, + "/run/download_whisper_weight": {"status": True, "variable":controller.downloadWhisperWeight}, # VR diff --git a/src-python/model.py b/src-python/model.py index 333f1394..445b0a5e 100644 --- a/src-python/model.py +++ b/src-python/model.py @@ -112,10 +112,12 @@ class Model: def changeTranslatorCTranslate2Model(self): self.translator.changeCTranslate2Model( - config.PATH_LOCAL, - config.CTRANSLATE2_WEIGHT_TYPE, - config.SELECTED_TRANSLATION_COMPUTE_DEVICE["device"], - config.SELECTED_TRANSLATION_COMPUTE_DEVICE["device_index"]) + path=config.PATH_LOCAL, + model_type=config.CTRANSLATE2_WEIGHT_TYPE, + device=config.SELECTED_TRANSLATION_COMPUTE_DEVICE["device"], + device_index=config.SELECTED_TRANSLATION_COMPUTE_DEVICE["device_index"], + compute_type=config.CTRANSLATE2_COMPUTE_TYPE + ) def downloadCTranslate2ModelWeight(self, weight_type, callback=None, end_callback=None): return downloadCTranslate2Weight(config.PATH_LOCAL, weight_type, callback, end_callback) @@ -438,6 +440,7 @@ class Model: whisper_weight_type=config.WHISPER_WEIGHT_TYPE, device=config.SELECTED_TRANSCRIPTION_COMPUTE_DEVICE["device"], device_index=config.SELECTED_TRANSCRIPTION_COMPUTE_DEVICE["device_index"], + 
compute_type=config.WHISPER_COMPUTE_TYPE, ) def sendMicTranscript(): try: @@ -621,6 +624,7 @@ class Model: whisper_weight_type=config.WHISPER_WEIGHT_TYPE, device=config.SELECTED_TRANSCRIPTION_COMPUTE_DEVICE["device"], device_index=config.SELECTED_TRANSCRIPTION_COMPUTE_DEVICE["device_index"], + compute_type=config.WHISPER_COMPUTE_TYPE, ) def sendSpeakerTranscript(): try: diff --git a/src-python/models/transcription/transcription_transcriber.py b/src-python/models/transcription/transcription_transcriber.py index 5407253a..9d874b30 100644 --- a/src-python/models/transcription/transcription_transcriber.py +++ b/src-python/models/transcription/transcription_transcriber.py @@ -21,7 +21,7 @@ PHRASE_TIMEOUT = 3 MAX_PHRASES = 10 class AudioTranscriber: - def __init__(self, speaker, source, phrase_timeout, max_phrases, transcription_engine, root=None, whisper_weight_type=None, device="cpu", device_index=0): + def __init__(self, speaker, source, phrase_timeout, max_phrases, transcription_engine, root=None, whisper_weight_type=None, device="cpu", device_index=0, compute_type="auto"): self.speaker = speaker self.phrase_timeout = phrase_timeout self.max_phrases = max_phrases @@ -41,7 +41,7 @@ class AudioTranscriber: } if transcription_engine == "Whisper" and checkWhisperWeight(root, whisper_weight_type) is True: - self.whisper_model = getWhisperModel(root, whisper_weight_type, device=device, device_index=device_index) + self.whisper_model = getWhisperModel(root, whisper_weight_type, device=device, device_index=device_index, compute_type=compute_type) self.transcription_engine = "Whisper" def transcribeAudioQueue(self, audio_queue, languages, countries, avg_logprob=-0.8, no_speech_prob=0.6): diff --git a/src-python/models/transcription/transcription_whisper.py b/src-python/models/transcription/transcription_whisper.py index 04f89626..5f61a121 100644 --- a/src-python/models/transcription/transcription_whisper.py +++ b/src-python/models/transcription/transcription_whisper.py @@ 
-74,9 +74,10 @@ def downloadWhisperWeight(root, weight_type, callback=None, end_callback=None): if isinstance(end_callback, Callable): end_callback() -def getWhisperModel(root, weight_type, device="cpu", device_index=0): +def getWhisperModel(root, weight_type, device="cpu", device_index=0, compute_type="auto"): path = os_path.join(root, "weights", "whisper", weight_type) - compute_type = getBestComputeType(device, device_index) + if compute_type == "auto": + compute_type = getBestComputeType(device, device_index) try: model = WhisperModel( path, diff --git a/src-python/models/translation/translation_translator.py b/src-python/models/translation/translation_translator.py index 42eb828e..897fcd1b 100644 --- a/src-python/models/translation/translation_translator.py +++ b/src-python/models/translation/translation_translator.py @@ -36,14 +36,15 @@ class Translator(): result = False return result - def changeCTranslate2Model(self, path, model_type, device="cpu", device_index=0): + def changeCTranslate2Model(self, path, model_type, device="cpu", device_index=0, compute_type="auto"): self.is_loaded_ctranslate2_model = False directory_name = ctranslate2_weights[model_type]["directory_name"] tokenizer = ctranslate2_weights[model_type]["tokenizer"] weight_path = os_path.join(path, "weights", "ctranslate2", directory_name) tokenizer_path = os_path.join(path, "weights", "ctranslate2", directory_name, "tokenizer") - compute_type = getBestComputeType(device, device_index) + if compute_type == "auto": + compute_type = getBestComputeType(device, device_index) self.ctranslate2_translator = ctranslate2.Translator( weight_path, device=device, diff --git a/src-python/utils.py b/src-python/utils.py index c3a857f2..1b28fcf6 100644 --- a/src-python/utils.py +++ b/src-python/utils.py @@ -78,10 +78,13 @@ def isValidIpAddress(ip_address: str) -> bool: except ValueError: return False +def getComputeTypeList() -> list: + return ["int8_bfloat16", "int8_float16", "int8", "bfloat16", "float16", 
"int8_float32", "float32"] + def getBestComputeType(device, device_index) -> str: compute_types = get_supported_compute_types(device, device_index) compute_types = set(compute_types) - preferred_types = ["int8_bfloat16", "int8_float16", "int8", "bfloat16", "float16", "int8_float32", "float32"] + preferred_types = getComputeTypeList() for preferred_type in preferred_types: if preferred_type in compute_types: From 5f0f9186422c887040a60c9bb66873cc7fa6ade2 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Wed, 17 Sep 2025 10:52:56 +0900 Subject: [PATCH 18/92] [Update] Rename and add methods for CTranslate2 compute type management in Controller --- src-python/controller.py | 8 ++++++-- src-python/mainloop.py | 8 ++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src-python/controller.py b/src-python/controller.py index afa9d266..77717918 100644 --- a/src-python/controller.py +++ b/src-python/controller.py @@ -1456,11 +1456,15 @@ class Controller: return {"status":200, "result":config.CTRANSLATE2_WEIGHT_TYPE} @staticmethod - def getCtranslateComputeType(*args, **kwargs) -> dict: + def getCtranslate2ComputeTypeList(*args, **kwargs) -> dict: + return {"status":200, "result":config.SELECTABLE_CTRANSLATE2_COMPUTE_TYPE_LIST} + + @staticmethod + def getCtranslate2ComputeType(*args, **kwargs) -> dict: return {"status":200, "result":config.CTRANSLATE2_COMPUTE_TYPE} @staticmethod - def setCtranslateComputeType(data, *args, **kwargs) -> dict: + def setCtranslate2ComputeType(data, *args, **kwargs) -> dict: config.CTRANSLATE2_COMPUTE_TYPE = str(data) if model.checkTranslatorCTranslate2ModelWeight(config.CTRANSLATE2_WEIGHT_TYPE): def callback(): diff --git a/src-python/mainloop.py b/src-python/mainloop.py index 2ad6e078..c7b03ea6 100644 --- a/src-python/mainloop.py +++ b/src-python/mainloop.py @@ -162,8 +162,10 @@ mapping = { "/get/data/ctranslate2_weight_type": {"status": True, 
"variable":controller.getCtranslate2WeightType}, "/set/data/ctranslate2_weight_type": {"status": True, "variable":controller.setCtranslate2WeightType}, - "/get/data/ctranslate2_compute_type": {"status": True, "variable":controller.getCtranslateComputeType}, - "/set/data/ctranslate2_compute_type": {"status": True, "variable":controller.setCtranslateComputeType}, + "/get/data/ctranslate2_compute_type_list": {"status": True, "variable":controller.getCtranslate2ComputeTypeList}, + + "/get/data/ctranslate2_compute_type": {"status": True, "variable":controller.getCtranslate2ComputeType}, + "/set/data/ctranslate2_compute_type": {"status": True, "variable":controller.setCtranslate2ComputeType}, "/run/download_ctranslate2_weight": {"status": True, "variable":controller.downloadCtranslate2Weight}, @@ -268,6 +270,8 @@ mapping = { "/get/data/whisper_weight_type": {"status": True, "variable":controller.getWhisperWeightType}, "/set/data/whisper_weight_type": {"status": True, "variable":controller.setWhisperWeightType}, + "/get/data/whisper_compute_type_list": {"status": True, "variable":controller.getWhisperComputeTypeList}, + "/get/data/whisper_compute_type": {"status": True, "variable":controller.getWhisperComputeType}, "/set/data/whisper_compute_type": {"status": True, "variable":controller.setWhisperComputeType}, From fad01f6f900b8ec998cb1fdf4fbba73726fcd847 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Wed, 17 Sep 2025 11:26:44 +0900 Subject: [PATCH 19/92] =?UTF-8?q?[Change]=20=E3=83=95=E3=82=A1=E3=82=A4?= =?UTF-8?q?=E3=83=AB=E5=90=8D=E3=82=92=E5=A4=89=E6=9B=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ..._kana_to_hepburn.py => transliteration_kana_to_hepburn.py} | 0 ...te_transliterator.py => transliteration_transliterator.py} | 4 ++-- 2 files changed, 2 insertions(+), 2 deletions(-) rename src-python/models/transliterate/{transliterate_kana_to_hepburn.py => 
transliteration_kana_to_hepburn.py} (100%) rename src-python/models/transliterate/{transliterate_transliterator.py => transliteration_transliterator.py} (98%) diff --git a/src-python/models/transliterate/transliterate_kana_to_hepburn.py b/src-python/models/transliterate/transliteration_kana_to_hepburn.py similarity index 100% rename from src-python/models/transliterate/transliterate_kana_to_hepburn.py rename to src-python/models/transliterate/transliteration_kana_to_hepburn.py diff --git a/src-python/models/transliterate/transliterate_transliterator.py b/src-python/models/transliterate/transliteration_transliterator.py similarity index 98% rename from src-python/models/transliterate/transliterate_transliterator.py rename to src-python/models/transliterate/transliteration_transliterator.py index 9472bdee..f2a9780f 100644 --- a/src-python/models/transliterate/transliterate_transliterator.py +++ b/src-python/models/transliterate/transliteration_transliterator.py @@ -1,9 +1,9 @@ from sudachipy import tokenizer from sudachipy import dictionary try: - from .transliterate_kana_to_hepburn import katakana_to_hepburn + from .transliteration_kana_to_hepburn import katakana_to_hepburn except ImportError: - from transliterate_kana_to_hepburn import katakana_to_hepburn + from transliteration_kana_to_hepburn import katakana_to_hepburn class Transliterator: def __init__(self): From 3d34b507933b019026e37e74905e5af99c51ea0a Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Wed, 17 Sep 2025 12:35:14 +0900 Subject: [PATCH 20/92] =?UTF-8?q?Config=E3=82=AF=E3=83=A9=E3=82=B9?= =?UTF-8?q?=E3=81=ABSELECTED=5FTAB=5FTARGET=5FLANGUAGES=5FNO=5FLIST?= =?UTF-8?q?=E3=83=97=E3=83=AD=E3=83=91=E3=83=86=E3=82=A3=E3=82=92=E8=BF=BD?= =?UTF-8?q?=E5=8A=A0=E3=81=97=E3=80=81=E3=83=A1=E3=83=83=E3=82=BB=E3=83=BC?= =?UTF-8?q?=E3=82=B8=E3=81=AE=E5=A4=89=E6=8F=9B=E5=87=A6=E7=90=86=E3=82=92?= =?UTF-8?q?=E6=94=B9=E5=96=84?= MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit --- src-python/config.py | 31 +++++------ src-python/controller.py | 110 +++++++++++++++++++++++++++++++-------- 2 files changed, 101 insertions(+), 40 deletions(-) diff --git a/src-python/config.py b/src-python/config.py index c544727d..d99114cc 100644 --- a/src-python/config.py +++ b/src-python/config.py @@ -107,6 +107,10 @@ class Config: def SELECTABLE_TAB_NO_LIST(self): return self._SELECTABLE_TAB_NO_LIST + @property + def SELECTED_TAB_TARGET_LANGUAGES_NO_LIST(self): + return self._SELECTED_TAB_TARGET_LANGUAGES_NO_LIST + @property def SELECTABLE_CTRANSLATE2_WEIGHT_TYPE_LIST(self): return self._SELECTABLE_CTRANSLATE2_WEIGHT_TYPE_LIST @@ -1115,24 +1119,17 @@ class Config: }, } self._SELECTED_TARGET_LANGUAGES = {} + self._SELECTED_TAB_TARGET_LANGUAGES_NO_LIST = ["1", "2", "3"] for tab_no in self.SELECTABLE_TAB_NO_LIST: - self._SELECTED_TARGET_LANGUAGES[tab_no] = { - "1": { - "language": "English", - "country": "United States", - "enable": True, - }, - "2": { - "language": "English", - "country": "United States", - "enable": False, - }, - "3": { - "language": "English", - "country": "United States", - "enable": False, - }, - } + for tab_target_lang_no in self.SELECTED_TAB_TARGET_LANGUAGES_NO_LIST: + if tab_no not in self._SELECTED_TARGET_LANGUAGES: + self.SELECTED_TARGET_LANGUAGES[tab_no] = {} + if tab_target_lang_no not in self._SELECTED_TARGET_LANGUAGES[tab_no]: + self.SELECTED_TARGET_LANGUAGES[tab_no][tab_target_lang_no] = { + "language": "English", + "country": "United States", + "enable": True, + } self._SELECTED_TRANSCRIPTION_ENGINE = "Google" self._CONVERT_MESSAGE_TO_ROMAJI = False self._CONVERT_MESSAGE_TO_HIRAGANA = False diff --git a/src-python/controller.py b/src-python/controller.py index 28c30734..82dc6943 100644 --- a/src-python/controller.py +++ b/src-python/controller.py @@ -298,14 +298,28 @@ class Controller: # その他のエラーは通常通り処理 raise + transliteration_message = [] + transliteration_translation = [] if 
config.CONVERT_MESSAGE_TO_HIRAGANA is True or config.CONVERT_MESSAGE_TO_ROMAJI is True: - if config.SELECTED_TARGET_LANGUAGES[config.SELECTED_TAB_NO]["1"]["language"] == "Japanese": - transliteration = model.convertMessageToTransliteration( - translation[0], + if config.SELECTED_YOUR_LANGUAGES[config.SELECTED_TAB_NO]["1"]["language"] == "Japanese": + transliteration_message = model.convertMessageToTransliteration( + message, hiragana=config.CONVERT_MESSAGE_TO_HIRAGANA, romaji=config.CONVERT_MESSAGE_TO_ROMAJI ) + for i, no in enumerate(config.SELECTED_TAB_TARGET_LANGUAGES_NO_LIST): + if config.SELECTED_TARGET_LANGUAGES[config.SELECTED_TAB_NO][no]["language"] == "Japanese": + transliteration_translation.append( + model.convertMessageToTransliteration( + translation[i], + hiragana=config.CONVERT_MESSAGE_TO_HIRAGANA, + romaji=config.CONVERT_MESSAGE_TO_ROMAJI + ) + ) + else: + transliteration_translation.append([]) + if config.ENABLE_TRANSCRIPTION_SEND is True: if config.SEND_MESSAGE_TO_VRC is True: if config.SEND_ONLY_TRANSLATED_MESSAGES is True: @@ -321,9 +335,16 @@ class Controller: 200, self.run_mapping["transcription_mic"], { - "message":message, - "translation":translation, - "transliteration":transliteration + "original": { + "message": message, + "transliteration": transliteration_message + }, + "translations": [ + { + "message": translation_message, + "transliteration": transliteration + } for translation_message, transliteration in zip(translation, transliteration_translation) + ] }) if config.OVERLAY_LARGE_LOG is True and model.overlay.initialized is True: @@ -429,14 +450,24 @@ class Controller: # その他のエラーは通常通り処理 raise + transliteration_message = [] + transliteration_translation = [] if config.CONVERT_MESSAGE_TO_HIRAGANA is True or config.CONVERT_MESSAGE_TO_ROMAJI is True: - if config.SELECTED_TARGET_LANGUAGES[config.SELECTED_TAB_NO]["1"]["language"] == "Japanese": - transliteration = model.convertMessageToTransliteration( + if language == "Japanese": + 
transliteration_message = model.convertMessageToTransliteration( message, hiragana=config.CONVERT_MESSAGE_TO_HIRAGANA, romaji=config.CONVERT_MESSAGE_TO_ROMAJI ) + transliteration_translation = [] + if config.SELECTED_YOUR_LANGUAGES[config.SELECTED_TAB_NO]["1"]["language"] == "Japanese": + transliteration_translation = model.convertMessageToTransliteration( + translation[0], + hiragana=config.CONVERT_MESSAGE_TO_HIRAGANA, + romaji=config.CONVERT_MESSAGE_TO_ROMAJI + ) + if config.ENABLE_TRANSCRIPTION_RECEIVE is True: if config.OVERLAY_SMALL_LOG is True and model.overlay.initialized is True: if config.OVERLAY_SHOW_ONLY_TRANSLATED_MESSAGES is True: @@ -492,9 +523,16 @@ class Controller: 200, self.run_mapping["transcription_speaker"], { - "message":message, - "translation":translation, - "transliteration":transliteration, + "original": { + "message": message, + "transliteration": transliteration_message + }, + "translations": [ + { + "message": translation_message, + "transliteration": transliteration + } for translation_message, transliteration in zip(translation, transliteration_translation) + ] }) if model.checkWebSocketServerAlive() is True: @@ -570,22 +608,41 @@ class Controller: "result": { "id":id, - "message":message, - "translation":[], - "transliteration":[], + "original": { + "message":message, + "transliteration":[] }, - } + "translations": [ + { + "message": "", + "transliteration": [] + } for _ in config.SELECTED_TAB_TARGET_LANGUAGES_NO_LIST + ] + }, + } else: # その他のエラーは通常通り処理 raise + transliteration_message = [] + transliteration_translation = [] if config.CONVERT_MESSAGE_TO_HIRAGANA is True or config.CONVERT_MESSAGE_TO_ROMAJI is True: - if config.SELECTED_TARGET_LANGUAGES[config.SELECTED_TAB_NO]["1"]["language"] == "Japanese": - transliteration = model.convertMessageToTransliteration( - translation[0], + if config.SELECTED_YOUR_LANGUAGES[config.SELECTED_TAB_NO]["1"]["language"] == "Japanese": + transliteration_message = 
model.convertMessageToTransliteration( + message, hiragana=config.CONVERT_MESSAGE_TO_HIRAGANA, romaji=config.CONVERT_MESSAGE_TO_ROMAJI ) + for i, no in enumerate(config.SELECTED_TAB_TARGET_LANGUAGES_NO_LIST): + if config.SELECTED_TARGET_LANGUAGES[config.SELECTED_TAB_NO][no]["language"] == "Japanese": + transliteration = model.convertMessageToTransliteration( + translation[i], + hiragana=config.CONVERT_MESSAGE_TO_HIRAGANA, + romaji=config.CONVERT_MESSAGE_TO_ROMAJI + ) + transliteration_translation.append(transliteration) + else: + transliteration_translation.append([]) # send OSC message if config.SEND_MESSAGE_TO_VRC is True: @@ -635,14 +692,21 @@ class Controller: translation_text = f" ({'/'.join(translation)})" if translation else "" model.logger.info(f"[CHAT] {message}{translation_text}") - return {"status":200, + return { + "status":200, "result":{ "id":id, - "message":message, - "translation":translation, - "transliteration":transliteration, + "original": { + "message":message, + "transliteration":transliteration_message }, - } + "translations": [ + { + "message": translation_message, + "transliteration": transliteration + } for translation_message, transliteration in zip(translation, transliteration_translation) + ] + }} @staticmethod def getVersion(*args, **kwargs) -> dict: From 4617954928d4580208ba253be7a674a1f245638f Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Wed, 17 Sep 2025 14:09:36 +0900 Subject: [PATCH 21/92] [Add] Implement Transliterator class and katakana to Hepburn conversion function --- src-python/model.py | 2 +- .../transliteration_kana_to_hepburn.py | 0 .../transliteration_transliterator.py | 0 3 files changed, 1 insertion(+), 1 deletion(-) rename src-python/models/{transliterate => transliteration}/transliteration_kana_to_hepburn.py (100%) rename src-python/models/{transliterate => transliteration}/transliteration_transliterator.py (100%) diff --git a/src-python/model.py 
b/src-python/model.py index 797f7bee..236593a4 100644 --- a/src-python/model.py +++ b/src-python/model.py @@ -27,7 +27,7 @@ from models.translation.translation_languages import translation_lang from models.transcription.transcription_languages import transcription_lang from models.translation.translation_utils import checkCTranslate2Weight, downloadCTranslate2Weight, downloadCTranslate2Tokenizer from models.transcription.transcription_whisper import checkWhisperWeight, downloadWhisperWeight -from models.transliterate.transliterate_transliterator import Transliterator +from models.transliteration.transliteration_transliterator import Transliterator from models.overlay.overlay import Overlay from models.overlay.overlay_image import OverlayImage from models.watchdog.watchdog import Watchdog diff --git a/src-python/models/transliterate/transliteration_kana_to_hepburn.py b/src-python/models/transliteration/transliteration_kana_to_hepburn.py similarity index 100% rename from src-python/models/transliterate/transliteration_kana_to_hepburn.py rename to src-python/models/transliteration/transliteration_kana_to_hepburn.py diff --git a/src-python/models/transliterate/transliteration_transliterator.py b/src-python/models/transliteration/transliteration_transliterator.py similarity index 100% rename from src-python/models/transliterate/transliteration_transliterator.py rename to src-python/models/transliteration/transliteration_transliterator.py From 5ed0d555b928f986d633d81c4189f7726161e06b Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Wed, 17 Sep 2025 15:28:45 +0900 Subject: [PATCH 22/92] =?UTF-8?q?Transliteration=E5=A4=89=E6=95=B0?= =?UTF-8?q?=E3=81=AE=E5=90=8D=E7=A7=B0=E3=82=92=E5=A4=89=E6=9B=B4=E3=81=97?= =?UTF-8?q?=E3=80=81=E3=83=A1=E3=83=83=E3=82=BB=E3=83=BC=E3=82=B8=E5=87=A6?= =?UTF-8?q?=E7=90=86=E3=81=AE=E4=B8=80=E8=B2=AB=E6=80=A7=E3=82=92=E5=90=91?= =?UTF-8?q?=E4=B8=8A?= MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit --- src-python/controller.py | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/src-python/controller.py b/src-python/controller.py index 82dc6943..dff9b2e4 100644 --- a/src-python/controller.py +++ b/src-python/controller.py @@ -246,7 +246,8 @@ class Controller: elif isinstance(message, str) and len(message) > 0: translation = [] - transliteration = [] + transliteration_message = [] + transliteration_translation = [] if model.checkKeywords(message): self.run( 200, @@ -298,8 +299,6 @@ class Controller: # その他のエラーは通常通り処理 raise - transliteration_message = [] - transliteration_translation = [] if config.CONVERT_MESSAGE_TO_HIRAGANA is True or config.CONVERT_MESSAGE_TO_ROMAJI is True: if config.SELECTED_YOUR_LANGUAGES[config.SELECTED_TAB_NO]["1"]["language"] == "Japanese": transliteration_message = model.convertMessageToTransliteration( @@ -376,7 +375,7 @@ class Controller: "dst_languages":config.SELECTED_TARGET_LANGUAGES[config.SELECTED_TAB_NO], "message":message, "translation":translation, - "transliteration":transliteration + "transliteration":transliteration_translation } ) @@ -398,7 +397,8 @@ class Controller: ) elif isinstance(message, str) and len(message) > 0: translation = [] - transliteration = [] + transliteration_message = [] + transliteration_translation = [] if model.checkKeywords(message): self.run( 200, @@ -450,8 +450,6 @@ class Controller: # その他のエラーは通常通り処理 raise - transliteration_message = [] - transliteration_translation = [] if config.CONVERT_MESSAGE_TO_HIRAGANA is True or config.CONVERT_MESSAGE_TO_ROMAJI is True: if language == "Japanese": transliteration_message = model.convertMessageToTransliteration( @@ -460,7 +458,6 @@ class Controller: romaji=config.CONVERT_MESSAGE_TO_ROMAJI ) - transliteration_translation = [] if config.SELECTED_YOUR_LANGUAGES[config.SELECTED_TAB_NO]["1"]["language"] == "Japanese": transliteration_translation = 
model.convertMessageToTransliteration( translation[0], @@ -543,7 +540,7 @@ class Controller: "dst_languages":config.SELECTED_YOUR_LANGUAGES[config.SELECTED_TAB_NO], "message":message, "translation":translation, - "transliteration":transliteration + "transliteration":transliteration_translation } ) @@ -556,7 +553,8 @@ class Controller: message = data["message"] if len(message) > 0: translation = [] - transliteration = [] + transliteration_message = [] + transliteration_translation = [] if config.ENABLE_TRANSLATION is False: pass else: @@ -624,8 +622,6 @@ class Controller: # その他のエラーは通常通り処理 raise - transliteration_message = [] - transliteration_translation = [] if config.CONVERT_MESSAGE_TO_HIRAGANA is True or config.CONVERT_MESSAGE_TO_ROMAJI is True: if config.SELECTED_YOUR_LANGUAGES[config.SELECTED_TAB_NO]["1"]["language"] == "Japanese": transliteration_message = model.convertMessageToTransliteration( @@ -684,7 +680,7 @@ class Controller: "dst_languages":config.SELECTED_TARGET_LANGUAGES[config.SELECTED_TAB_NO], "message":message, "translation":translation, - "transliteration":transliteration + "transliteration":transliteration_translation } ) From c9dd35256567926dd682fb90fdc00d544cb27ec4 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Wed, 17 Sep 2025 17:05:59 +0900 Subject: [PATCH 23/92] =?UTF-8?q?=E3=83=A1=E3=83=83=E3=82=BB=E3=83=BC?= =?UTF-8?q?=E3=82=B8=E3=81=AE=E5=A4=89=E6=8F=9B=E5=87=A6=E7=90=86=E3=82=92?= =?UTF-8?q?=E6=94=B9=E5=96=84=E3=81=97=E3=80=81=E6=97=A5=E6=9C=AC=E8=AA=9E?= =?UTF-8?q?=E3=81=AE=E3=83=88=E3=83=A9=E3=83=B3=E3=82=B9=E3=83=AA=E3=83=86?= =?UTF-8?q?=E3=83=AC=E3=83=BC=E3=82=B7=E3=83=A7=E3=83=B3=E3=81=AE=E6=9D=A1?= =?UTF-8?q?=E4=BB=B6=E3=82=92=E6=95=B4=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src-python/controller.py | 99 ++++++++++++++++++++++------------------ 1 file changed, 55 insertions(+), 44 deletions(-) diff --git 
a/src-python/controller.py b/src-python/controller.py index dff9b2e4..bbaf0a70 100644 --- a/src-python/controller.py +++ b/src-python/controller.py @@ -299,25 +299,28 @@ class Controller: # その他のエラーは通常通り処理 raise - if config.CONVERT_MESSAGE_TO_HIRAGANA is True or config.CONVERT_MESSAGE_TO_ROMAJI is True: - if config.SELECTED_YOUR_LANGUAGES[config.SELECTED_TAB_NO]["1"]["language"] == "Japanese": - transliteration_message = model.convertMessageToTransliteration( - message, - hiragana=config.CONVERT_MESSAGE_TO_HIRAGANA, - romaji=config.CONVERT_MESSAGE_TO_ROMAJI - ) + if config.CONVERT_MESSAGE_TO_HIRAGANA is True or config.CONVERT_MESSAGE_TO_ROMAJI is True: + if config.SELECTED_YOUR_LANGUAGES[config.SELECTED_TAB_NO]["1"]["language"] == "Japanese": + transliteration_message = model.convertMessageToTransliteration( + message, + hiragana=config.CONVERT_MESSAGE_TO_HIRAGANA, + romaji=config.CONVERT_MESSAGE_TO_ROMAJI + ) - for i, no in enumerate(config.SELECTED_TAB_TARGET_LANGUAGES_NO_LIST): - if config.SELECTED_TARGET_LANGUAGES[config.SELECTED_TAB_NO][no]["language"] == "Japanese": - transliteration_translation.append( - model.convertMessageToTransliteration( - translation[i], - hiragana=config.CONVERT_MESSAGE_TO_HIRAGANA, - romaji=config.CONVERT_MESSAGE_TO_ROMAJI - ) + for i, no in enumerate(config.SELECTED_TAB_TARGET_LANGUAGES_NO_LIST): + if (config.SELECTED_TARGET_LANGUAGES[config.SELECTED_TAB_NO][no]["language"] == "Japanese" and + config.SELECTED_TARGET_LANGUAGES[config.SELECTED_TAB_NO][no]["enable"] is True): + transliteration_translation.append( + model.convertMessageToTransliteration( + translation[i], + hiragana=config.CONVERT_MESSAGE_TO_HIRAGANA, + romaji=config.CONVERT_MESSAGE_TO_ROMAJI ) - else: - transliteration_translation.append([]) + ) + else: + transliteration_translation.append([]) + else: + transliteration_translation = [[] for _ in config.SELECTED_TAB_TARGET_LANGUAGES_NO_LIST] if config.ENABLE_TRANSCRIPTION_SEND is True: if config.SEND_MESSAGE_TO_VRC is 
True: @@ -450,21 +453,26 @@ class Controller: # その他のエラーは通常通り処理 raise - if config.CONVERT_MESSAGE_TO_HIRAGANA is True or config.CONVERT_MESSAGE_TO_ROMAJI is True: - if language == "Japanese": - transliteration_message = model.convertMessageToTransliteration( - message, - hiragana=config.CONVERT_MESSAGE_TO_HIRAGANA, - romaji=config.CONVERT_MESSAGE_TO_ROMAJI - ) - - if config.SELECTED_YOUR_LANGUAGES[config.SELECTED_TAB_NO]["1"]["language"] == "Japanese": - transliteration_translation = model.convertMessageToTransliteration( - translation[0], - hiragana=config.CONVERT_MESSAGE_TO_HIRAGANA, - romaji=config.CONVERT_MESSAGE_TO_ROMAJI + if config.CONVERT_MESSAGE_TO_HIRAGANA is True or config.CONVERT_MESSAGE_TO_ROMAJI is True: + if language == "Japanese": + transliteration_message = model.convertMessageToTransliteration( + message, + hiragana=config.CONVERT_MESSAGE_TO_HIRAGANA, + romaji=config.CONVERT_MESSAGE_TO_ROMAJI ) + if (config.SELECTED_YOUR_LANGUAGES[config.SELECTED_TAB_NO]["1"]["language"] == "Japanese" and + config.SELECTED_YOUR_LANGUAGES[config.SELECTED_TAB_NO]["1"]["enable"] is True): + transliteration_translation = model.convertMessageToTransliteration( + translation[0], + hiragana=config.CONVERT_MESSAGE_TO_HIRAGANA, + romaji=config.CONVERT_MESSAGE_TO_ROMAJI + ) + else: + transliteration_translation.append([]) + else: + transliteration_translation = [[] for _ in config.SELECTED_TAB_TARGET_LANGUAGES_NO_LIST] + if config.ENABLE_TRANSCRIPTION_RECEIVE is True: if config.OVERLAY_SMALL_LOG is True and model.overlay.initialized is True: if config.OVERLAY_SHOW_ONLY_TRANSLATED_MESSAGES is True: @@ -622,23 +630,26 @@ class Controller: # その他のエラーは通常通り処理 raise - if config.CONVERT_MESSAGE_TO_HIRAGANA is True or config.CONVERT_MESSAGE_TO_ROMAJI is True: - if config.SELECTED_YOUR_LANGUAGES[config.SELECTED_TAB_NO]["1"]["language"] == "Japanese": - transliteration_message = model.convertMessageToTransliteration( - message, + if config.CONVERT_MESSAGE_TO_HIRAGANA is True or 
config.CONVERT_MESSAGE_TO_ROMAJI is True: + if config.SELECTED_YOUR_LANGUAGES[config.SELECTED_TAB_NO]["1"]["language"] == "Japanese": + transliteration_message = model.convertMessageToTransliteration( + message, + hiragana=config.CONVERT_MESSAGE_TO_HIRAGANA, + romaji=config.CONVERT_MESSAGE_TO_ROMAJI + ) + for i, no in enumerate(config.SELECTED_TAB_TARGET_LANGUAGES_NO_LIST): + if (config.SELECTED_TARGET_LANGUAGES[config.SELECTED_TAB_NO][no]["language"] == "Japanese" and + config.SELECTED_TARGET_LANGUAGES[config.SELECTED_TAB_NO][no]["enable"] is True): + transliteration = model.convertMessageToTransliteration( + translation[i], hiragana=config.CONVERT_MESSAGE_TO_HIRAGANA, romaji=config.CONVERT_MESSAGE_TO_ROMAJI ) - for i, no in enumerate(config.SELECTED_TAB_TARGET_LANGUAGES_NO_LIST): - if config.SELECTED_TARGET_LANGUAGES[config.SELECTED_TAB_NO][no]["language"] == "Japanese": - transliteration = model.convertMessageToTransliteration( - translation[i], - hiragana=config.CONVERT_MESSAGE_TO_HIRAGANA, - romaji=config.CONVERT_MESSAGE_TO_ROMAJI - ) - transliteration_translation.append(transliteration) - else: - transliteration_translation.append([]) + transliteration_translation.append(transliteration) + else: + transliteration_translation.append([]) + else: + transliteration_translation = [[] for _ in config.SELECTED_TAB_TARGET_LANGUAGES_NO_LIST] # send OSC message if config.SEND_MESSAGE_TO_VRC is True: From 396d5d7d886196e35da74f03bcef24632b81308a Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Wed, 17 Sep 2025 17:52:12 +0900 Subject: [PATCH 24/92] =?UTF-8?q?=E3=83=A1=E3=83=83=E3=82=BB=E3=83=BC?= =?UTF-8?q?=E3=82=B8=E3=81=AE=E3=83=88=E3=83=A9=E3=83=B3=E3=82=B9=E3=83=AA?= =?UTF-8?q?=E3=83=86=E3=83=AC=E3=83=BC=E3=82=B7=E3=83=A7=E3=83=B3=E5=87=A6?= =?UTF-8?q?=E7=90=86=E3=82=92=E6=94=B9=E5=96=84=E3=81=97=E3=80=81=E6=97=A5?= =?UTF-8?q?=E6=9C=AC=E8=AA=9E=E3=81=AE=E6=9D=A1=E4=BB=B6=E3=82=92=E7=B0=A1?= 
=?UTF-8?q?=E7=B4=A0=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src-python/controller.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/src-python/controller.py b/src-python/controller.py index bbaf0a70..7f43fc9b 100644 --- a/src-python/controller.py +++ b/src-python/controller.py @@ -461,17 +461,18 @@ class Controller: romaji=config.CONVERT_MESSAGE_TO_ROMAJI ) - if (config.SELECTED_YOUR_LANGUAGES[config.SELECTED_TAB_NO]["1"]["language"] == "Japanese" and - config.SELECTED_YOUR_LANGUAGES[config.SELECTED_TAB_NO]["1"]["enable"] is True): - transliteration_translation = model.convertMessageToTransliteration( - translation[0], - hiragana=config.CONVERT_MESSAGE_TO_HIRAGANA, - romaji=config.CONVERT_MESSAGE_TO_ROMAJI + if config.SELECTED_YOUR_LANGUAGES[config.SELECTED_TAB_NO]["1"]["language"] == "Japanese": + transliteration_translation.append( + model.convertMessageToTransliteration( + translation[0], + hiragana=config.CONVERT_MESSAGE_TO_HIRAGANA, + romaji=config.CONVERT_MESSAGE_TO_ROMAJI + ) ) else: transliteration_translation.append([]) else: - transliteration_translation = [[] for _ in config.SELECTED_TAB_TARGET_LANGUAGES_NO_LIST] + transliteration_translation = [[]] if config.ENABLE_TRANSCRIPTION_RECEIVE is True: if config.OVERLAY_SMALL_LOG is True and model.overlay.initialized is True: @@ -640,12 +641,13 @@ class Controller: for i, no in enumerate(config.SELECTED_TAB_TARGET_LANGUAGES_NO_LIST): if (config.SELECTED_TARGET_LANGUAGES[config.SELECTED_TAB_NO][no]["language"] == "Japanese" and config.SELECTED_TARGET_LANGUAGES[config.SELECTED_TAB_NO][no]["enable"] is True): - transliteration = model.convertMessageToTransliteration( - translation[i], - hiragana=config.CONVERT_MESSAGE_TO_HIRAGANA, - romaji=config.CONVERT_MESSAGE_TO_ROMAJI + transliteration_translation.append( + model.convertMessageToTransliteration( + translation[i], + 
hiragana=config.CONVERT_MESSAGE_TO_HIRAGANA, + romaji=config.CONVERT_MESSAGE_TO_ROMAJI + ) ) - transliteration_translation.append(transliteration) else: transliteration_translation.append([]) else: From 9cf4a378f134e96e1dee6ef1e792deda13b0e785 Mon Sep 17 00:00:00 2001 From: Sakamoto Shiina <68018796+ShiinaSakamoto@users.noreply.github.com> Date: Wed, 17 Sep 2025 18:07:10 +0900 Subject: [PATCH 25/92] =?UTF-8?q?[Update]=20UI:=20=E3=83=AD=E3=83=BC?= =?UTF-8?q?=E3=83=9E=E5=AD=97/=E3=81=8B=E3=81=AA=20=E8=A1=A8=E7=A4=BA?= =?UTF-8?q?=E3=81=99=E3=82=8B=E3=82=88=E3=81=86=E3=81=AB=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- locales/en.yml | 7 ++ locales/ja.yml | 6 ++ .../setting_box/others/Others.jsx | 35 ++++++++ .../message_container/MessageContainer.jsx | 79 ++++++++++++++++--- src-ui/logics/common/useMessage.js | 28 ++++--- src-ui/logics/configs/others/useOthers.js | 60 ++++++++++++++ src-ui/logics/useReceiveRoutes.js | 10 ++- src-ui/store.js | 2 + 8 files changed, 205 insertions(+), 22 deletions(-) diff --git a/locales/en.yml b/locales/en.yml index 1faf9639..5dabb4b2 100644 --- a/locales/en.yml +++ b/locales/en.yml @@ -250,6 +250,13 @@ config_page: received_message_format: label: "Message Format (Speaker2Chatbox)" desc: "Currently, it is used in Speaker2Chatbox." + convert_message_to_romaji: + label: Show Romaji + desc: Supported only when Japanese is selected as the translation language. When enabled along with '{{convert_message_to_hiragana}}', romaji will be shown on mouse hover. + convert_message_to_hiragana: + label: Show Hiragana + desc: Supported only when Japanese is selected as the translation language. 
+ hotkeys: toggle_vrct_visibility: diff --git a/locales/ja.yml b/locales/ja.yml index d3da554e..b4097a9a 100644 --- a/locales/ja.yml +++ b/locales/ja.yml @@ -250,6 +250,12 @@ config_page: received_message_format: label: メッセージフォーマット(Speaker2Chatbox) desc: 今のところ、Speaker2Chatboxで送信した時の表示に使われます。 + convert_message_to_romaji: + label: ローマ字を表示 + desc: 翻訳言語として日本語を選択した時のみサポート。「{{convert_message_to_hiragana}}」と同時に有効にした場合は、マウスホバーで表示されます。 + convert_message_to_hiragana: + label: ひらがなを表示 + desc: 翻訳言語として日本語を選択した時のみサポート。 hotkeys: toggle_vrct_visibility: diff --git a/src-ui/app/config_page/setting_section/setting_box/others/Others.jsx b/src-ui/app/config_page/setting_section/setting_box/others/Others.jsx index 65027ef3..5ef39ce0 100644 --- a/src-ui/app/config_page/setting_section/setting_box/others/Others.jsx +++ b/src-ui/app/config_page/setting_section/setting_box/others/Others.jsx @@ -45,6 +45,10 @@ export const Others = () => { +
+ + +
); }; @@ -201,4 +205,35 @@ const ReceivedMessageFormatPartsContainer = () => { format_id="received" /> ); +}; + +const ConvertMessageToRomajiContainer = () => { + const { t } = useI18n(); + const { currentConvertMessageToRomaji, toggleConvertMessageToRomaji } = useOthers(); + + return ( + + ); +}; + +const ConvertMessageToHiraganaContainer = () => { + const { t } = useI18n(); + const { currentConvertMessageToHiragana, toggleConvertMessageToHiragana } = useOthers(); + + return ( + + ); }; \ No newline at end of file diff --git a/src-ui/app/main_page/main_section/message_container/log_box/message_container/MessageContainer.jsx b/src-ui/app/main_page/main_section/message_container/log_box/message_container/MessageContainer.jsx index d982f27c..cd36754d 100644 --- a/src-ui/app/main_page/main_section/message_container/log_box/message_container/MessageContainer.jsx +++ b/src-ui/app/main_page/main_section/message_container/log_box/message_container/MessageContainer.jsx @@ -39,7 +39,7 @@ export const MessageContainer = ({ messages, status, category, created_at }) => setIsLocked(true); }; - const is_translated_exist = messages.translated?.length >= 1; + const is_translation_exist = messages.translations?.length > 0; const is_pending = status === "pending"; const is_sent_message = category === "sent"; const is_system_message = category === "system"; @@ -69,11 +69,11 @@ export const MessageContainer = ({ messages, status, category, created_at }) =>
{is_system_message ? ( -

{messages.message}

- ) : is_translated_exist ? ( +

{messages.original.message}

+ ) : is_translation_exist ? ( ) : ( -

{messages.original}

+ )}
@@ -88,13 +88,74 @@ export const MessageContainer = ({ messages, status, category, created_at }) => ); }; -const WithTranslatedMessages = ({ messages }) => { - const translated_data = Array.isArray(messages.translated) ? messages.translated : [messages.translated]; +const MessageWithTransliteration = ({ item }) => { + const renderTokenNode = (token, key) => { + const orig = token.orig ?? ""; + const hira = token.hira ?? ""; + const hepburn = token.hepburn ?? ""; + + if ((hira && orig === hira) || (hepburn && orig === hepburn) || (!hira && !hepburn)) { + return ( + + {orig} + + ); + } + + if (hira && hira !== orig) { + const needHepburn = hepburn && hepburn !== orig; + const titleAttr = needHepburn ? hepburn : undefined; + return ( + + {orig} + {hira} + + ); + } + + if (hepburn && hepburn !== orig) { + return ( + + {orig} + {hepburn} + + ); + } + + return ( + + {orig} + + ); + }; + + if (!item.transliteration.length) { + return

{item.message}

; + } + + return ( +

+ {item.transliteration.map((token, idx) => renderTokenNode(token, idx))} +

+ ); +}; + +const OriginalMessage = ({ messages }) => { return ( <> -

{messages.original}

- {translated_data.map((message, index) => ( -

{message}

+ + + ); +}; + +const WithTranslatedMessages = ({ messages }) => { + return ( + <> +

{messages.original.message}

+ {messages.translations.map((item, idx) => ( +
+ +
))} ); diff --git a/src-ui/logics/common/useMessage.js b/src-ui/logics/common/useMessage.js index 78986b55..1b50bbbd 100644 --- a/src-ui/logics/common/useMessage.js +++ b/src-ui/logics/common/useMessage.js @@ -24,8 +24,8 @@ export const useMessage = () => { status: "pending", created_at: generateTimeData(), messages: { - original: message, - translated: [], + original: { message: message, transliteration: [] }, + translations: [], }, }); }; @@ -39,20 +39,26 @@ export const useMessage = () => { category: "system", status: "system", created_at: date, - messages: {message: message}, + messages: { + original: { message: message, transliteration: [] }, + translations: [], + }, }); }; + const addSystemMessageLog_FromBackend = (payload) => { addSystemMessageLog(payload.message); }; const updateSentMessageLogById = (payload) => { - updateMessageLogs(updateItemById(payload.id, payload.translation)); + updateMessageLogs(updateItemById(payload.id, payload)); }; + const addSentMessageLog = (payload) => { const message_object = generateMessageObject(payload, "sent"); addMessageLogs(message_object); }; + const addReceivedMessageLog = (payload) => { const message_object = generateMessageObject(payload, "received"); addMessageLogs(message_object); @@ -61,6 +67,7 @@ export const useMessage = () => { const startTyping = () => { asyncStdoutToPython("/run/typing_message_box"); }; + const stopTyping = () => { asyncStdoutToPython("/run/stop_typing_message_box"); }; @@ -83,11 +90,10 @@ export const useMessage = () => { }; const generateTimeData = () => { - const data = new Date().toLocaleTimeString( + return new Date().toLocaleTimeString( "ja-JP", - { hour12: false, hour: "2-digit", minute: "2-digit" }, + { hour12: false, hour: "2-digit", minute: "2-digit" } ); - return data; }; const generateMessageObject = (data, category) => { @@ -97,17 +103,17 @@ const generateMessageObject = (data, category) => { category: category, status: "ok", messages: { - original: data.message, - translated: 
data.translation, + original: data.original, + translations: data.translations ?? [], }, }; }; -const updateItemById = (id, translated_data) => (current_items) => { +const updateItemById = (id, updated_data) => (current_items) => { return current_items.data.map(item => { if (item.id === id) { item.status = "ok"; - item.messages.translated = translated_data; + if (updated_data.translations) item.messages.translations = updated_data.translations; } return item; }); diff --git a/src-ui/logics/configs/others/useOthers.js b/src-ui/logics/configs/others/useOthers.js index c2b0179f..855ab24e 100644 --- a/src-ui/logics/configs/others/useOthers.js +++ b/src-ui/logics/configs/others/useOthers.js @@ -9,6 +9,8 @@ import { useStore_MessageFormat_ExampleViewFilter, useStore_SendMessageFormatParts, useStore_ReceivedMessageFormatParts, + useStore_ConvertMessageToRomaji, + useStore_ConvertMessageToHiragana, } from "@store"; import { useStdoutToPython } from "@useStdoutToPython"; import { useNotificationStatus } from "@logics_common"; @@ -39,6 +41,11 @@ export const useOthers = () => { // Received const { currentReceivedMessageFormatParts, updateReceivedMessageFormatParts, pendingReceivedMessageFormatParts } = useStore_ReceivedMessageFormatParts(); + // Convert Message To Romaji + const { currentConvertMessageToRomaji, updateConvertMessageToRomaji, pendingConvertMessageToRomaji } = useStore_ConvertMessageToRomaji(); + // Convert Message To Hiragana + const { currentConvertMessageToHiragana, updateConvertMessageToHiragana, pendingConvertMessageToHiragana } = useStore_ConvertMessageToHiragana(); + const { showNotification_SaveSuccess } = useNotificationStatus(); // Auto Clear Message Input Box @@ -233,6 +240,45 @@ export const useOthers = () => { }); }; + // Convert Message To Romaji + const getConvertMessageToRomaji = () => { + pendingConvertMessageToRomaji(); + asyncStdoutToPython("/get/data/convert_message_to_romaji"); + }; + + const toggleConvertMessageToRomaji = () => { + 
pendingConvertMessageToRomaji(); + if (currentConvertMessageToRomaji.data) { + asyncStdoutToPython("/set/disable/convert_message_to_romaji"); + } else { + asyncStdoutToPython("/set/enable/convert_message_to_romaji"); + } + }; + + const setSuccessConvertMessageToRomaji = (enabled) => { + updateConvertMessageToRomaji(enabled); + showNotification_SaveSuccess(); + }; + + // Convert Message To Hiragana + const getConvertMessageToHiragana = () => { + pendingConvertMessageToHiragana(); + asyncStdoutToPython("/get/data/convert_message_to_hiragana"); + }; + + const toggleConvertMessageToHiragana = () => { + pendingConvertMessageToHiragana(); + if (currentConvertMessageToHiragana.data) { + asyncStdoutToPython("/set/disable/convert_message_to_hiragana"); + } else { + asyncStdoutToPython("/set/enable/convert_message_to_hiragana"); + } + }; + + const setSuccessConvertMessageToHiragana = (enabled) => { + updateConvertMessageToHiragana(enabled); + showNotification_SaveSuccess(); + }; return { // Auto Clear Message Input Box @@ -303,5 +349,19 @@ export const useOthers = () => { getReceivedMessageFormatParts, setReceivedMessageFormatParts, setSuccessReceivedMessageFormatParts, + + // Convert Message To Romaji + currentConvertMessageToRomaji, + getConvertMessageToRomaji, + toggleConvertMessageToRomaji, + updateConvertMessageToRomaji, + setSuccessConvertMessageToRomaji, + + // Convert Message To Hiragana + currentConvertMessageToHiragana, + getConvertMessageToHiragana, + toggleConvertMessageToHiragana, + updateConvertMessageToHiragana, + setSuccessConvertMessageToHiragana, }; }; \ No newline at end of file diff --git a/src-ui/logics/useReceiveRoutes.js b/src-ui/logics/useReceiveRoutes.js index c36b5fa7..a5e6d8ed 100644 --- a/src-ui/logics/useReceiveRoutes.js +++ b/src-ui/logics/useReceiveRoutes.js @@ -292,6 +292,14 @@ export const ROUTE_META_LIST = [ { endpoint: "/get/data/received_message_format_parts", ns: configs, hook_name: "useOthers", method_name: 
"updateReceivedMessageFormatParts" }, { endpoint: "/set/data/received_message_format_parts", ns: configs, hook_name: "useOthers", method_name: "setSuccessReceivedMessageFormatParts" }, + { endpoint: "/get/data/convert_message_to_romaji", ns: configs, hook_name: "useOthers", method_name: "updateConvertMessageToRomaji" }, + { endpoint: "/set/enable/convert_message_to_romaji", ns: configs, hook_name: "useOthers", method_name: "setSuccessConvertMessageToRomaji" }, + { endpoint: "/set/disable/convert_message_to_romaji", ns: configs, hook_name: "useOthers", method_name: "setSuccessConvertMessageToRomaji" }, + + { endpoint: "/get/data/convert_message_to_hiragana", ns: configs, hook_name: "useOthers", method_name: "updateConvertMessageToHiragana" }, + { endpoint: "/set/enable/convert_message_to_hiragana", ns: configs, hook_name: "useOthers", method_name: "setSuccessConvertMessageToHiragana" }, + { endpoint: "/set/disable/convert_message_to_hiragana", ns: configs, hook_name: "useOthers", method_name: "setSuccessConvertMessageToHiragana" }, + // Hotkeys { endpoint: "/get/data/hotkeys", ns: configs, hook_name: "useHotkeys", method_name: "updateHotkeys" }, { endpoint: "/set/data/hotkeys", ns: configs, hook_name: "useHotkeys", method_name: "setSuccessHotkeys" }, @@ -323,8 +331,6 @@ export const ROUTE_META_LIST = [ { endpoint: "/get/data/mic_no_speech_prob", ns: null, hook_name: null, method_name: null }, // Not implemented on UI yet { endpoint: "/get/data/speaker_avg_logprob", ns: null, hook_name: null, method_name: null }, // Not implemented on UI yet { endpoint: "/get/data/speaker_no_speech_prob", ns: null, hook_name: null, method_name: null }, // Not implemented on UI yet - { endpoint: "/get/data/convert_message_to_romaji", ns: null, hook_name: null, method_name: null }, // Not implemented on UI yet - { endpoint: "/get/data/convert_message_to_hiragana", ns: null, hook_name: null, method_name: null }, // Not implemented on UI yet { endpoint: "/get/data/transcription_engines", 
ns: null, hook_name: null, method_name: null }, // Not implemented on UI yet. (if ai_models has not been detected, this will be blank array[]. if the ai_models are ok but just network has not connected, it'l be only ["Whisper"]) ]; diff --git a/src-ui/store.js b/src-ui/store.js index e36aad46..6c31e87f 100644 --- a/src-ui/store.js +++ b/src-ui/store.js @@ -309,6 +309,8 @@ export const { atomInstance: Atom_ReceivedMessageFormatParts, useHook: useStore_ }, translation_first: false, }, "ReceivedMessageFormatParts"); +export const { atomInstance: Atom_ConvertMessageToRomaji, useHook: useStore_ConvertMessageToRomaji } = createAtomWithHook(false, "ConvertMessageToRomaji"); +export const { atomInstance: Atom_ConvertMessageToHiragana, useHook: useStore_ConvertMessageToHiragana } = createAtomWithHook(false, "ConvertMessageToHiragana"); // Hotkeys From 7274c514a7036aebc8c1b7658032334b596c22fa Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Wed, 17 Sep 2025 21:40:28 +0900 Subject: [PATCH 26/92] =?UTF-8?q?[Update]=20=E3=83=88=E3=83=A9=E3=83=B3?= =?UTF-8?q?=E3=82=B9=E3=83=AA=E3=83=86=E3=83=AC=E3=83=BC=E3=82=B7=E3=83=A7?= =?UTF-8?q?=E3=83=B3=E5=87=A6=E7=90=86=E3=82=92=E6=9C=89=E5=8A=B9=E5=8C=96?= =?UTF-8?q?=E3=81=99=E3=82=8B=E6=9D=A1=E4=BB=B6=E3=82=92=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src-python/controller.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src-python/controller.py b/src-python/controller.py index 7f43fc9b..b10d5617 100644 --- a/src-python/controller.py +++ b/src-python/controller.py @@ -308,8 +308,10 @@ class Controller: ) for i, no in enumerate(config.SELECTED_TAB_TARGET_LANGUAGES_NO_LIST): - if (config.SELECTED_TARGET_LANGUAGES[config.SELECTED_TAB_NO][no]["language"] == "Japanese" and - config.SELECTED_TARGET_LANGUAGES[config.SELECTED_TAB_NO][no]["enable"] is True): + if (config.ENABLE_TRANSLATION 
is True and + config.SELECTED_TARGET_LANGUAGES[config.SELECTED_TAB_NO][no]["language"] == "Japanese" and + config.SELECTED_TARGET_LANGUAGES[config.SELECTED_TAB_NO][no]["enable"] is True + ): transliteration_translation.append( model.convertMessageToTransliteration( translation[i], @@ -461,7 +463,9 @@ class Controller: romaji=config.CONVERT_MESSAGE_TO_ROMAJI ) - if config.SELECTED_YOUR_LANGUAGES[config.SELECTED_TAB_NO]["1"]["language"] == "Japanese": + if (config.ENABLE_TRANSLATION is True and + config.SELECTED_YOUR_LANGUAGES[config.SELECTED_TAB_NO]["1"]["language"] == "Japanese" + ): transliteration_translation.append( model.convertMessageToTransliteration( translation[0], @@ -639,8 +643,10 @@ class Controller: romaji=config.CONVERT_MESSAGE_TO_ROMAJI ) for i, no in enumerate(config.SELECTED_TAB_TARGET_LANGUAGES_NO_LIST): - if (config.SELECTED_TARGET_LANGUAGES[config.SELECTED_TAB_NO][no]["language"] == "Japanese" and - config.SELECTED_TARGET_LANGUAGES[config.SELECTED_TAB_NO][no]["enable"] is True): + if (config.ENABLE_TRANSLATION is True and + config.SELECTED_TARGET_LANGUAGES[config.SELECTED_TAB_NO][no]["language"] == "Japanese" and + config.SELECTED_TARGET_LANGUAGES[config.SELECTED_TAB_NO][no]["enable"] is True + ): transliteration_translation.append( model.convertMessageToTransliteration( translation[i], From 17e450310eb2dbdf2b611aca023139497461c4b2 Mon Sep 17 00:00:00 2001 From: Sakamoto Shiina <68018796+ShiinaSakamoto@users.noreply.github.com> Date: Wed, 17 Sep 2025 23:32:35 +0900 Subject: [PATCH 27/92] [bugfix] Fix the bugs: A message resend/edit feature that was not working. Romaji didn't show up even when mouse hover at some point. 
--- .../log_box/message_container/MessageContainer.jsx | 12 +++++------- .../message_input_box/MessageInputBox.jsx | 2 +- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src-ui/app/main_page/main_section/message_container/log_box/message_container/MessageContainer.jsx b/src-ui/app/main_page/main_section/message_container/log_box/message_container/MessageContainer.jsx index cd36754d..3c7c0c2c 100644 --- a/src-ui/app/main_page/main_section/message_container/log_box/message_container/MessageContainer.jsx +++ b/src-ui/app/main_page/main_section/message_container/log_box/message_container/MessageContainer.jsx @@ -17,10 +17,10 @@ export const MessageContainer = ({ messages, status, category, created_at }) => const [is_locked, setIsLocked] = useState(false); const resendFunction = () => { - sendMessage(messages.original); + sendMessage(messages.original.message); }; const editFunction = () => { - updateMessageInputValue(messages.original); + updateMessageInputValue(messages.original.message); }; const handleMouseEnter = () => { @@ -94,19 +94,17 @@ const MessageWithTransliteration = ({ item }) => { const hira = token.hira ?? ""; const hepburn = token.hepburn ?? ""; - if ((hira && orig === hira) || (hepburn && orig === hepburn) || (!hira && !hepburn)) { + if ((hira && orig === hira)) { return ( - + {orig} ); } if (hira && hira !== orig) { - const needHepburn = hepburn && hepburn !== orig; - const titleAttr = needHepburn ? 
hepburn : undefined; return ( - + {orig} {hira} diff --git a/src-ui/app/main_page/main_section/message_container/message_input_box/MessageInputBox.jsx b/src-ui/app/main_page/main_section/message_container/message_input_box/MessageInputBox.jsx index 6aa32092..6619f5fb 100644 --- a/src-ui/app/main_page/main_section/message_container/message_input_box/MessageInputBox.jsx +++ b/src-ui/app/main_page/main_section/message_container/message_input_box/MessageInputBox.jsx @@ -33,7 +33,7 @@ export const MessageInputBox = () => { if (currentMessageLogs.data) { const sentMessages = currentMessageLogs.data .filter(log => log.category === "sent") - .map(log => log.messages.original); + .map(log => log.messages.original.message); setMessageHistory(sentMessages); } }, [currentMessageLogs.data]); From 56af62029a324fba8a5389d339a45c82b9ff7e13 Mon Sep 17 00:00:00 2001 From: Sakamoto Shiina <68018796+ShiinaSakamoto@users.noreply.github.com> Date: Thu, 18 Sep 2025 00:06:09 +0900 Subject: [PATCH 28/92] [Update] Add transliteration position navigator when hiragana and romaji is both enabled and mouse hovered. --- .../log_box/message_container/MessageContainer.jsx | 12 ++++++------ .../message_container/MessageContainer.module.scss | 7 +++++++ 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src-ui/app/main_page/main_section/message_container/log_box/message_container/MessageContainer.jsx b/src-ui/app/main_page/main_section/message_container/log_box/message_container/MessageContainer.jsx index 3c7c0c2c..d53ccaa6 100644 --- a/src-ui/app/main_page/main_section/message_container/log_box/message_container/MessageContainer.jsx +++ b/src-ui/app/main_page/main_section/message_container/log_box/message_container/MessageContainer.jsx @@ -94,17 +94,17 @@ const MessageWithTransliteration = ({ item }) => { const hira = token.hira ?? ""; const hepburn = token.hepburn ?? 
""; - if ((hira && orig === hira)) { + if (hira && hira === orig && hepburn) { return ( - + {orig} ); } - if (hira && hira !== orig) { + if (hira && hira !== orig && hepburn) { return ( - + {orig} {hira} @@ -113,7 +113,7 @@ const MessageWithTransliteration = ({ item }) => { if (hepburn && hepburn !== orig) { return ( - + {orig} {hepburn} @@ -121,7 +121,7 @@ const MessageWithTransliteration = ({ item }) => { } return ( - + {orig} ); diff --git a/src-ui/app/main_page/main_section/message_container/log_box/message_container/MessageContainer.module.scss b/src-ui/app/main_page/main_section/message_container/log_box/message_container/MessageContainer.module.scss index ea82b297..b0986c3f 100644 --- a/src-ui/app/main_page/main_section/message_container/log_box/message_container/MessageContainer.module.scss +++ b/src-ui/app/main_page/main_section/message_container/log_box/message_container/MessageContainer.module.scss @@ -112,4 +112,11 @@ font-size: 1.2rem; color: var(--dark_500_color); } +} + +// For ruby +.with_hepburn { + &:hover { + color: var(--dark_500_color); + } } \ No newline at end of file From 6501e0c1d72b16a84a9f34ce8261ad94cad64cd8 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Fri, 19 Sep 2025 11:06:23 +0900 Subject: [PATCH 29/92] =?UTF-8?q?[Update]=20=E4=BF=AE=E6=AD=A3:=20SELECTED?= =?UTF-8?q?=5FTARGET=5FLANGUAGES=E3=81=AE=E5=88=9D=E6=9C=9F=E5=8C=96?= =?UTF-8?q?=E3=83=AD=E3=82=B8=E3=83=83=E3=82=AF=E3=82=92=E6=94=B9=E5=96=84?= =?UTF-8?q?=E3=81=97=E3=80=81=E6=9C=80=E5=88=9D=E3=81=AE=E8=A8=80=E8=AA=9E?= =?UTF-8?q?=E3=81=AE=E3=81=BF=E3=82=92=E6=9C=89=E5=8A=B9=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src-python/config.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src-python/config.py b/src-python/config.py index d99114cc..0431b2f9 100644 --- a/src-python/config.py +++ b/src-python/config.py @@ -1122,13 +1122,13 @@ 
class Config: self._SELECTED_TAB_TARGET_LANGUAGES_NO_LIST = ["1", "2", "3"] for tab_no in self.SELECTABLE_TAB_NO_LIST: for tab_target_lang_no in self.SELECTED_TAB_TARGET_LANGUAGES_NO_LIST: - if tab_no not in self._SELECTED_TARGET_LANGUAGES: - self.SELECTED_TARGET_LANGUAGES[tab_no] = {} - if tab_target_lang_no not in self._SELECTED_TARGET_LANGUAGES[tab_no]: - self.SELECTED_TARGET_LANGUAGES[tab_no][tab_target_lang_no] = { + if tab_no not in self.SELECTED_TARGET_LANGUAGES: + self._SELECTED_TARGET_LANGUAGES[tab_no] = {} + if tab_target_lang_no not in self.SELECTED_TARGET_LANGUAGES[tab_no]: + self._SELECTED_TARGET_LANGUAGES[tab_no][tab_target_lang_no] = { "language": "English", "country": "United States", - "enable": True, + "enable": True if tab_target_lang_no == self.SELECTED_TAB_TARGET_LANGUAGES_NO_LIST[0] else False, } self._SELECTED_TRANSCRIPTION_ENGINE = "Google" self._CONVERT_MESSAGE_TO_ROMAJI = False From 4808dcbc96c57fbf627b1a824804dffbada68705 Mon Sep 17 00:00:00 2001 From: Sakamoto Shiina <68018796+ShiinaSakamoto@users.noreply.github.com> Date: Fri, 19 Sep 2025 14:43:13 +0900 Subject: [PATCH 30/92] [Update/Chore] Config Page: Add 'CTranslate2 Compute Type' Selection. UI: Rename 'CTranslate2 compute device' to 'Translation compute device'. --- locales/en.yml | 6 +- locales/ja.yml | 6 +- locales/ko.yml | 6 +- locales/zh-Hans.yml | 4 +- locales/zh-Hant.yml | 4 +- .../setting_box/translation/Translation.jsx | 62 ++++++++---- .../configs/translation/useTranslation.js | 96 ++++++++++++++----- src-ui/logics/useReceiveRoutes.js | 11 ++- src-ui/store.js | 9 +- 9 files changed, 148 insertions(+), 56 deletions(-) diff --git a/locales/en.yml b/locales/en.yml index 1faf9639..e96de8fb 100644 --- a/locales/en.yml +++ b/locales/en.yml @@ -133,8 +133,10 @@ config_page: desc: "You can choose the translation model when using the {{ctranslate2}} translation engine." 
small: "Basic Model ({{capacity}})" large: "High Accuracy Model ({{capacity}})" - ctranslate2_compute_device: - label: "Processing device for AI translation {{ctranslate2}}" + ctranslate2_compute_type: + label: "Processing type for AI translation {{ctranslate2}}" + translation_compute_device: + label: "Processing device for AI translation" deepl_auth_key: label: "DeepL Auth Key" desc: "When using it, please change {{translator}} on the main screen to DeepL_API. ※Some languages may not be supported." diff --git a/locales/ja.yml b/locales/ja.yml index d3da554e..a16b6b1d 100644 --- a/locales/ja.yml +++ b/locales/ja.yml @@ -133,8 +133,10 @@ config_page: desc: "翻訳エンジン「{{ctranslate2}}」で翻訳する際に、使用する翻訳モデルを選択できます。" small: "通常モデル ({{capacity}})" large: "高精度モデル ({{capacity}})" - ctranslate2_compute_device: - label: "AI翻訳 {{ctranslate2}} の処理デバイス" + ctranslate2_compute_type: + label: "AI翻訳 {{ctranslate2}} の処理タイプ" + translation_compute_device: + label: "AI翻訳の処理デバイス" deepl_auth_key: label: "DeepL APIキーの登録" desc: "使用の際は、メイン画面にある {{translator}} をDeepL_APIに変更してください。\n※対応していない言語もあります。" diff --git a/locales/ko.yml b/locales/ko.yml index d2e171ac..df188d5f 100644 --- a/locales/ko.yml +++ b/locales/ko.yml @@ -133,8 +133,10 @@ config_page: desc: "오프라인 번역 시의 번역 모델을 변경합니다." small: "일반 모델 ({{capacity}})" large: "정밀 모델 ({{capacity}})" - ctranslate2_compute_device: - label: "AI 번역 {{ctranslate2}} 처리 장치" + ctranslate2_compute_type: + label: + translation_compute_device: + label: "AI 번역 처리 장치" deepl_auth_key: label: "DeepL 인증키" desc: "사용시 메인화면에 있는 {{translator}}를 DeepL_API로 변경해 주세요.\n지원하지 않는 언어도 있습니다." 
diff --git a/locales/zh-Hans.yml b/locales/zh-Hans.yml index 71a75390..e2bd4a14 100644 --- a/locales/zh-Hans.yml +++ b/locales/zh-Hans.yml @@ -133,7 +133,9 @@ config_page: desc: "可以选择用于离线翻译的翻译模型" small: "普通模型 ({{capacity}})" large: "高精度模型 ({{capacity}})" - ctranslate2_compute_device: + ctranslate2_compute_type: + label: + translation_compute_device: label: deepl_auth_key: label: "DeepL 授权密匙" diff --git a/locales/zh-Hant.yml b/locales/zh-Hant.yml index f091fd03..bc190b25 100644 --- a/locales/zh-Hant.yml +++ b/locales/zh-Hant.yml @@ -133,7 +133,9 @@ config_page: desc: "你可以選擇用於離線翻譯引擎的翻譯模型。" small: "基本模型({{capacity}})" large: "高準確率模型({{capacity}})" - ctranslate2_compute_device: + ctranslate2_compute_type: + label: + translation_compute_device: label: deepl_auth_key: label: "DeepL 授權金鑰" diff --git a/src-ui/app/config_page/setting_section/setting_box/translation/Translation.jsx b/src-ui/app/config_page/setting_section/setting_box/translation/Translation.jsx index 38c5a1e3..cdb8d351 100644 --- a/src-ui/app/config_page/setting_section/setting_box/translation/Translation.jsx +++ b/src-ui/app/config_page/setting_section/setting_box/translation/Translation.jsx @@ -18,7 +18,8 @@ export const Translation = () => { return ( <> - + + ); @@ -62,7 +63,7 @@ const CTranslate2WeightType_Box = () => { "config_page.translation.ctranslate2_weight_type.desc", {ctranslate2: "CTranslate2"} )} - name="ctransalte2_weight_type" + name="ctranslate2_weight_type" options={c_translate2_weight_types} checked_variable={currentSelectedCTranslate2WeightType} selectFunction={selectFunction} @@ -72,47 +73,72 @@ const CTranslate2WeightType_Box = () => { ); }; -// Duplicate -import { useComputeMode } from "@logics_common"; -const CTranslation2ComputeDevice_Box = () => { +const CTranslate2ComputeType_Box = () => { const { t } = useI18n(); - const { currentSelectedCTranslate2ComputeDevice, setSelectedCTranslate2ComputeDevice } = useTranslation(); - const { currentSelectableCTranslate2ComputeDeviceList } = 
useTranslation(); + const { currentSelectableCTranslate2ComputeTypeList } = useTranslation(); + const { currentSelectedCTranslate2ComputeType, setSelectedCTranslate2ComputeType } = useTranslation(); const selectFunction = (selected_data) => { - const target_obj = currentSelectableCTranslate2ComputeDeviceList.data[selected_data.selected_id]; - setSelectedCTranslate2ComputeDevice(target_obj); + setSelectedCTranslate2ComputeType(selected_data.selected_id); }; - const list_for_ui = transformDeviceArray(currentSelectableCTranslate2ComputeDeviceList.data); + const ctranslate2_compute_type_label = t("config_page.translation.ctranslate2_compute_type.label", { + ctranslate2: "CTranslate2" + }); - const target_index = findKeyByDeviceValue(currentSelectableCTranslate2ComputeDeviceList.data, currentSelectedCTranslate2ComputeDevice.data); + return ( + + ); +}; + +// Duplicate +import { useComputeMode } from "@logics_common"; +const TranslationComputeDevice_Box = () => { + const { t } = useI18n(); + const { currentSelectedTranslationComputeDevice, setSelectedTranslationComputeDevice } = useTranslation(); + const { currentSelectableTranslationComputeDeviceList } = useTranslation(); + + const selectFunction = (selected_data) => { + const target_obj = currentSelectableTranslationComputeDeviceList.data[selected_data.selected_id]; + setSelectedTranslationComputeDevice(target_obj); + }; + + const list_for_ui = transformDeviceArray(currentSelectableTranslationComputeDeviceList.data); + + const target_index = findKeyByDeviceValue(currentSelectableTranslationComputeDeviceList.data, currentSelectedTranslationComputeDevice.data); const { currentComputeMode } = useComputeMode(); - const ctranslate2_compute_device_label = t("config_page.translation.ctranslate2_compute_device.label", { - ctranslate2: "Ctranslate2" + const translation_compute_device_label = t("config_page.translation.translation_compute_device.label", { + ctranslate2: "CTranslate2" }); if (currentComputeMode.data === "cpu") { 
return ( ) } return ( ); }; diff --git a/src-ui/logics/configs/translation/useTranslation.js b/src-ui/logics/configs/translation/useTranslation.js index b282bf90..28ab10a4 100644 --- a/src-ui/logics/configs/translation/useTranslation.js +++ b/src-ui/logics/configs/translation/useTranslation.js @@ -1,13 +1,15 @@ import { useStore_CTranslate2WeightTypeStatus, useStore_SelectedCTranslate2WeightType, - useStore_SelectableCTranslate2ComputeDeviceList, - useStore_SelectedCTranslate2ComputeDevice, + useStore_SelectableCTranslate2ComputeTypeList, + useStore_SelectedCTranslate2ComputeType, + useStore_SelectableTranslationComputeDeviceList, + useStore_SelectedTranslationComputeDevice, useStore_DeepLAuthKey, } from "@store"; import { useStdoutToPython } from "@useStdoutToPython"; import { useI18n } from "@useI18n"; -import { transformToIndexedArray } from "@utils"; +import { transformToIndexedArray, arrayToObject } from "@utils"; import { useNotificationStatus } from "@logics_common"; export const useTranslation = () => { @@ -17,8 +19,13 @@ export const useTranslation = () => { const { currentCTranslate2WeightTypeStatus, updateCTranslate2WeightTypeStatus, pendingCTranslate2WeightTypeStatus } = useStore_CTranslate2WeightTypeStatus(); const { currentSelectedCTranslate2WeightType, updateSelectedCTranslate2WeightType, pendingSelectedCTranslate2WeightType } = useStore_SelectedCTranslate2WeightType(); - const { currentSelectableCTranslate2ComputeDeviceList, updateSelectableCTranslate2ComputeDeviceList, pendingSelectableCTranslate2ComputeDeviceList } = useStore_SelectableCTranslate2ComputeDeviceList(); - const { currentSelectedCTranslate2ComputeDevice, updateSelectedCTranslate2ComputeDevice, pendingSelectedCTranslate2ComputeDevice } = useStore_SelectedCTranslate2ComputeDevice(); + + const { currentSelectableCTranslate2ComputeTypeList, updateSelectableCTranslate2ComputeTypeList, pendingSelectableCTranslate2ComputeTypeList } = useStore_SelectableCTranslate2ComputeTypeList(); + const { 
currentSelectedCTranslate2ComputeType, updateSelectedCTranslate2ComputeType, pendingSelectedCTranslate2ComputeType } = useStore_SelectedCTranslate2ComputeType(); + + const { currentSelectableTranslationComputeDeviceList, updateSelectableTranslationComputeDeviceList, pendingSelectableTranslationComputeDeviceList } = useStore_SelectableTranslationComputeDeviceList(); + const { currentSelectedTranslationComputeDevice, updateSelectedTranslationComputeDevice, pendingSelectedTranslationComputeDevice } = useStore_SelectedTranslationComputeDevice(); + const { currentDeepLAuthKey, updateDeepLAuthKey, pendingDeepLAuthKey } = useStore_DeepLAuthKey(); @@ -80,28 +87,56 @@ export const useTranslation = () => { }; - const getSelectableCTranslate2ComputeDeviceList = () => { - pendingSelectableCTranslate2ComputeDeviceList(); + + const getSelectableCTranslate2ComputeTypeList = () => { + pendingSelectableCTranslate2ComputeTypeList(); + asyncStdoutToPython("/get/data/ctranslate2_compute_type_list"); + }; + + const updateSelectableCTranslate2ComputeTypeList_FromBackend = (payload) => { + updateSelectableCTranslate2ComputeTypeList(arrayToObject(payload)); + }; + + + const getSelectedCTranslate2ComputeType = () => { + pendingSelectedCTranslate2ComputeType(); + asyncStdoutToPython("/get/data/ctranslate2_compute_type"); + }; + + const setSelectedCTranslate2ComputeType = (selected_ctranslate2_compute_type) => { + pendingSelectedCTranslate2ComputeType(); + asyncStdoutToPython("/set/data/ctranslate2_compute_type", selected_ctranslate2_compute_type); + }; + + const setSuccessSelectedCTranslate2ComputeType = (selected_ctranslate2_compute_type) => { + updateSelectedCTranslate2ComputeType(selected_ctranslate2_compute_type); + showNotification_SaveSuccess(); + }; + + + + const getSelectableTranslationComputeDeviceList = () => { + pendingSelectableTranslationComputeDeviceList(); asyncStdoutToPython("/get/data/translation_compute_device_list"); }; - const 
updateSelectableCTranslate2ComputeDeviceList_FromBackend = (payload) => { - updateSelectableCTranslate2ComputeDeviceList(transformToIndexedArray(payload)); + const updateSelectableTranslationComputeDeviceList_FromBackend = (payload) => { + updateSelectableTranslationComputeDeviceList(transformToIndexedArray(payload)); }; - const getSelectedCTranslate2ComputeDevice = () => { - pendingSelectedCTranslate2ComputeDevice(); + const getSelectedTranslationComputeDevice = () => { + pendingSelectedTranslationComputeDevice(); asyncStdoutToPython("/get/data/selected_translation_compute_device"); }; - const setSelectedCTranslate2ComputeDevice = (selected_translation_compute_device) => { - pendingSelectedCTranslate2ComputeDevice(); + const setSelectedTranslationComputeDevice = (selected_translation_compute_device) => { + pendingSelectedTranslationComputeDevice(); asyncStdoutToPython("/set/data/selected_translation_compute_device", selected_translation_compute_device); }; - const setSuccessSelectedCTranslate2ComputeDevice = (selected_translation_compute_device) => { - updateSelectedCTranslate2ComputeDevice(selected_translation_compute_device); + const setSuccessSelectedTranslationComputeDevice = (selected_translation_compute_device) => { + updateSelectedTranslationComputeDevice(selected_translation_compute_device); showNotification_SaveSuccess(); }; @@ -146,16 +181,29 @@ export const useTranslation = () => { setSelectedCTranslate2WeightType, setSuccessSelectedCTranslate2WeightType, - currentSelectableCTranslate2ComputeDeviceList, - getSelectableCTranslate2ComputeDeviceList, - updateSelectableCTranslate2ComputeDeviceList, - updateSelectableCTranslate2ComputeDeviceList_FromBackend, - currentSelectedCTranslate2ComputeDevice, - getSelectedCTranslate2ComputeDevice, - updateSelectedCTranslate2ComputeDevice, - setSelectedCTranslate2ComputeDevice, - setSuccessSelectedCTranslate2ComputeDevice, + currentSelectableCTranslate2ComputeTypeList, + getSelectableCTranslate2ComputeTypeList, + 
updateSelectableCTranslate2ComputeTypeList, + updateSelectableCTranslate2ComputeTypeList_FromBackend, + + currentSelectedCTranslate2ComputeType, + getSelectedCTranslate2ComputeType, + updateSelectedCTranslate2ComputeType, + setSelectedCTranslate2ComputeType, + setSuccessSelectedCTranslate2ComputeType, + + + currentSelectableTranslationComputeDeviceList, + getSelectableTranslationComputeDeviceList, + updateSelectableTranslationComputeDeviceList, + updateSelectableTranslationComputeDeviceList_FromBackend, + + currentSelectedTranslationComputeDevice, + getSelectedTranslationComputeDevice, + updateSelectedTranslationComputeDevice, + setSelectedTranslationComputeDevice, + setSuccessSelectedTranslationComputeDevice, currentDeepLAuthKey, getDeepLAuthKey, diff --git a/src-ui/logics/useReceiveRoutes.js b/src-ui/logics/useReceiveRoutes.js index c36b5fa7..d3452721 100644 --- a/src-ui/logics/useReceiveRoutes.js +++ b/src-ui/logics/useReceiveRoutes.js @@ -168,19 +168,22 @@ export const ROUTE_META_LIST = [ { endpoint: "/delete/data/deepl_auth_key", ns: configs, hook_name: "useTranslation", method_name: "deleteSuccessDeepLAuthKey" }, // Translation (AI Models) + { endpoint: "/get/data/selectable_ctranslate2_weight_type_dict", ns: configs, hook_name: "useTranslation", method_name: "updateDownloadedCTranslate2WeightTypeStatus" }, { endpoint: "/get/data/ctranslate2_weight_type", ns: configs, hook_name: "useTranslation", method_name: "updateSelectedCTranslate2WeightType" }, { endpoint: "/set/data/ctranslate2_weight_type", ns: configs, hook_name: "useTranslation", method_name: "setSuccessSelectedCTranslate2WeightType" }, - { endpoint: "/get/data/selectable_ctranslate2_weight_type_dict", ns: configs, hook_name: "useTranslation", method_name: "updateDownloadedCTranslate2WeightTypeStatus" }, + { endpoint: "/get/data/ctranslate2_compute_type_list", ns: configs, hook_name: "useTranslation", method_name: "updateSelectableCTranslate2ComputeTypeList_FromBackend" }, + { endpoint: 
"/get/data/ctranslate2_compute_type", ns: configs, hook_name: "useTranslation", method_name: "updateSelectedCTranslate2ComputeType" }, + { endpoint: "/set/data/ctranslate2_compute_type", ns: configs, hook_name: "useTranslation", method_name: "setSuccessSelectedCTranslate2ComputeType" }, { endpoint: "/run/downloaded_ctranslate2_weight", ns: configs, hook_name: "useTranslation", method_name: "downloadedCTranslate2WeightType" }, { endpoint: "/run/download_ctranslate2_weight", ns: null, hook_name: null, method_name: null }, { endpoint: "/run/download_progress_ctranslate2_weight", ns: configs, hook_name: "useTranslation", method_name: "updateDownloadProgressCTranslate2WeightTypeStatus" }, - { endpoint: "/get/data/translation_compute_device_list", ns: configs, hook_name: "useTranslation", method_name: "updateSelectableCTranslate2ComputeDeviceList_FromBackend" }, + { endpoint: "/get/data/translation_compute_device_list", ns: configs, hook_name: "useTranslation", method_name: "updateSelectableTranslationComputeDeviceList_FromBackend" }, - { endpoint: "/get/data/selected_translation_compute_device", ns: configs, hook_name: "useTranslation", method_name: "updateSelectedCTranslate2ComputeDevice" }, - { endpoint: "/set/data/selected_translation_compute_device", ns: configs, hook_name: "useTranslation", method_name: "setSuccessSelectedCTranslate2ComputeDevice" }, + { endpoint: "/get/data/selected_translation_compute_device", ns: configs, hook_name: "useTranslation", method_name: "updateSelectedTranslationComputeDevice" }, + { endpoint: "/set/data/selected_translation_compute_device", ns: configs, hook_name: "useTranslation", method_name: "setSuccessSelectedTranslationComputeDevice" }, // Transcription diff --git a/src-ui/store.js b/src-ui/store.js index e36aad46..ba00127b 100644 --- a/src-ui/store.js +++ b/src-ui/store.js @@ -218,10 +218,15 @@ export const { atomInstance: Atom_MicWordFilterList, useHook: useStore_MicWordFi // Translation export const { atomInstance: 
Atom_DeepLAuthKey, useHook: useStore_DeepLAuthKey } = createAtomWithHook(null, "DeepLAuthKey"); export const { atomInstance: Atom_SelectedCTranslate2WeightType, useHook: useStore_SelectedCTranslate2WeightType } = createAtomWithHook("", "SelectedCTranslate2WeightType"); -export const { atomInstance: Atom_SelectableCTranslate2ComputeDeviceList, useHook: useStore_SelectableCTranslate2ComputeDeviceList } = createAtomWithHook({}, "SelectableCTranslate2ComputeDeviceList"); -export const { atomInstance: Atom_SelectedCTranslate2ComputeDevice, useHook: useStore_SelectedCTranslate2ComputeDevice } = createAtomWithHook("", "SelectedCTranslate2ComputeDevice"); export const { atomInstance: Atom_CTranslate2WeightTypeStatus, useHook: useStore_CTranslate2WeightTypeStatus } = createAtomWithHook(ctranslate2_weight_type_status, "CTranslate2WeightTypeStatus"); +export const { atomInstance: Atom_SelectableCTranslate2ComputeTypeList, useHook: useStore_SelectableCTranslate2ComputeTypeList } = createAtomWithHook({}, "SelectableCTranslate2ComputeTypeList"); +export const { atomInstance: Atom_SelectedCTranslate2ComputeType, useHook: useStore_SelectedCTranslate2ComputeType } = createAtomWithHook("", "SelectedCTranslate2ComputeType"); + + +export const { atomInstance: Atom_SelectableTranslationComputeDeviceList, useHook: useStore_SelectableTranslationComputeDeviceList } = createAtomWithHook({}, "SelectableTranslationComputeDeviceList"); +export const { atomInstance: Atom_SelectedTranslationComputeDevice, useHook: useStore_SelectedTranslationComputeDevice } = createAtomWithHook("", "SelectedTranslationComputeDevice"); + // Transcription export const { atomInstance: Atom_MicRecordTimeout, useHook: useStore_MicRecordTimeout } = createAtomWithHook(0, "MicRecordTimeout"); export const { atomInstance: Atom_MicPhraseTimeout, useHook: useStore_MicPhraseTimeout } = createAtomWithHook(0, "MicPhraseTimeout"); From 9fd61677f9ea8fd4fd9792d1b0f484ee5bb0de06 Mon Sep 17 00:00:00 2001 From: Sakamoto Shiina 
<68018796+ShiinaSakamoto@users.noreply.github.com> Date: Fri, 19 Sep 2025 15:58:19 +0900 Subject: [PATCH 31/92] [Update/Chore] Config Page: Add 'Whisper Compute Type' Selection. UI: Rename 'Whisper compute device' to 'Transcription compute device'. --- locales/en.yml | 6 +- locales/ja.yml | 6 +- locales/ko.yml | 6 +- locales/zh-Hans.yml | 4 +- locales/zh-Hant.yml | 4 +- .../transcription/Transcription.jsx | 53 +++++++--- .../configs/transcription/useTranscription.js | 98 ++++++++++++++----- src-ui/logics/useReceiveRoutes.js | 12 ++- src-ui/store.js | 8 +- 9 files changed, 144 insertions(+), 53 deletions(-) diff --git a/locales/en.yml b/locales/en.yml index e96de8fb..25b04e62 100644 --- a/locales/en.yml +++ b/locales/en.yml @@ -179,8 +179,10 @@ config_page: desc: "Larger models have higher accuracy, but they also consume more CPU or GPU resources.\nEspecially for models larger than medium, it may be difficult or even impossible to use them depending on the performance of your CPU/GPU." model_template: "{{model_name}} model ({{capacity}})" recommended_model_template: "{{model_name}} model ({{capacity}}) (Recommended)" - whisper_compute_device: - label: "Processing Device Used For Whisper" + whisper_compute_type: + label: "Processing type for AI transcription {{whisper}}" + transcription_compute_device: + label: "Processing Device Used For AI transcription" vr: single_line: "Single line" diff --git a/locales/ja.yml b/locales/ja.yml index a16b6b1d..39542767 100644 --- a/locales/ja.yml +++ b/locales/ja.yml @@ -179,8 +179,10 @@ config_page: desc: "容量が大きいモデルほど精度は高いですが、その分CPUやGPUを占有します。\n※特にmediumより容量の大きいモデルは、CPU/GPUの性能によっては使用すらも困難です。" model_template: "{{model_name}} モデル ({{capacity}})" recommended_model_template: "{{model_name}} モデル ({{capacity}}) [推奨]" - whisper_compute_device: - label: "Whisperで使用する処理デバイス" + whisper_compute_type: + label: "AI音声認識 {{whisper}} の処理タイプ" + transcription_compute_device: + label: "AI音声認識で使用する処理デバイス" vr: single_line: "一行" diff --git 
a/locales/ko.yml b/locales/ko.yml index df188d5f..288c63e3 100644 --- a/locales/ko.yml +++ b/locales/ko.yml @@ -179,8 +179,10 @@ config_page: desc: "용량이 큰 모델일수록 정확도는 높지만, 그만큼 CPU나 GPU를 많이 차지합니다. * 특히 medium보다 용량이 큰 모델은 CPU/GPU 성능에 따라 사용 자체가 어려울 수 있습니다." model_template: "{{model_name}} 모델 ({{capacity}})" recommended_model_template: "{{model_name}} 모델 ({{capacity}}) (권장)" - whisper_compute_device: - label: "Whisper에서 사용할 처리 장치" + whisper_compute_type: + label: + transcription_compute_device: + label: vr: single_line: "한 줄" diff --git a/locales/zh-Hans.yml b/locales/zh-Hans.yml index e2bd4a14..7cfc594e 100644 --- a/locales/zh-Hans.yml +++ b/locales/zh-Hans.yml @@ -179,7 +179,9 @@ config_page: desc: model_template: "{{model_name}} 模型 ({{capacity}})" recommended_model_template: "{{model_name}} 模型 ({{capacity}}) (推荐)" - whisper_compute_device: + whisper_compute_type: + label: + transcription_compute_device: label: vr: diff --git a/locales/zh-Hant.yml b/locales/zh-Hant.yml index bc190b25..039b60ba 100644 --- a/locales/zh-Hant.yml +++ b/locales/zh-Hant.yml @@ -179,7 +179,9 @@ config_page: desc: model_template: "{{model_name}}模型({{capacity}})" recommended_model_template: "{{model_name}}模型({{capacity}})(推薦)" - whisper_compute_device: + whisper_compute_type: + label: + transcription_compute_device: label: vr: diff --git a/src-ui/app/config_page/setting_section/setting_box/transcription/Transcription.jsx b/src-ui/app/config_page/setting_section/setting_box/transcription/Transcription.jsx index 7416c970..2e0daf92 100644 --- a/src-ui/app/config_page/setting_section/setting_box/transcription/Transcription.jsx +++ b/src-ui/app/config_page/setting_section/setting_box/transcription/Transcription.jsx @@ -201,7 +201,8 @@ const TranscriptionEngine_Container = () => { - + + ); }; @@ -274,46 +275,70 @@ const WhisperWeightType_Box = () => { ); }; +const WhisperComputeType_Box = () => { + const { t } = useI18n(); + const { currentSelectableWhisperComputeTypeList } = useTranscription(); + 
const { currentSelectedWhisperComputeType, setSelectedWhisperComputeType } = useTranscription(); + + const selectFunction = (selected_data) => { + setSelectedWhisperComputeType(selected_data.selected_id); + }; + + const whisper_compute_type_label = t("config_page.transcription.whisper_compute_type.label", { + whisper: "Whisper" + }); + + return ( + + ); +}; // Duplicate import { useComputeMode } from "@logics_common"; -const WhisperComputeDevice_Box = () => { +const TranscriptionComputeDevice_Box = () => { const { t } = useI18n(); - const { currentSelectedWhisperComputeDevice, setSelectedWhisperComputeDevice } = useTranscription(); - const { currentSelectableWhisperComputeDeviceList } = useTranscription(); + const { currentSelectedTranscriptionComputeDevice, setSelectedTranscriptionComputeDevice } = useTranscription(); + const { currentSelectableTranscriptionComputeDeviceList } = useTranscription(); const selectFunction = (selected_data) => { - const target_obj = currentSelectableWhisperComputeDeviceList.data[selected_data.selected_id]; - setSelectedWhisperComputeDevice(target_obj); + const target_obj = currentSelectableTranscriptionComputeDeviceList.data[selected_data.selected_id]; + setSelectedTranscriptionComputeDevice(target_obj); }; - const list_for_ui = transformDeviceArray(currentSelectableWhisperComputeDeviceList.data); + const list_for_ui = transformDeviceArray(currentSelectableTranscriptionComputeDeviceList.data); - const target_index = findKeyByDeviceValue(currentSelectableWhisperComputeDeviceList.data, currentSelectedWhisperComputeDevice.data); + const target_index = findKeyByDeviceValue(currentSelectableTranscriptionComputeDeviceList.data, currentSelectedTranscriptionComputeDevice.data); const { currentComputeMode } = useComputeMode(); if (currentComputeMode.data === "cpu") { return ( ) } return ( ); }; diff --git a/src-ui/logics/configs/transcription/useTranscription.js b/src-ui/logics/configs/transcription/useTranscription.js index 294b0473..4af6a28c 
100644 --- a/src-ui/logics/configs/transcription/useTranscription.js +++ b/src-ui/logics/configs/transcription/useTranscription.js @@ -8,12 +8,14 @@ import { useStore_SpeakerPhraseTimeout, useStore_SpeakerRecordTimeout, - useStore_SelectableWhisperComputeDeviceList, + useStore_SelectableTranscriptionComputeDeviceList, useStore_SelectedTranscriptionEngine, - useStore_SelectedWhisperComputeDevice, - useStore_SelectedWhisperWeightType, + useStore_SelectedTranscriptionComputeDevice, useStore_WhisperWeightTypeStatus, + useStore_SelectedWhisperWeightType, + useStore_SelectedWhisperComputeType, + useStore_SelectableWhisperComputeTypeList, useStore_MicAvgLogprob, useStore_MicNoSpeechProb, @@ -21,7 +23,7 @@ import { useStore_SpeakerNoSpeechProb, } from "@store"; import { useStdoutToPython } from "@useStdoutToPython"; -import { transformToIndexedArray } from "@utils"; +import { transformToIndexedArray, arrayToObject } from "@utils"; import { useNotificationStatus } from "@logics_common"; export const useTranscription = () => { @@ -41,10 +43,16 @@ export const useTranscription = () => { // Transcription Engines const { currentSelectedTranscriptionEngine, updateSelectedTranscriptionEngine, pendingSelectedTranscriptionEngine } = useStore_SelectedTranscriptionEngine(); + const { currentWhisperWeightTypeStatus, updateWhisperWeightTypeStatus, pendingWhisperWeightTypeStatus } = useStore_WhisperWeightTypeStatus(); const { currentSelectedWhisperWeightType, updateSelectedWhisperWeightType, pendingSelectedWhisperWeightType } = useStore_SelectedWhisperWeightType(); - const { currentSelectableWhisperComputeDeviceList, updateSelectableWhisperComputeDeviceList, pendingSelectableWhisperComputeDeviceList } = useStore_SelectableWhisperComputeDeviceList(); - const { currentSelectedWhisperComputeDevice, updateSelectedWhisperComputeDevice, pendingSelectedWhisperComputeDevice } = useStore_SelectedWhisperComputeDevice(); + + + const { currentSelectableWhisperComputeTypeList, 
updateSelectableWhisperComputeTypeList, pendingSelectableWhisperComputeTypeList } = useStore_SelectableWhisperComputeTypeList(); + const { currentSelectedWhisperComputeType, updateSelectedWhisperComputeType, pendingSelectedWhisperComputeType } = useStore_SelectedWhisperComputeType(); + + const { currentSelectableTranscriptionComputeDeviceList, updateSelectableTranscriptionComputeDeviceList, pendingSelectableTranscriptionComputeDeviceList } = useStore_SelectableTranscriptionComputeDeviceList(); + const { currentSelectedTranscriptionComputeDevice, updateSelectedTranscriptionComputeDevice, pendingSelectedTranscriptionComputeDevice } = useStore_SelectedTranscriptionComputeDevice(); // Advanced Settings const { currentMicAvgLogprob, updateMicAvgLogprob, pendingMicAvgLogprob } = useStore_MicAvgLogprob(); @@ -246,6 +254,33 @@ export const useTranscription = () => { asyncStdoutToPython("/run/download_whisper_weight", weight_type); }; + + const getSelectableWhisperComputeTypeList = () => { + pendingSelectableWhisperComputeTypeList(); + asyncStdoutToPython("/get/data/whisper_compute_type_list"); + }; + + const updateSelectableWhisperComputeTypeList_FromBackend = (payload) => { + updateSelectableWhisperComputeTypeList(arrayToObject(payload)); + }; + + + const getSelectedWhisperComputeType = () => { + pendingSelectedWhisperComputeType(); + asyncStdoutToPython("/get/data/whisper_compute_type"); + }; + + const setSelectedWhisperComputeType = (selected_whisper_compute_type) => { + pendingSelectedWhisperComputeType(); + asyncStdoutToPython("/set/data/whisper_compute_type", selected_whisper_compute_type); + }; + + const setSuccessSelectedWhisperComputeType = (selected_whisper_compute_type) => { + updateSelectedWhisperComputeType(selected_whisper_compute_type); + showNotification_SaveSuccess(); + }; + + // Transcription Engines (Selected Weight Type) const getSelectedWhisperWeightType = () => { pendingSelectedWhisperWeightType(); @@ -263,28 +298,28 @@ export const useTranscription = 
() => { }; // Transcription Engines (Compute Device List) - const getSelectableWhisperComputeDeviceList = () => { - pendingSelectableWhisperComputeDeviceList(); + const getSelectableTranscriptionComputeDeviceList = () => { + pendingSelectableTranscriptionComputeDeviceList(); asyncStdoutToPython("/get/data/transcription_compute_device_list"); }; - const updateSelectableWhisperComputeDeviceList_FromBackend = (payload) => { - updateSelectableWhisperComputeDeviceList(transformToIndexedArray(payload)); + const updateSelectableTranscriptionComputeDeviceList_FromBackend = (payload) => { + updateSelectableTranscriptionComputeDeviceList(transformToIndexedArray(payload)); }; // Transcription Engines (Selected Compute Device) - const getSelectedWhisperComputeDevice = () => { - pendingSelectedWhisperComputeDevice(); + const getSelectedTranscriptionComputeDevice = () => { + pendingSelectedTranscriptionComputeDevice(); asyncStdoutToPython("/get/data/selected_transcription_compute_device"); }; - const setSelectedWhisperComputeDevice = (selected_transcription_compute_device) => { - pendingSelectedWhisperComputeDevice(); + const setSelectedTranscriptionComputeDevice = (selected_transcription_compute_device) => { + pendingSelectedTranscriptionComputeDevice(); asyncStdoutToPython("/set/data/selected_transcription_compute_device", selected_transcription_compute_device); }; - const setSuccessSelectedWhisperComputeDevice = (dev) => { - updateSelectedWhisperComputeDevice(dev); + const setSuccessSelectedTranscriptionComputeDevice = (dev) => { + updateSelectedTranscriptionComputeDevice(dev); showNotification_SaveSuccess(); }; @@ -416,16 +451,29 @@ export const useTranscription = () => { setSelectedWhisperWeightType, setSuccessSelectedWhisperWeightType, - currentSelectableWhisperComputeDeviceList, - getSelectableWhisperComputeDeviceList, - updateSelectableWhisperComputeDeviceList, - updateSelectableWhisperComputeDeviceList_FromBackend, - currentSelectedWhisperComputeDevice, - 
getSelectedWhisperComputeDevice, - updateSelectedWhisperComputeDevice, - setSelectedWhisperComputeDevice, - setSuccessSelectedWhisperComputeDevice, + currentSelectableWhisperComputeTypeList, + getSelectableWhisperComputeTypeList, + updateSelectableWhisperComputeTypeList, + updateSelectableWhisperComputeTypeList_FromBackend, + + currentSelectedWhisperComputeType, + getSelectedWhisperComputeType, + updateSelectedWhisperComputeType, + setSelectedWhisperComputeType, + setSuccessSelectedWhisperComputeType, + + + currentSelectableTranscriptionComputeDeviceList, + getSelectableTranscriptionComputeDeviceList, + updateSelectableTranscriptionComputeDeviceList, + updateSelectableTranscriptionComputeDeviceList_FromBackend, + + currentSelectedTranscriptionComputeDevice, + getSelectedTranscriptionComputeDevice, + updateSelectedTranscriptionComputeDevice, + setSelectedTranscriptionComputeDevice, + setSuccessSelectedTranscriptionComputeDevice, // Advanced // Mic Avg Logprob diff --git a/src-ui/logics/useReceiveRoutes.js b/src-ui/logics/useReceiveRoutes.js index d3452721..a4b92e6e 100644 --- a/src-ui/logics/useReceiveRoutes.js +++ b/src-ui/logics/useReceiveRoutes.js @@ -214,18 +214,22 @@ export const ROUTE_META_LIST = [ { endpoint: "/get/data/selected_transcription_engine", ns: configs, hook_name: "useTranscription", method_name: "updateSelectedTranscriptionEngine" }, { endpoint: "/set/data/selected_transcription_engine", ns: configs, hook_name: "useTranscription", method_name: "setSuccessSelectedTranscriptionEngine" }, + { endpoint: "/get/data/selectable_whisper_weight_type_dict", ns: configs, hook_name: "useTranscription", method_name: "updateDownloadedWhisperWeightTypeStatus" }, { endpoint: "/get/data/whisper_weight_type", ns: configs, hook_name: "useTranscription", method_name: "updateSelectedWhisperWeightType" }, { endpoint: "/set/data/whisper_weight_type", ns: configs, hook_name: "useTranscription", method_name: "setSuccessSelectedWhisperWeightType" }, - { endpoint: 
"/get/data/selectable_whisper_weight_type_dict", ns: configs, hook_name: "useTranscription", method_name: "updateDownloadedWhisperWeightTypeStatus" }, + { endpoint: "/get/data/whisper_compute_type_list", ns: configs, hook_name: "useTranscription", method_name: "updateSelectableWhisperComputeTypeList_FromBackend" }, + { endpoint: "/get/data/whisper_compute_type", ns: configs, hook_name: "useTranscription", method_name: "updateSelectedWhisperComputeType" }, + { endpoint: "/set/data/whisper_compute_type", ns: configs, hook_name: "useTranscription", method_name: "setSuccessSelectedWhisperComputeType" }, + { endpoint: "/run/downloaded_whisper_weight", ns: configs, hook_name: "useTranscription", method_name: "downloadedWhisperWeightType" }, { endpoint: "/run/download_whisper_weight", ns: null, hook_name: null, method_name: null }, { endpoint: "/run/download_progress_whisper_weight", ns: configs, hook_name: "useTranscription", method_name: "updateDownloadProgressWhisperWeightTypeStatus" }, - { endpoint: "/get/data/transcription_compute_device_list", ns: configs, hook_name: "useTranscription", method_name: "updateSelectableWhisperComputeDeviceList_FromBackend" }, - { endpoint: "/get/data/selected_transcription_compute_device", ns: configs, hook_name: "useTranscription", method_name: "updateSelectedWhisperComputeDevice" }, - { endpoint: "/set/data/selected_transcription_compute_device", ns: configs, hook_name: "useTranscription", method_name: "setSuccessSelectedWhisperComputeDevice" }, + { endpoint: "/get/data/transcription_compute_device_list", ns: configs, hook_name: "useTranscription", method_name: "updateSelectableTranscriptionComputeDeviceList_FromBackend" }, + { endpoint: "/get/data/selected_transcription_compute_device", ns: configs, hook_name: "useTranscription", method_name: "updateSelectedTranscriptionComputeDevice" }, + { endpoint: "/set/data/selected_transcription_compute_device", ns: configs, hook_name: "useTranscription", method_name: 
"setSuccessSelectedTranscriptionComputeDevice" }, // Transcription (Advanced) { endpoint: "/get/data/mic_avg_logprob", ns: configs, hook_name: "useTranscription", method_name: "updateMicAvgLogprob" }, diff --git a/src-ui/store.js b/src-ui/store.js index ba00127b..6d110bce 100644 --- a/src-ui/store.js +++ b/src-ui/store.js @@ -240,8 +240,12 @@ export const { atomInstance: Atom_SelectedWhisperWeightType, useHook: useStore_S export const { atomInstance: Atom_WhisperWeightTypeStatus, useHook: useStore_WhisperWeightTypeStatus } = createAtomWithHook(whisper_weight_type_status, "WhisperWeightTypeStatus"); export const { atomInstance: Atom_SelectedTranscriptionEngine, useHook: useStore_SelectedTranscriptionEngine } = createAtomWithHook(whisper_weight_type_status, "SelectedTranscriptionEngine"); -export const { atomInstance: Atom_SelectableWhisperComputeDeviceList, useHook: useStore_SelectableWhisperComputeDeviceList } = createAtomWithHook({}, "SelectableWhisperComputeDeviceList"); -export const { atomInstance: Atom_SelectedWhisperComputeDevice, useHook: useStore_SelectedWhisperComputeDevice } = createAtomWithHook("", "SelectedWhisperComputeDevice"); +export const { atomInstance: Atom_SelectableWhisperComputeTypeList, useHook: useStore_SelectableWhisperComputeTypeList } = createAtomWithHook({}, "SelectableWhisperComputeTypeList"); +export const { atomInstance: Atom_SelectedWhisperComputeType, useHook: useStore_SelectedWhisperComputeType } = createAtomWithHook("", "SelectedWhisperComputeType"); + + +export const { atomInstance: Atom_SelectableTranscriptionComputeDeviceList, useHook: useStore_SelectableTranscriptionComputeDeviceList } = createAtomWithHook({}, "SelectableTranscriptionComputeDeviceList"); +export const { atomInstance: Atom_SelectedTranscriptionComputeDevice, useHook: useStore_SelectedTranscriptionComputeDevice } = createAtomWithHook("", "SelectedTranscriptionComputeDevice"); export const { atomInstance: Atom_MicAvgLogprob, useHook: useStore_MicAvgLogprob } = 
createAtomWithHook(-0.8, "MicAvgLogprob"); export const { atomInstance: Atom_MicNoSpeechProb, useHook: useStore_MicNoSpeechProb } = createAtomWithHook(0.6, "MicNoSpeechProb"); From 9d94fd6a5e23ca35a50d55330fb2173e51489989 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Fri, 19 Sep 2025 18:09:39 +0900 Subject: [PATCH 32/92] [Update] Refactor compute type management: unify device list retrieval and remove deprecated methods --- src-python/config.py | 22 +++--------- src-python/controller.py | 12 ------- src-python/mainloop.py | 4 --- src-python/utils.py | 74 ++++++++++++++++++++++++++++++++++------ 4 files changed, 68 insertions(+), 44 deletions(-) diff --git a/src-python/config.py b/src-python/config.py index 1a605701..bd3af244 100644 --- a/src-python/config.py +++ b/src-python/config.py @@ -11,7 +11,7 @@ from models.translation.translation_languages import translation_lang from models.translation.translation_utils import ctranslate2_weights from models.transcription.transcription_languages import transcription_lang from models.transcription.transcription_whisper import _MODELS as whisper_models -from utils import errorLogging, validateDictStructure, getComputeTypeList +from utils import errorLogging, validateDictStructure, getComputeDeviceList json_serializable_vars = {} def json_serializable(var_name): @@ -135,14 +135,6 @@ class Config: def SELECTABLE_COMPUTE_DEVICE_LIST(self): return self._SELECTABLE_COMPUTE_DEVICE_LIST - @property - def SELECTABLE_CTRANSLATE2_COMPUTE_TYPE_LIST(self): - return self._SELECTABLE_CTRANSLATE2_COMPUTE_TYPE_LIST - - @property - def SELECTABLE_WHISPER_COMPUTE_TYPE_LIST(self): - return self._SELECTABLE_WHISPER_COMPUTE_TYPE_LIST - @property def SEND_MESSAGE_BUTTON_TYPE_LIST(self): return self._SEND_MESSAGE_BUTTON_TYPE_LIST @@ -830,7 +822,7 @@ class Config: @CTRANSLATE2_COMPUTE_TYPE.setter def CTRANSLATE2_COMPUTE_TYPE(self, value): if isinstance(value, str): - if value in 
self.SELECTABLE_CTRANSLATE2_COMPUTE_TYPE_LIST: + if value in self.SELECTED_TRANSLATION_COMPUTE_DEVICE["compute_type"]: self._CTRANSLATE2_COMPUTE_TYPE = value self.saveConfig(inspect.currentframe().f_code.co_name, value) @@ -854,7 +846,7 @@ class Config: @WHISPER_COMPUTE_TYPE.setter def WHISPER_COMPUTE_TYPE(self, value): if isinstance(value, str): - if value in self.SELECTABLE_WHISPER_COMPUTE_TYPE_LIST: + if value in self.SELECTED_TRANSCRIPTION_COMPUTE_DEVICE["compute_type"]: self._WHISPER_COMPUTE_TYPE = value self.saveConfig(inspect.currentframe().f_code.co_name, value) @@ -1078,13 +1070,7 @@ class Config: self._SELECTABLE_TRANSCRIPTION_ENGINE_LIST = list(transcription_lang[list(transcription_lang.keys())[0]].values())[0].keys() self._SELECTABLE_UI_LANGUAGE_LIST = ["en", "ja", "ko", "zh-Hant", "zh-Hans"] self._COMPUTE_MODE = "cuda" if torch.cuda.is_available() else "cpu" - self._SELECTABLE_COMPUTE_DEVICE_LIST = [] - if torch.cuda.is_available(): - for i in range(torch.cuda.device_count()): - self._SELECTABLE_COMPUTE_DEVICE_LIST.append({"device":"cuda", "device_index": i, "device_name": torch.cuda.get_device_name(i)}) - self._SELECTABLE_COMPUTE_DEVICE_LIST.append({"device":"cpu", "device_index": 0, "device_name": "cpu"}) - self._SELECTABLE_CTRANSLATE2_COMPUTE_TYPE_LIST = ["auto"] + getComputeTypeList() - self._SELECTABLE_WHISPER_COMPUTE_TYPE_LIST = ["auto"] + getComputeTypeList() + self._SELECTABLE_COMPUTE_DEVICE_LIST = getComputeDeviceList() self._SEND_MESSAGE_BUTTON_TYPE_LIST = ["show", "hide", "show_and_disable_enter_key"] self._SEND_MESSAGE_FORMAT_PARTS = { "message": { diff --git a/src-python/controller.py b/src-python/controller.py index 77717918..5ea86430 100644 --- a/src-python/controller.py +++ b/src-python/controller.py @@ -652,14 +652,6 @@ class Controller: def getComputeDeviceList(*args, **kwargs) -> dict: return {"status":200, "result":config.SELECTABLE_COMPUTE_DEVICE_LIST} - @staticmethod - def getCTranslate2ComputeTypeList(*args, **kwargs) -> dict: - 
return {"status":200, "result":config.SELECTABLE_CTRANSLATE2_COMPUTE_TYPE_LIST} - - @staticmethod - def getWhisperComputeTypeList(*args, **kwargs) -> dict: - return {"status":200, "result":config.SELECTABLE_WHISPER_COMPUTE_TYPE_LIST} - @staticmethod def getSelectedTranslationComputeDevice(*args, **kwargs) -> dict: return {"status":200, "result":config.SELECTED_TRANSLATION_COMPUTE_DEVICE} @@ -1455,10 +1447,6 @@ class Controller: th_callback.join() return {"status":200, "result":config.CTRANSLATE2_WEIGHT_TYPE} - @staticmethod - def getCtranslate2ComputeTypeList(*args, **kwargs) -> dict: - return {"status":200, "result":config.SELECTABLE_CTRANSLATE2_COMPUTE_TYPE_LIST} - @staticmethod def getCtranslate2ComputeType(*args, **kwargs) -> dict: return {"status":200, "result":config.CTRANSLATE2_COMPUTE_TYPE} diff --git a/src-python/mainloop.py b/src-python/mainloop.py index c7b03ea6..73e75594 100644 --- a/src-python/mainloop.py +++ b/src-python/mainloop.py @@ -162,8 +162,6 @@ mapping = { "/get/data/ctranslate2_weight_type": {"status": True, "variable":controller.getCtranslate2WeightType}, "/set/data/ctranslate2_weight_type": {"status": True, "variable":controller.setCtranslate2WeightType}, - "/get/data/ctranslate2_compute_type_list": {"status": True, "variable":controller.getCtranslate2ComputeTypeList}, - "/get/data/ctranslate2_compute_type": {"status": True, "variable":controller.getCtranslate2ComputeType}, "/set/data/ctranslate2_compute_type": {"status": True, "variable":controller.setCtranslate2ComputeType}, @@ -270,8 +268,6 @@ mapping = { "/get/data/whisper_weight_type": {"status": True, "variable":controller.getWhisperWeightType}, "/set/data/whisper_weight_type": {"status": True, "variable":controller.setWhisperWeightType}, - "/get/data/whisper_compute_type_list": {"status": True, "variable":controller.getWhisperComputeTypeList}, - "/get/data/whisper_compute_type": {"status": True, "variable":controller.getWhisperComputeType}, "/set/data/whisper_compute_type": 
{"status": True, "variable":controller.setWhisperComputeType}, diff --git a/src-python/utils.py b/src-python/utils.py index 1b28fcf6..fab62d51 100644 --- a/src-python/utils.py +++ b/src-python/utils.py @@ -5,6 +5,7 @@ import traceback import logging from logging.handlers import RotatingFileHandler +import torch from ctranslate2 import get_supported_compute_types import requests import ipaddress @@ -78,17 +79,67 @@ def isValidIpAddress(ip_address: str) -> bool: except ValueError: return False -def getComputeTypeList() -> list: - return ["int8_bfloat16", "int8_float16", "int8", "bfloat16", "float16", "int8_float32", "float32"] +def getComputeDeviceList() -> dict: + compute_types = [ + { + "device": "cpu", + "device_index": 0, + "device_name": "cpu", + "compute_types": ["auto"] + list(get_supported_compute_types("cpu", 0)), + } + ] -def getBestComputeType(device, device_index) -> str: - compute_types = get_supported_compute_types(device, device_index) - compute_types = set(compute_types) - preferred_types = getComputeTypeList() + if torch.cuda.is_available(): + for device_index in range(torch.cuda.device_count()): + gpu_device_name = torch.cuda.get_device_name(device_index) + gpu_compute_types = ["auto"] + list(get_supported_compute_types("cuda", device_index)) - for preferred_type in preferred_types: - if preferred_type in compute_types: - return preferred_type + # デバイスごとの計算タイプの制限 + if "GTX" in gpu_device_name: + unsupported_types = {"int8_bfloat16", "bfloat16", "float16", "int8"} + gpu_compute_types = [t for t in gpu_compute_types if t not in unsupported_types] + elif not any(keyword in gpu_device_name for keyword in ["RTX", "Tesla", "A100", "Quadro"]): + gpu_compute_types = ["float32"] + + compute_types.append( + { + "device": "cuda", + "device_index": device_index, + "device_name": gpu_device_name, + "compute_types": gpu_compute_types, + } + ) + + return compute_types + +def getBestComputeType(device: str, device_index: int) -> str: + compute_types = 
set(get_supported_compute_types(device, device_index)) + device_name = "cpu" if device == "cpu" else torch.cuda.get_device_name(device_index) + + # デバイスごとの優先計算タイプ + preferred_types = { + "default": ["int8_bfloat16", "int8_float16", "int8", "bfloat16", "float16", "int8_float32", "float32"], + "GTX": ["float32"], + "RTX": ["int8_bfloat16", "int8_float16", "int8", "bfloat16", "float16", "int8_float32", "float32"], + "Tesla": ["int8_bfloat16", "int8_float16", "int8", "bfloat16", "float16", "int8_float32", "float32"], + "A100": ["int8_bfloat16", "int8_float16", "int8", "bfloat16", "float16", "int8_float32", "float32"], + "Quadro": ["int8_bfloat16", "int8_float16", "int8", "bfloat16", "float16", "int8_float32", "float32"], + } + + # デバイス名に基づいて優先タイプを選択 + for key in preferred_types: + if key in device_name: + selected_types = preferred_types[key] + break + else: + selected_types = preferred_types["default"] + + # 利用可能な計算タイプを返す + for compute_type in selected_types: + if compute_type in compute_types: + return compute_type + + return "float32" def encodeBase64(data:str) -> dict: return json.loads(base64.b64decode(data).decode('utf-8')) @@ -178,4 +229,7 @@ def errorLogging() -> None: if error_logger is None: error_logger = setupLogger("error", "error.log", logging.ERROR) - error_logger.error(traceback.format_exc()) \ No newline at end of file + error_logger.error(traceback.format_exc()) + +if __name__ == "__main__": + print(getComputeDeviceList()) \ No newline at end of file From c171588ba9db90368e1b5e1c26cd71aec125f375 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Fri, 19 Sep 2025 22:49:48 +0900 Subject: [PATCH 33/92] =?UTF-8?q?[bugfix]=20Chat=E6=A9=9F=E8=83=BD?= =?UTF-8?q?=E3=82=88=E3=82=8A=E6=94=B9=E8=A1=8C=E3=82=92=E5=90=AB=E3=82=80?= =?UTF-8?q?=E6=96=87=E7=AB=A0=E3=82=92=E9=80=81=E4=BF=A1=E3=81=97=E3=81=9F?= =?UTF-8?q?=E5=A0=B4=E5=90=88=E3=81=AB=E3=80=81=E3=83=9E=E3=83=AB=E3=83=81?= 
=?UTF-8?q?=E3=83=A9=E3=82=A4=E3=83=B3=E3=81=AEoverlay=E3=81=AE=E8=A1=A8?= =?UTF-8?q?=E7=A4=BA=E3=81=AB=E5=A4=B1=E6=95=97=E3=81=99=E3=82=8B=E5=95=8F?= =?UTF-8?q?=E9=A1=8C=E3=82=92=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src-python/models/overlay/overlay_image.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src-python/models/overlay/overlay_image.py b/src-python/models/overlay/overlay_image.py index 2be5b0af..fec4c741 100644 --- a/src-python/models/overlay/overlay_image.py +++ b/src-python/models/overlay/overlay_image.py @@ -175,7 +175,8 @@ class OverlayImage: font_path = os_path.join(os_path.dirname(__file__), "..", "..", "..", "fonts", font_family) font = ImageFont.truetype(font_path, font_size) - text_width = draw.textlength(text, font) + # 改行を含んだtextの最大の文字数を計算する + text_width = max(draw.textlength(line, font) for line in text.split("\n")) character_width = text_width // len(text) character_line_num = int((ui_size["width"] // character_width) - 1) if len(text) > character_line_num: From 29ca5bfbe10f3fd342ec1d718b64f959aed5a807 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Tue, 23 Sep 2025 07:25:48 +0900 Subject: [PATCH 34/92] =?UTF-8?q?[WIP/TEST]=20Main=E3=82=AF=E3=83=A9?= =?UTF-8?q?=E3=82=B9=E3=81=AE=E5=88=9D=E6=9C=9F=E5=8C=96=E3=83=AD=E3=82=B8?= =?UTF-8?q?=E3=83=83=E3=82=AF=E3=82=92=E6=94=B9=E5=96=84=E3=81=97=E3=80=81?= =?UTF-8?q?=E3=83=86=E3=82=B9=E3=83=88=E7=94=A8=E3=81=AE=E3=82=A8=E3=83=B3?= =?UTF-8?q?=E3=83=89=E3=83=9D=E3=82=A4=E3=83=B3=E3=83=88=E3=82=92=E8=BF=BD?= =?UTF-8?q?=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src-python/mainloop.py | 32 +- src-python/test_endpoints.py | 727 +++++++++++++++++++++++++++++++++++ 2 files changed, 747 insertions(+), 12 deletions(-) create mode 100644 src-python/test_endpoints.py diff --git a/src-python/mainloop.py 
b/src-python/mainloop.py index 0010b98a..32b44dbd 100644 --- a/src-python/mainloop.py +++ b/src-python/mainloop.py @@ -347,9 +347,11 @@ init_mapping = {key:value for key, value in mapping.items() if key.startswith("/ controller.setInitMapping(init_mapping) class Main: - def __init__(self) -> None: + def __init__(self, controller_instance, mapping_data) -> None: self.queue = Queue() self.main_loop = True + self.controller = controller_instance + self.mapping = mapping_data def receiver(self) -> None: while True: @@ -360,7 +362,7 @@ class Main: endpoint = received_data.get("endpoint", None) data = received_data.get("data", None) data = encodeBase64(data) if data is not None else None - printLog(endpoint, {"receive_data":data}) + printLog(endpoint, {"receive_data": data}) self.queue.put((endpoint, data)) def startReceiver(self) -> None: @@ -369,7 +371,10 @@ class Main: th_receiver.start() def handleRequest(self, endpoint, data=None) -> tuple: - handler = mapping.get(endpoint) + result = None # デフォルト値を設定 + status = 500 # デフォルト値を設定 + + handler = self.mapping.get(endpoint) if handler is None: response = "Invalid endpoint" status = 404 @@ -385,6 +390,7 @@ class Main: errorLogging() result = str(e) status = 500 + return result, status def handler(self) -> None: @@ -417,13 +423,15 @@ class Main: def stop(self) -> None: self.main_loop = False -if __name__ == "__main__": - main = Main() - main.startReceiver() - main.startHandler() +# 外部から参照可能なインスタンスを提供 +main_instance = Main(controller_instance=controller, mapping_data=mapping) - controller.setWatchdogCallback(main.stop) - controller.init() +if __name__ == "__main__": + main_instance.startReceiver() + main_instance.startHandler() + + main_instance.controller.setWatchdogCallback(main_instance.stop) + main_instance.controller.init() # mappingのすべてのstatusをTrueにする for key in mapping.keys(): @@ -432,13 +440,13 @@ if __name__ == "__main__": process = "main" match process: case "main": - main.start() + main_instance.start() case 
"test": for _ in range(100): time.sleep(0.5) endpoint = "/get/data/mic_host_list" - result, status = main.handleRequest(endpoint) + result, status = main_instance.handleRequest(endpoint) printResponse(status, endpoint, result) case "test_all": @@ -639,6 +647,6 @@ if __name__ == "__main__": case _: data = None - result, status = main.handleRequest(endpoint, data) + result, status = main_instance.handleRequest(endpoint, data) printResponse(status, endpoint, result) time.sleep(0.5) \ No newline at end of file diff --git a/src-python/test_endpoints.py b/src-python/test_endpoints.py new file mode 100644 index 00000000..03892fa8 --- /dev/null +++ b/src-python/test_endpoints.py @@ -0,0 +1,727 @@ +import sys +import unittest + +# 初期化のため、config.jsonの削除 +import os +if os.path.exists("config.json"): + os.remove("config.json") + +from mainloop import main_instance + +class TestMainloop(unittest.TestCase): + def setUp(self): + self.main = main_instance + self.main.startReceiver() + self.main.startHandler() + + def stop_main(): + pass + self.main.controller.setWatchdogCallback(stop_main) + self.main.controller.init() + + # mappingのすべてのstatusをTrueにする + for key in self.main.mapping.keys(): + self.main.mapping[key]["status"] = True + + def test_endpoints(self): + print("単体動作の確認") + # エンドポイントとテストデータの定義 + endpoints = { + # Main Window + "/set/enable/translation": [{"data": None, "status": 200, "result": True}], + "/set/disable/translation": [{"data": None, "status": 200, "result": False}], + "/set/enable/transcription_send": [{"data": None, "status": 200, "result": True}], + "/set/disable/transcription_send": [{"data": None, "status": 200, "result": False}], + "/set/enable/transcription_receive": [{"data": None, "status": 200, "result": True}], + "/set/disable/transcription_receive": [{"data": None, "status": 200, "result": False}], + "/set/enable/foreground": [{"data": None, "status": 200, "result": True}], + "/set/disable/foreground": [{"data": None, "status": 200, "result": 
False}], + "/get/data/selected_tab_no": [{"data": None, "status": 200, "result": "1"}], + "/set/data/selected_tab_no": [ + {"data": "1", "status": 200, "result": "1"}, + {"data": "2", "status": 200, "result": "2"}, + {"data": "3", "status": 200, "result": "3"}, + ], + "/get/data/main_window_sidebar_compact_mode": [{"data": None, "status": 200, "result": False}], + "/set/enable/main_window_sidebar_compact_mode": [{"data": None, "status": 200, "result": True}], + "/set/disable/main_window_sidebar_compact_mode": [{"data": None, "status": 200, "result": False}], + "/get/data/translation_engines": [{"data": None, "status": 200, "result": ['DeepL', 'Google', 'Bing', 'Papago', 'CTranslate2']}], + "/get/data/selectable_language_list": [ + { + "data": None, + "status": 200, + "result": + [ + {'language': 'Afrikaans', 'country': 'South Africa'}, + {'language': 'Albanian', 'country': 'Albania'}, + {'language': 'Amharic', 'country': 'Ethiopia'}, + {'language': 'Arabic', 'country': 'Algeria'}, + {'language': 'Arabic', 'country': 'Bahrain'}, + {'language': 'Arabic', 'country': 'Egypt'}, + {'language': 'Arabic', 'country': 'Israel'}, + {'language': 'Arabic', 'country': 'Iraq'}, + {'language': 'Arabic', 'country': 'Jordan'}, + {'language': 'Arabic', 'country': 'Kuwait'}, + {'language': 'Arabic', 'country': 'Lebanon'}, + {'language': 'Arabic', 'country': 'Mauritania'}, + {'language': 'Arabic', 'country': 'Morocco'}, + {'language': 'Arabic', 'country': 'Oman'}, + {'language': 'Arabic', 'country': 'Qatar'}, + {'language': 'Arabic', 'country': 'Saudi Arabia'}, + {'language': 'Arabic', 'country': 'Palestine'}, + {'language': 'Arabic', 'country': 'Syria'}, + {'language': 'Arabic', 'country': 'Tunisia'}, + {'language': 'Arabic', 'country': 'United Arab Emirates'}, + {'language': 'Arabic', 'country': 'Yemen'}, + {'language': 'Armenian', 'country': 'Armenia'}, + {'language': 'Azerbaijani', 'country': 'Azerbaijan'}, + {'language': 'Basque', 'country': 'Spain'}, + {'language': 'Bengali', 
'country': 'Bangladesh'}, + {'language': 'Bengali', 'country': 'India'}, + {'language': 'Bosnian', 'country': 'Bosnia and Herzegovina'}, + {'language': 'Bulgarian', 'country': 'Bulgaria'}, + {'language': 'Catalan', 'country': 'Spain'}, + {'language': 'Chinese Simplified', 'country': 'China'}, + {'language': 'Chinese Simplified', 'country': 'Hong Kong'}, + {'language': 'Chinese Traditional', 'country': 'Taiwan'}, + {'language': 'Chinese Traditional', 'country': 'Hong Kong'}, + {'language': 'Croatian', 'country': 'Croatia'}, + {'language': 'Czech', 'country': 'Czech Republic'}, + {'language': 'Danish', 'country': 'Denmark'}, + {'language': 'Dutch', 'country': 'Belgium'}, + {'language': 'Dutch', 'country': 'Netherlands'}, + {'language': 'English', 'country': 'Australia'}, + {'language': 'English', 'country': 'Canada'}, + {'language': 'English', 'country': 'Ghana'}, + {'language': 'English', 'country': 'Hong Kong'}, + {'language': 'English', 'country': 'India'}, + {'language': 'English', 'country': 'Ireland'}, + {'language': 'English', 'country': 'Kenya'}, + {'language': 'English', 'country': 'New Zealand'}, + {'language': 'English', 'country': 'Nigeria'}, + {'language': 'English', 'country': 'Philippines'}, + {'language': 'English', 'country': 'Singapore'}, + {'language': 'English', 'country': 'South Africa'}, + {'language': 'English', 'country': 'Tanzania'}, + {'language': 'English', 'country': 'United Kingdom'}, + {'language': 'English', 'country': 'United States'}, + {'language': 'Estonian', 'country': 'Estonia'}, + {'language': 'Filipino', 'country': 'Philippines'}, + {'language': 'Finnish', 'country': 'Finland'}, + {'language': 'French', 'country': 'Belgium'}, + {'language': 'French', 'country': 'Canada'}, + {'language': 'French', 'country': 'France'}, + {'language': 'French', 'country': 'Switzerland'}, + {'language': 'Galician', 'country': 'Spain'}, + {'language': 'Georgian', 'country': 'Georgia'}, + {'language': 'German', 'country': 'Austria'}, + {'language': 
'German', 'country': 'Germany'}, + {'language': 'German', 'country': 'Switzerland'}, + {'language': 'Greek', 'country': 'Greece'}, + {'language': 'Gujarati', 'country': 'India'}, + {'language': 'Hebrew', 'country': 'Israel'}, + {'language': 'Hindi', 'country': 'India'}, + {'language': 'Hungarian', 'country': 'Hungary'}, + {'language': 'Icelandic', 'country': 'Iceland'}, + {'language': 'Indonesian', 'country': 'Indonesia'}, + {'language': 'Italian', 'country': 'Italy'}, + {'language': 'Italian', 'country': 'Switzerland'}, + {'language': 'Japanese', 'country': 'Japan'}, + {'language': 'Kannada', 'country': 'India'}, + {'language': 'Kazakh', 'country': 'Kazakhstan'}, + {'language': 'Khmer', 'country': 'Cambodia'}, + {'language': 'Korean', 'country': 'South Korea'}, + {'language': 'Lao', 'country': 'Laos'}, + {'language': 'Latvian', 'country': 'Latvia'}, + {'language': 'Lithuanian', 'country': 'Lithuania'}, + {'language': 'Macedonian', 'country': 'North Macedonia'}, + {'language': 'Malay', 'country': 'Malaysia'}, + {'language': 'Malayalam', 'country': 'India'}, + {'language': 'Mongolian', 'country': 'Mongolia'}, + {'language': 'Nepali', 'country': 'Nepal'}, + {'language': 'Norwegian', 'country': 'Norway'}, + {'language': 'Persian', 'country': 'Iran'}, + {'language': 'Polish', 'country': 'Poland'}, + {'language': 'Portuguese', 'country': 'Brazil'}, + {'language': 'Portuguese', 'country': 'Portugal'}, + {'language': 'Romanian', 'country': 'Romania'}, + {'language': 'Russian', 'country': 'Russia'}, + {'language': 'Serbian', 'country': 'Serbia'}, + {'language': 'Sinhala', 'country': 'Sri Lanka'}, + {'language': 'Slovak', 'country': 'Slovakia'}, + {'language': 'Slovenian', 'country': 'Slovenia'}, + {'language': 'Spanish', 'country': 'Argentina'}, + {'language': 'Spanish', 'country': 'Bolivia'}, + {'language': 'Spanish', 'country': 'Chile'}, + {'language': 'Spanish', 'country': 'Colombia'}, + {'language': 'Spanish', 'country': 'Costa Rica'}, + {'language': 'Spanish', 
'country': 'Dominican Republic'}, + {'language': 'Spanish', 'country': 'Ecuador'}, + {'language': 'Spanish', 'country': 'El Salvador'}, + {'language': 'Spanish', 'country': 'Guatemala'}, + {'language': 'Spanish', 'country': 'Honduras'}, + {'language': 'Spanish', 'country': 'Mexico'}, + {'language': 'Spanish', 'country': 'Nicaragua'}, + {'language': 'Spanish', 'country': 'Panama'}, + {'language': 'Spanish', 'country': 'Paraguay'}, + {'language': 'Spanish', 'country': 'Peru'}, + {'language': 'Spanish', 'country': 'Puerto Rico'}, + {'language': 'Spanish', 'country': 'Spain'}, + {'language': 'Spanish', 'country': 'United States'}, + {'language': 'Spanish', 'country': 'Uruguay'}, + {'language': 'Spanish', 'country': 'Venezuela'}, + {'language': 'Sundanese', 'country': 'Indonesia'}, + {'language': 'Swahili', 'country': 'Kenya'}, + {'language': 'Swahili', 'country': 'Tanzania'}, + {'language': 'Swedish', 'country': 'Sweden'}, + {'language': 'Tamil', 'country': 'India'}, + {'language': 'Tamil', 'country': 'malaysia'}, + {'language': 'Tamil', 'country': 'Singapore'}, + {'language': 'Tamil', 'country': 'Sri Lanka'}, + {'language': 'Telugu', 'country': 'India'}, + {'language': 'Thai', 'country': 'Thailand'}, + {'language': 'Turkish', 'country': 'Turkey'}, + {'language': 'Ukrainian', 'country': 'Ukraine'}, + {'language': 'Urdu', 'country': 'India'}, + {'language': 'Urdu', 'country': 'Pakistan'}, + {'language': 'Uzbek', 'country': 'Uzbekistan'}, + {'language': 'Vietnamese', 'country': 'Vietnam'} + ]}], + "/get/data/selected_translation_engines": [{"data": None, "status": 200, "result": {'1': 'CTranslate2', '2': 'CTranslate2', '3': 'CTranslate2'}}], + "/set/data/selected_translation_engines": [ + { + "data": {'1': 'DeepL', '2': 'Google', '3': 'Papago'}, + "status": 200, + "result": {'1': 'DeepL', '2': 'Google', '3': 'Papago'} + }, + ], + "/get/data/selected_your_languages": [ + { + "data": None, + "status": 200, + "result": { + '1': { + '1': { + 'language': 'Japanese', + 
'country': 'Japan', + 'enable': True + } + }, + '2': { + '1': { + 'language': 'Japanese', + 'country': 'Japan', + 'enable': True + } + }, + '3': { + '1': { + 'language': 'Japanese', + 'country': 'Japan', + 'enable': True + } + } + } + } + ], + "/set/data/selected_your_languages": [ + { + "data": { + '1': { + '1': { + 'language': 'Japanese', + 'country': 'Japan', + 'enable': True + }, + }, + '2': { + '1': { + 'language': 'English', + 'country': 'United States', + 'enable': True + }, + }, + '3': { + '1': { + 'language': 'French', + 'country': 'France', + 'enable': True + } + } + }, + "status": 200, + "result": { + '1': { + '1': { + 'language': 'Japanese', + 'country': 'Japan', + 'enable': True + }, + }, + '2': { + '1': { + 'language': 'English', + 'country': 'United States', + 'enable': True + }, + }, + '3': { + '1': { + 'language': 'French', + 'country': 'France', + 'enable': True + } + } + } + } + ], + "/get/data/selected_target_languages": [ + { + "data": None, + "status": 200, + "result": { + "1": { + "1": { + "language": "English", + "country": "United States", + "enable": True + }, + "2": { + "language": "English", + "country": "United States", + "enable": False + }, + "3": { + "language": "English", + "country": "United States", + "enable": False + } + }, + "2": { + "1": { + "language": "English", + "country": "United States", + "enable": True + }, + "2": { + "language": "English", + "country": "United States", + "enable": False + }, + "3": { + "language": "English", + "country": "United States", + "enable": False + } + }, + "3": { + "1": { + "language": "English", + "country": "United States", + "enable": True + }, + "2": { + "language": "English", + "country": "United States", + "enable": False + }, + "3": { + "language": "English", + "country": "United States", + "enable": False + } + } + }, + } + ], + "/set/data/selected_target_languages": [ + { + "data": { + "1": { + "1": { + "language": "Japanese", + "country": "Japan", + "enable": True + }, + "2": { + 
"language": "English", + "country": "United States", + "enable": True + }, + "3": { + "language": "French", + "country": "France", + "enable": True + } + }, + "2": { + "1": { + "language": "Japanese", + "country": "Japan", + "enable": True + }, + "2": { + "language": "English", + "country": "United States", + "enable": True + }, + "3": { + "language": "French", + "country": "France", + "enable": True + } + }, + "3": { + "1": { + "language": "Japanese", + "country": "Japan", + "enable": True + }, + "2": { + "language": "English", + "country": "United States", + "enable": True + }, + "3": { + "language": "French", + "country": "France", + "enable": True + } + } + }, + "status": 200, + "result": { + "1": { + "1": { + "language": "Japanese", + "country": "Japan", + "enable": True + }, + "2": { + "language": "English", + "country": "United States", + "enable": True + }, + "3": { + "language": "French", + "country": "France", + "enable": True + } + }, + "2": { + "1": { + "language": "Japanese", + "country": "Japan", + "enable": True + }, + "2": { + "language": "English", + "country": "United States", + "enable": True + }, + "3": { + "language": "French", + "country": "France", + "enable": True + } + }, + "3": { + "1": { + "language": "Japanese", + "country": "Japan", + "enable": True + }, + "2": { + "language": "English", + "country": "United States", + "enable": True + }, + "3": { + "language": "French", + "country": "France", + "enable": True + } + } + }, + } + ], + "/get/data/transcription_engines": [{"data": None, "status": 200, "result": ['Google', 'Whisper']}], + "/get/data/selected_transcription_engine": [{"data": None, "status": 200, "result": "Google"}], + "/set/data/selected_transcription_engine": [ + {"data": "Google", "status": 200, "result": "Google"}, + {"data": "Whisper", "status": 200, "result": "Whisper"}, + ], + "/run/send_message_box": [ + { + "data": {"id":"123456", "message":"test"}, + "status": 200, + "result": { + 'id': '123456', + 'original': { + 
'message': 'test', + 'transliteration': [] + }, + 'translations': [] + } + } + ], + "/run/typing_message_box": [{"data": None, "status": 200, "result": True}], + "/run/stop_typing_message_box": [{"data": None, "status": 200, "result": True}], + "/run/send_text_overlay": [{"data": "test_overlay", "status": 200, "result": "test_overlay"}], + "/run/swap_your_language_and_target_language": [{"data": None, "status": 200, "result": True}], + # !!!Cant be tested here!!! + # "/run/update_software": [{"data": None, "status": 200, "result": True}], + # "/run/update_cuda_software": [{"data": None, "status": 200, "result": True}], + + # Config Window + # Appearance + "/get/data/version": [{"data": None, "status": 200, "result": "3.2.2"}], + "/get/data/transparency": [{"data": None, "status": 200, "result": 100}], + "/set/data/transparency": [ + {"data": 100, "status": 200, "result": 100}, + {"data": 80, "status": 200, "result": 80}, + {"data": 50, "status": 200, "result": 50}, + {"data": 20, "status": 200, "result": 20}, + {"data": 0, "status": 200, "result": 0}, + ], + "/get/data/ui_scaling": [{"data": None, "status": 200, "result": 100}], + "/set/data/ui_scaling": [ + {"data": 100, "status": 200, "result": 100}, + {"data": 80, "status": 200, "result": 80}, + {"data": 50, "status": 200, "result": 50}, + {"data": 20, "status": 200, "result": 20}, + {"data": 10, "status": 200, "result": 10}, + ], + "/get/data/textbox_ui_scaling": [{"data": None, "status": 200, "result": 100}], + "/set/data/textbox_ui_scaling": [ + {"data": 100, "status": 200, "result": 100}, + {"data": 80, "status": 200, "result": 80}, + {"data": 50, "status": 200, "result": 50}, + {"data": 20, "status": 200, "result": 20}, + {"data": 10, "status": 200, "result": 10}, + ], + "/get/data/message_box_ratio": [{"data": None, "status": 200, "result": 10}], + "/set/data/message_box_ratio": [ + {"data": 10, "status": 200, "result": 10}, + {"data": 9, "status": 200, "result": 5.5}, + {"data": 1, "status": 200, 
"result": 1}, + ], + "/get/data/send_message_button_type": [{"data": None, "status": 200, "result": "show"}], + "/set/data/send_message_button_type": [ + {"data": "show", "status": 200, "result": "show"}, + {"data": "hide", "status": 200, "result": "hide"}, + {"data": "show_and_disable_enter_key", "status": 200, "result": "show_and_disable_enter_key"}, + ], + "/get/data/show_resend_button": [{"data": None, "status": 200, "result": False}], + "/set/enable/show_resend_button": [{"data": None, "status": 200, "result": True}], + "/set/disable/show_resend_button": [{"data": None, "status": 200, "result": False}], + "/get/data/font_family": [{"data": None, "status": 200, "result": "Yu Gothic UI"}], + "/set/data/font_family": [{"data": "Yu Gothic UI", "status": 200, "result": "Yu Gothic UI"}], + "/get/data/ui_language": [{"data": None, "status": 200, "result": "en"}], + "/set/data/ui_language": [ + {"data": "en", "status": 200, "result": "en"}, + {"data": "ja", "status": 200, "result": "ja"}, + {"data": "ko", "status": 200, "result": "ko"}, + {"data": "zh-Hant", "status": 200, "result": "zh-Hant"}, + {"data": "zh-Hans", "status": 200, "result": "zh-Hans"}, + ], + "/get/data/main_window_geometry": [{"data": None, "status": 200, "result": {"x_pos": 0, "y_pos": 0, "width": 870, "height": 654}}], + "/set/data/main_window_geometry": [ + { + "data": {"x_pos": 0, "y_pos": 0, "width": 870, "height": 654}, + "status": 200, + "result": {"x_pos": 0, "y_pos": 0, "width": 870, "height": 654} + }, + ], + # Compute device + "/get/data/compute_mode": [{"data": None, "status": 200, "result": "cpu"}], + "/get/data/translation_compute_device_list": [{"data": None, "status": 200, "result": [{"device": "cpu", "device_index": 0, "device_name": "cpu"}]}], + "/get/data/selected_translation_compute_device": [{"data": None, "status": 200, "result": {"device": "cpu", "device_index": 0, "device_name": "cpu"}}], + "/set/data/selected_translation_compute_device": [ + { + "data": {"device": "cpu", 
"device_index": 0, "device_name": "cpu"}, + "status": 200, + "result": {"device": "cpu", "device_index": 0, "device_name": "cpu"} + } + ], + "/get/data/transcription_compute_device_list": [ + { + "data": None, + "status": 200, + "result": [{"device": "cpu", "device_index": 0, "device_name": "cpu"}] + } + ], + "/get/data/selected_transcription_compute_device": [ + { + "data": None, + "status": 200, + "result": {"device": "cpu", "device_index": 0, "device_name": "cpu"} + } + ], + "/set/data/selected_transcription_compute_device": [ + { + "data": {"device": "cpu", "device_index": 0, "device_name": "cpu"}, + "status": 200, + "result": {"device": "cpu", "device_index": 0, "device_name": "cpu"} + }, + ], + # Translation + "/get/data/selectable_ctranslate2_weight_type_dict": [ + { + "data": None, + "status": 200, + "result": {"small": True, "large": False} + }, + ], + "/get/data/ctranslate2_weight_type": [ + { + "data": "small", + "status": 200, + "result": "small" + }, + ], + # "/set/data/ctranslate2_weight_type": {"data": None}, + # "/run/download_ctranslate2_weight": {"data": None}, + # "/get/data/deepl_auth_key": {"data": None}, + # "/set/data/deepl_auth_key": {"data": None}, + # "/delete/data/deepl_auth_key": {"data": None}, + # "/get/data/convert_message_to_romaji": {"data": None}, + # "/set/enable/convert_message_to_romaji": {"data": None}, + # "/set/disable/convert_message_to_romaji": {"data": None}, + # "/get/data/convert_message_to_hiragana": {"data": None}, + # "/set/enable/convert_message_to_hiragana": {"data": None}, + # "/set/disable/convert_message_to_hiragana": {"data": None}, + # # Transcription + # "/get/data/mic_host_list": {"data": None}, + # "/get/data/mic_device_list": {"data": None}, + # "/get/data/speaker_device_list": {"data": None}, + # "/get/data/auto_mic_select": {"data": None}, + # "/set/enable/auto_mic_select": {"data": None}, + # "/set/disable/auto_mic_select": {"data": None}, + # "/get/data/selected_mic_host": {"data": None}, + # 
"/set/data/selected_mic_host": {"data": None}, + # "/get/data/selected_mic_device": {"data": None}, + # "/set/data/selected_mic_device": {"data": None}, + # "/get/data/mic_threshold": {"data": None}, + # "/set/data/mic_threshold": {"data": None}, + # "/get/data/mic_automatic_threshold": {"data": None}, + # "/set/enable/mic_automatic_threshold": {"data": None}, + # "/set/disable/mic_automatic_threshold": {"data": None}, + # "/get/data/mic_record_timeout": {"data": None}, + # "/set/data/mic_record_timeout": {"data": None}, + # "/get/data/mic_phrase_timeout": {"data": None}, + # "/set/data/mic_phrase_timeout": {"data": None}, + # "/get/data/mic_max_phrases": {"data": None}, + # "/set/data/mic_max_phrases": {"data": None}, + # "/get/data/hotkeys": {"data": None}, + # "/set/data/hotkeys": {"data": None}, + # "/get/data/plugins_status": {"data": None}, + # "/set/data/plugins_status": {"data": None}, + # "/get/data/mic_avg_logprob": {"data": None}, + # "/set/data/mic_avg_logprob": {"data": None}, + # "/get/data/mic_no_speech_prob": {"data": None}, + # "/set/data/mic_no_speech_prob": {"data": None}, + # "/set/enable/check_mic_threshold": {"data": None}, + # "/set/disable/check_mic_threshold": {"data": None}, + # "/get/data/mic_word_filter": {"data": None}, + # "/set/data/mic_word_filter": {"data": None}, + # "/get/data/auto_speaker_select": {"data": None}, + # "/set/enable/auto_speaker_select": {"data": None}, + # "/set/disable/auto_speaker_select": {"data": None}, + # "/get/data/selected_speaker_device": {"data": None}, + # "/set/data/selected_speaker_device": {"data": None}, + # "/get/data/speaker_threshold": {"data": None}, + # "/set/data/speaker_threshold": {"data": None}, + # "/get/data/speaker_automatic_threshold": {"data": None}, + # "/set/enable/speaker_automatic_threshold": {"data": None}, + # "/set/disable/speaker_automatic_threshold": {"data": None}, + # "/get/data/speaker_record_timeout": {"data": None}, + # "/set/data/speaker_record_timeout": {"data": None}, + 
# "/get/data/speaker_phrase_timeout": {"data": None}, + # "/set/data/speaker_phrase_timeout": {"data": None}, + # "/get/data/speaker_max_phrases": {"data": None}, + # "/set/data/speaker_max_phrases": {"data": None}, + # "/get/data/speaker_avg_logprob": {"data": None}, + # "/set/data/speaker_avg_logprob": {"data": None}, + # "/get/data/speaker_no_speech_prob": {"data": None}, + # "/set/data/speaker_no_speech_prob": {"data": None}, + # "/set/enable/check_speaker_threshold": {"data": None}, + # "/set/disable/check_speaker_threshold": {"data": None}, + # "/get/data/selectable_whisper_weight_type_dict": {"data": None}, + # "/get/data/whisper_weight_type": {"data": None}, + # "/set/data/whisper_weight_type": {"data": None}, + # "/run/download_whisper_weight": {"data": None}, + # # VR + # "/get/data/overlay_small_log": {"data": None}, + # "/set/enable/overlay_small_log": {"data": None}, + # "/set/disable/overlay_small_log": {"data": None}, + # "/get/data/overlay_small_log_settings": {"data": None}, + # "/set/data/overlay_small_log_settings": {"data": None}, + # "/get/data/overlay_large_log": {"data": None}, + # "/set/enable/overlay_large_log": {"data": None}, + # "/set/disable/overlay_large_log": {"data": None}, + # "/get/data/overlay_large_log_settings": {"data": None}, + # "/set/data/overlay_large_log_settings": {"data": None}, + # "/get/data/overlay_show_only_translated_messages": {"data": None}, + # "/set/enable/overlay_show_only_translated_messages": {"data": None}, + # "/set/disable/overlay_show_only_translated_messages": {"data": None}, + # # Others + # "/get/data/send_message_format_parts": {"data": None}, + # "/set/data/send_message_format_parts": {"data": None}, + # "/get/data/received_message_format_parts": {"data": None}, + # "/set/data/received_message_format_parts": {"data": None}, + # "/get/data/auto_clear_message_box": {"data": None}, + # "/set/enable/auto_clear_message_box": {"data": None}, + # "/set/disable/auto_clear_message_box": {"data": None}, + # 
"/get/data/send_only_translated_messages": {"data": None}, + # "/set/enable/send_only_translated_messages": {"data": None}, + # "/set/disable/send_only_translated_messages": {"data": None}, + # "/get/data/logger_feature": {"data": None}, + # "/set/enable/logger_feature": {"data": None}, + # "/set/disable/logger_feature": {"data": None}, + # "/run/open_filepath_logs": {"data": None}, + # "/get/data/vrc_mic_mute_sync": {"data": None}, + # "/set/enable/vrc_mic_mute_sync": {"data": None}, + # "/set/disable/vrc_mic_mute_sync": {"data": None}, + # "/get/data/send_message_to_vrc": {"data": None}, + # "/set/enable/send_message_to_vrc": {"data": None}, + # "/set/disable/send_message_to_vrc": {"data": None}, + # "/get/data/send_received_message_to_vrc": {"data": None}, + # "/set/enable/send_received_message_to_vrc": {"data": None}, + # "/set/disable/send_received_message_to_vrc": {"data": None}, + # # WebSocket Settings + # "/get/data/websocket_host": {"data": None}, + # "/set/data/websocket_host": {"data": None}, + # "/get/data/websocket_port": {"data": None}, + # "/set/data/websocket_port": {"data": None}, + # "/get/data/websocket_server": {"data": None}, + # "/set/enable/websocket_server": {"data": None}, + # "/set/disable/websocket_server": {"data": None}, + # # Advanced Settings + # "/get/data/osc_ip_address": {"data": None}, + # "/set/data/osc_ip_address": {"data": None}, + # "/get/data/osc_port": {"data": None}, + # "/set/data/osc_port": {"data": None}, + # "/get/data/notification_vrc_sfx": {"data": None}, + # "/set/enable/notification_vrc_sfx": {"data": None}, + # "/set/disable/notification_vrc_sfx": {"data": None}, + # "/run/open_filepath_config_file": {"data": None}, + # "/run/feed_watchdog": {"data": None}, + } + + for endpoint, value in endpoints.items(): + with self.subTest(endpoint=endpoint): + for item in value: + input_data = item["data"] + expected_status = item["status"] + expected_result = item["result"] + result, status = self.main.handleRequest(endpoint, 
input_data) + print(f"Endpoint: {endpoint}, Status: {status}, Result: {result}") + self.assertEqual(status, expected_status) + self.assertEqual(result, expected_result) + + def tearDown(self): + self.main.stop() + +if __name__ == "__main__": + unittest.main() \ No newline at end of file From 1689a45e3e4ac61500941c5b779494912c2b24a0 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Tue, 23 Sep 2025 14:45:08 +0900 Subject: [PATCH 35/92] Refactor test_endpoints.py: Enhance endpoint testing with structured tests for ON/OFF states, random access, and continuous testing. Introduce Color class for colored output and streamline endpoint handling logic. --- src-python/controller.py | 40 +- src-python/test_endpoints.py | 918 +++++++++-------------------------- 2 files changed, 262 insertions(+), 696 deletions(-) diff --git a/src-python/controller.py b/src-python/controller.py index b10d5617..ec287267 100644 --- a/src-python/controller.py +++ b/src-python/controller.py @@ -1699,14 +1699,16 @@ class Controller: @staticmethod def setEnableLoggerFeature(*args, **kwargs) -> dict: - config.LOGGER_FEATURE = True - model.startLogger() + if config.LOGGER_FEATURE is False: + model.startLogger() + config.LOGGER_FEATURE = True return {"status":200, "result":config.LOGGER_FEATURE} @staticmethod def setDisableLoggerFeature(*args, **kwargs) -> dict: - model.stopLogger() - config.LOGGER_FEATURE = False + if config.LOGGER_FEATURE is True: + model.stopLogger() + config.LOGGER_FEATURE = False return {"status":200, "result":config.LOGGER_FEATURE} @staticmethod @@ -2245,24 +2247,28 @@ class Controller: @staticmethod def setEnableWebSocketServer(*args, **kwargs) -> dict: - if isAvailableWebSocketServer(config.WEBSOCKET_HOST, config.WEBSOCKET_PORT) is True: - model.startWebSocketServer(config.WEBSOCKET_HOST, config.WEBSOCKET_PORT) - config.WEBSOCKET_SERVER = True - response = {"status":200, "result":config.WEBSOCKET_SERVER} - else: - response = { - 
"status":400, - "result":{ - "message":"WebSocket server host or port is not available", - "data": config.WEBSOCKET_SERVER + if config.WEBSOCKET_SERVER is False: + if isAvailableWebSocketServer(config.WEBSOCKET_HOST, config.WEBSOCKET_PORT) is True: + model.startWebSocketServer(config.WEBSOCKET_HOST, config.WEBSOCKET_PORT) + config.WEBSOCKET_SERVER = True + response = {"status":200, "result":config.WEBSOCKET_SERVER} + else: + response = { + "status":400, + "result":{ + "message":"WebSocket server host or port is not available", + "data": config.WEBSOCKET_SERVER + } } - } + else: + response = {"status":200, "result":config.WEBSOCKET_SERVER} return response @staticmethod def setDisableWebSocketServer(*args, **kwargs) -> dict: - config.WEBSOCKET_SERVER = False - model.stopWebSocketServer() + if config.WEBSOCKET_SERVER is True: + config.WEBSOCKET_SERVER = False + model.stopWebSocketServer() return {"status":200, "result":config.WEBSOCKET_SERVER} def initializationProgress(self, progress): diff --git a/src-python/test_endpoints.py b/src-python/test_endpoints.py index 03892fa8..b0664b50 100644 --- a/src-python/test_endpoints.py +++ b/src-python/test_endpoints.py @@ -1,15 +1,40 @@ -import sys -import unittest - # 初期化のため、config.jsonの削除 import os +import pprint +import time +import random if os.path.exists("config.json"): os.remove("config.json") from mainloop import main_instance -class TestMainloop(unittest.TestCase): - def setUp(self): +class Color: + BLACK = '\033[30m'#(文字)黒 + RED = '\033[31m'#(文字)赤 + GREEN = '\033[32m'#(文字)緑 + YELLOW = '\033[33m'#(文字)黄 + BLUE = '\033[34m'#(文字)青 + MAGENTA = '\033[35m'#(文字)マゼンタ + CYAN = '\033[36m'#(文字)シアン + WHITE = '\033[37m'#(文字)白 + COLOR_DEFAULT = '\033[39m'#文字色をデフォルトに戻す + BOLD = '\033[1m'#太字 + UNDERLINE = '\033[4m'#下線 + INVISIBLE = '\033[08m'#不可視 + REVERCE = '\033[07m'#文字色と背景色を反転 + BG_BLACK = '\033[40m'#(背景)黒 + BG_RED = '\033[41m'#(背景)赤 + BG_GREEN = '\033[42m'#(背景)緑 + BG_YELLOW = '\033[43m'#(背景)黄 + BG_BLUE = '\033[44m'#(背景)青 + 
BG_MAGENTA = '\033[45m'#(背景)マゼンタ + BG_CYAN = '\033[46m'#(背景)シアン + BG_WHITE = '\033[47m'#(背景)白 + BG_DEFAULT = '\033[49m'#背景色をデフォルトに戻す + RESET = '\033[0m'#全てリセット + +class TestMainloop(): + def __init__(self): self.main = main_instance self.main.startReceiver() self.main.startHandler() @@ -23,434 +48,119 @@ class TestMainloop(unittest.TestCase): for key in self.main.mapping.keys(): self.main.mapping[key]["status"] = True - def test_endpoints(self): - print("単体動作の確認") - # エンドポイントとテストデータの定義 - endpoints = { - # Main Window - "/set/enable/translation": [{"data": None, "status": 200, "result": True}], - "/set/disable/translation": [{"data": None, "status": 200, "result": False}], - "/set/enable/transcription_send": [{"data": None, "status": 200, "result": True}], - "/set/disable/transcription_send": [{"data": None, "status": 200, "result": False}], - "/set/enable/transcription_receive": [{"data": None, "status": 200, "result": True}], - "/set/disable/transcription_receive": [{"data": None, "status": 200, "result": False}], - "/set/enable/foreground": [{"data": None, "status": 200, "result": True}], - "/set/disable/foreground": [{"data": None, "status": 200, "result": False}], - "/get/data/selected_tab_no": [{"data": None, "status": 200, "result": "1"}], - "/set/data/selected_tab_no": [ - {"data": "1", "status": 200, "result": "1"}, - {"data": "2", "status": 200, "result": "2"}, - {"data": "3", "status": 200, "result": "3"}, - ], - "/get/data/main_window_sidebar_compact_mode": [{"data": None, "status": 200, "result": False}], - "/set/enable/main_window_sidebar_compact_mode": [{"data": None, "status": 200, "result": True}], - "/set/disable/main_window_sidebar_compact_mode": [{"data": None, "status": 200, "result": False}], - "/get/data/translation_engines": [{"data": None, "status": 200, "result": ['DeepL', 'Google', 'Bing', 'Papago', 'CTranslate2']}], - "/get/data/selectable_language_list": [ - { - "data": None, - "status": 200, - "result": - [ - {'language': 'Afrikaans', 
'country': 'South Africa'}, - {'language': 'Albanian', 'country': 'Albania'}, - {'language': 'Amharic', 'country': 'Ethiopia'}, - {'language': 'Arabic', 'country': 'Algeria'}, - {'language': 'Arabic', 'country': 'Bahrain'}, - {'language': 'Arabic', 'country': 'Egypt'}, - {'language': 'Arabic', 'country': 'Israel'}, - {'language': 'Arabic', 'country': 'Iraq'}, - {'language': 'Arabic', 'country': 'Jordan'}, - {'language': 'Arabic', 'country': 'Kuwait'}, - {'language': 'Arabic', 'country': 'Lebanon'}, - {'language': 'Arabic', 'country': 'Mauritania'}, - {'language': 'Arabic', 'country': 'Morocco'}, - {'language': 'Arabic', 'country': 'Oman'}, - {'language': 'Arabic', 'country': 'Qatar'}, - {'language': 'Arabic', 'country': 'Saudi Arabia'}, - {'language': 'Arabic', 'country': 'Palestine'}, - {'language': 'Arabic', 'country': 'Syria'}, - {'language': 'Arabic', 'country': 'Tunisia'}, - {'language': 'Arabic', 'country': 'United Arab Emirates'}, - {'language': 'Arabic', 'country': 'Yemen'}, - {'language': 'Armenian', 'country': 'Armenia'}, - {'language': 'Azerbaijani', 'country': 'Azerbaijan'}, - {'language': 'Basque', 'country': 'Spain'}, - {'language': 'Bengali', 'country': 'Bangladesh'}, - {'language': 'Bengali', 'country': 'India'}, - {'language': 'Bosnian', 'country': 'Bosnia and Herzegovina'}, - {'language': 'Bulgarian', 'country': 'Bulgaria'}, - {'language': 'Catalan', 'country': 'Spain'}, - {'language': 'Chinese Simplified', 'country': 'China'}, - {'language': 'Chinese Simplified', 'country': 'Hong Kong'}, - {'language': 'Chinese Traditional', 'country': 'Taiwan'}, - {'language': 'Chinese Traditional', 'country': 'Hong Kong'}, - {'language': 'Croatian', 'country': 'Croatia'}, - {'language': 'Czech', 'country': 'Czech Republic'}, - {'language': 'Danish', 'country': 'Denmark'}, - {'language': 'Dutch', 'country': 'Belgium'}, - {'language': 'Dutch', 'country': 'Netherlands'}, - {'language': 'English', 'country': 'Australia'}, - {'language': 'English', 'country': 
'Canada'}, - {'language': 'English', 'country': 'Ghana'}, - {'language': 'English', 'country': 'Hong Kong'}, - {'language': 'English', 'country': 'India'}, - {'language': 'English', 'country': 'Ireland'}, - {'language': 'English', 'country': 'Kenya'}, - {'language': 'English', 'country': 'New Zealand'}, - {'language': 'English', 'country': 'Nigeria'}, - {'language': 'English', 'country': 'Philippines'}, - {'language': 'English', 'country': 'Singapore'}, - {'language': 'English', 'country': 'South Africa'}, - {'language': 'English', 'country': 'Tanzania'}, - {'language': 'English', 'country': 'United Kingdom'}, - {'language': 'English', 'country': 'United States'}, - {'language': 'Estonian', 'country': 'Estonia'}, - {'language': 'Filipino', 'country': 'Philippines'}, - {'language': 'Finnish', 'country': 'Finland'}, - {'language': 'French', 'country': 'Belgium'}, - {'language': 'French', 'country': 'Canada'}, - {'language': 'French', 'country': 'France'}, - {'language': 'French', 'country': 'Switzerland'}, - {'language': 'Galician', 'country': 'Spain'}, - {'language': 'Georgian', 'country': 'Georgia'}, - {'language': 'German', 'country': 'Austria'}, - {'language': 'German', 'country': 'Germany'}, - {'language': 'German', 'country': 'Switzerland'}, - {'language': 'Greek', 'country': 'Greece'}, - {'language': 'Gujarati', 'country': 'India'}, - {'language': 'Hebrew', 'country': 'Israel'}, - {'language': 'Hindi', 'country': 'India'}, - {'language': 'Hungarian', 'country': 'Hungary'}, - {'language': 'Icelandic', 'country': 'Iceland'}, - {'language': 'Indonesian', 'country': 'Indonesia'}, - {'language': 'Italian', 'country': 'Italy'}, - {'language': 'Italian', 'country': 'Switzerland'}, - {'language': 'Japanese', 'country': 'Japan'}, - {'language': 'Kannada', 'country': 'India'}, - {'language': 'Kazakh', 'country': 'Kazakhstan'}, - {'language': 'Khmer', 'country': 'Cambodia'}, - {'language': 'Korean', 'country': 'South Korea'}, - {'language': 'Lao', 'country': 'Laos'}, - 
{'language': 'Latvian', 'country': 'Latvia'}, - {'language': 'Lithuanian', 'country': 'Lithuania'}, - {'language': 'Macedonian', 'country': 'North Macedonia'}, - {'language': 'Malay', 'country': 'Malaysia'}, - {'language': 'Malayalam', 'country': 'India'}, - {'language': 'Mongolian', 'country': 'Mongolia'}, - {'language': 'Nepali', 'country': 'Nepal'}, - {'language': 'Norwegian', 'country': 'Norway'}, - {'language': 'Persian', 'country': 'Iran'}, - {'language': 'Polish', 'country': 'Poland'}, - {'language': 'Portuguese', 'country': 'Brazil'}, - {'language': 'Portuguese', 'country': 'Portugal'}, - {'language': 'Romanian', 'country': 'Romania'}, - {'language': 'Russian', 'country': 'Russia'}, - {'language': 'Serbian', 'country': 'Serbia'}, - {'language': 'Sinhala', 'country': 'Sri Lanka'}, - {'language': 'Slovak', 'country': 'Slovakia'}, - {'language': 'Slovenian', 'country': 'Slovenia'}, - {'language': 'Spanish', 'country': 'Argentina'}, - {'language': 'Spanish', 'country': 'Bolivia'}, - {'language': 'Spanish', 'country': 'Chile'}, - {'language': 'Spanish', 'country': 'Colombia'}, - {'language': 'Spanish', 'country': 'Costa Rica'}, - {'language': 'Spanish', 'country': 'Dominican Republic'}, - {'language': 'Spanish', 'country': 'Ecuador'}, - {'language': 'Spanish', 'country': 'El Salvador'}, - {'language': 'Spanish', 'country': 'Guatemala'}, - {'language': 'Spanish', 'country': 'Honduras'}, - {'language': 'Spanish', 'country': 'Mexico'}, - {'language': 'Spanish', 'country': 'Nicaragua'}, - {'language': 'Spanish', 'country': 'Panama'}, - {'language': 'Spanish', 'country': 'Paraguay'}, - {'language': 'Spanish', 'country': 'Peru'}, - {'language': 'Spanish', 'country': 'Puerto Rico'}, - {'language': 'Spanish', 'country': 'Spain'}, - {'language': 'Spanish', 'country': 'United States'}, - {'language': 'Spanish', 'country': 'Uruguay'}, - {'language': 'Spanish', 'country': 'Venezuela'}, - {'language': 'Sundanese', 'country': 'Indonesia'}, - {'language': 'Swahili', 'country': 
'Kenya'}, - {'language': 'Swahili', 'country': 'Tanzania'}, - {'language': 'Swedish', 'country': 'Sweden'}, - {'language': 'Tamil', 'country': 'India'}, - {'language': 'Tamil', 'country': 'malaysia'}, - {'language': 'Tamil', 'country': 'Singapore'}, - {'language': 'Tamil', 'country': 'Sri Lanka'}, - {'language': 'Telugu', 'country': 'India'}, - {'language': 'Thai', 'country': 'Thailand'}, - {'language': 'Turkish', 'country': 'Turkey'}, - {'language': 'Ukrainian', 'country': 'Ukraine'}, - {'language': 'Urdu', 'country': 'India'}, - {'language': 'Urdu', 'country': 'Pakistan'}, - {'language': 'Uzbek', 'country': 'Uzbekistan'}, - {'language': 'Vietnamese', 'country': 'Vietnam'} - ]}], - "/get/data/selected_translation_engines": [{"data": None, "status": 200, "result": {'1': 'CTranslate2', '2': 'CTranslate2', '3': 'CTranslate2'}}], - "/set/data/selected_translation_engines": [ - { - "data": {'1': 'DeepL', '2': 'Google', '3': 'Papago'}, - "status": 200, - "result": {'1': 'DeepL', '2': 'Google', '3': 'Papago'} - }, - ], - "/get/data/selected_your_languages": [ - { - "data": None, - "status": 200, - "result": { - '1': { - '1': { - 'language': 'Japanese', - 'country': 'Japan', - 'enable': True - } - }, - '2': { - '1': { - 'language': 'Japanese', - 'country': 'Japan', - 'enable': True - } - }, - '3': { - '1': { - 'language': 'Japanese', - 'country': 'Japan', - 'enable': True - } - } - } - } - ], - "/set/data/selected_your_languages": [ - { - "data": { - '1': { - '1': { - 'language': 'Japanese', - 'country': 'Japan', - 'enable': True - }, - }, - '2': { - '1': { - 'language': 'English', - 'country': 'United States', - 'enable': True - }, - }, - '3': { - '1': { - 'language': 'French', - 'country': 'France', - 'enable': True - } - } - }, - "status": 200, - "result": { - '1': { - '1': { - 'language': 'Japanese', - 'country': 'Japan', - 'enable': True - }, - }, - '2': { - '1': { - 'language': 'English', - 'country': 'United States', - 'enable': True - }, - }, - '3': { - '1': { - 
'language': 'French', - 'country': 'France', - 'enable': True - } - } - } - } - ], - "/get/data/selected_target_languages": [ - { - "data": None, - "status": 200, - "result": { - "1": { - "1": { - "language": "English", - "country": "United States", - "enable": True - }, - "2": { - "language": "English", - "country": "United States", - "enable": False - }, - "3": { - "language": "English", - "country": "United States", - "enable": False - } - }, - "2": { - "1": { - "language": "English", - "country": "United States", - "enable": True - }, - "2": { - "language": "English", - "country": "United States", - "enable": False - }, - "3": { - "language": "English", - "country": "United States", - "enable": False - } - }, - "3": { - "1": { - "language": "English", - "country": "United States", - "enable": True - }, - "2": { - "language": "English", - "country": "United States", - "enable": False - }, - "3": { - "language": "English", - "country": "United States", - "enable": False - } - } - }, - } - ], - "/set/data/selected_target_languages": [ - { - "data": { - "1": { - "1": { - "language": "Japanese", - "country": "Japan", - "enable": True - }, - "2": { - "language": "English", - "country": "United States", - "enable": True - }, - "3": { - "language": "French", - "country": "France", - "enable": True - } - }, - "2": { - "1": { - "language": "Japanese", - "country": "Japan", - "enable": True - }, - "2": { - "language": "English", - "country": "United States", - "enable": True - }, - "3": { - "language": "French", - "country": "France", - "enable": True - } - }, - "3": { - "1": { - "language": "Japanese", - "country": "Japan", - "enable": True - }, - "2": { - "language": "English", - "country": "United States", - "enable": True - }, - "3": { - "language": "French", - "country": "France", - "enable": True - } - } - }, - "status": 200, - "result": { - "1": { - "1": { - "language": "Japanese", - "country": "Japan", - "enable": True - }, - "2": { - "language": "English", - 
"country": "United States", - "enable": True - }, - "3": { - "language": "French", - "country": "France", - "enable": True - } - }, - "2": { - "1": { - "language": "Japanese", - "country": "Japan", - "enable": True - }, - "2": { - "language": "English", - "country": "United States", - "enable": True - }, - "3": { - "language": "French", - "country": "France", - "enable": True - } - }, - "3": { - "1": { - "language": "Japanese", - "country": "Japan", - "enable": True - }, - "2": { - "language": "English", - "country": "United States", - "enable": True - }, - "3": { - "language": "French", - "country": "France", - "enable": True - } - } - }, - } - ], - "/get/data/transcription_engines": [{"data": None, "status": 200, "result": ['Google', 'Whisper']}], - "/get/data/selected_transcription_engine": [{"data": None, "status": 200, "result": "Google"}], - "/set/data/selected_transcription_engine": [ - {"data": "Google", "status": 200, "result": "Google"}, - {"data": "Whisper", "status": 200, "result": "Whisper"}, - ], + self.config_dict = {} + for endpoint in self.main.mapping.keys(): + if endpoint.startswith("/get/data/"): + self.config_dict[endpoint.split("/")[-1]] = self.main.handleRequest(endpoint, None)[0] + elif endpoint.startswith("/set/disable/"): + self.config_dict[endpoint.split("/")[-1]] = self.main.handleRequest(endpoint, None)[0] + print(self.config_dict) + + self.validity_endpoints = [ + "/set/enable/translation", + "/set/disable/translation", + "/set/enable/transcription_send", + "/set/disable/transcription_send", + "/set/enable/transcription_receive", + "/set/disable/transcription_receive", + "/set/enable/foreground", + "/set/disable/foreground", + "/set/enable/main_window_sidebar_compact_mode", + "/set/disable/main_window_sidebar_compact_mode", + "/set/enable/show_resend_button", + "/set/disable/show_resend_button", + "/set/enable/convert_message_to_romaji", + "/set/disable/convert_message_to_romaji", + "/set/enable/convert_message_to_hiragana", + 
"/set/disable/convert_message_to_hiragana", + # "/set/enable/auto_mic_select", + # "/set/disable/auto_mic_select", + "/set/enable/mic_automatic_threshold", + "/set/disable/mic_automatic_threshold", + # "/set/enable/check_mic_threshold", + # "/set/disable/check_mic_threshold", + # "/set/enable/auto_speaker_select", + # "/set/disable/auto_speaker_select", + "/set/enable/speaker_automatic_threshold", + "/set/disable/speaker_automatic_threshold", + # "/set/enable/check_speaker_threshold", + # "/set/disable/check_speaker_threshold", + "/set/enable/overlay_small_log", + "/set/disable/overlay_small_log", + "/set/enable/overlay_large_log", + "/set/disable/overlay_large_log", + "/set/enable/overlay_show_only_translated_messages", + "/set/disable/overlay_show_only_translated_messages", + "/set/enable/auto_clear_message_box", + "/set/disable/auto_clear_message_box", + "/set/enable/send_only_translated_messages", + "/set/disable/send_only_translated_messages", + "/set/enable/logger_feature", + "/set/disable/logger_feature", + "/set/enable/vrc_mic_mute_sync", + "/set/disable/vrc_mic_mute_sync", + "/set/enable/send_message_to_vrc", + "/set/disable/send_message_to_vrc", + "/set/enable/send_received_message_to_vrc", + "/set/disable/send_received_message_to_vrc", + "/set/enable/websocket_server", + "/set/disable/websocket_server", + "/set/enable/notification_vrc_sfx", + "/set/disable/notification_vrc_sfx", + ] + + self.set_data_endpoints = [ + "/set/data/selected_tab_no", + "/set/data/selected_translation_engines", + "/set/data/selected_your_languages", + "/set/data/selected_target_languages" + "/set/data/selected_transcription_engine", + "/set/data/transparency", + "/set/data/ui_scaling", + "/set/data/textbox_ui_scaling", + "/set/data/message_box_ratio", + "/set/data/send_message_button_type", + "/set/data/font_family", + "/set/data/ui_language", + "/set/data/main_window_geometry", + "/set/data/selected_translation_compute_device", + 
"/set/data/selected_transcription_compute_device", + "/set/data/ctranslate2_weight_type", + "/set/data/deepl_auth_key", + "/set/data/selected_mic_host", + "/set/data/selected_mic_device", + "/set/data/mic_threshold", + "/set/data/mic_record_timeout", + "/set/data/mic_phrase_timeout", + "/set/data/mic_max_phrases", + "/set/data/hotkeys", + "/set/data/plugins_status", + "/set/data/mic_avg_logprob", + "/set/data/mic_no_speech_prob", + "/set/data/mic_word_filter", + "/set/data/selected_speaker_device", + "/set/data/speaker_threshold", + "/set/data/speaker_record_timeout", + "/set/data/speaker_phrase_timeout", + "/set/data/speaker_max_phrases", + "/set/data/speaker_avg_logprob", + "/set/data/speaker_no_speech_prob", + "/set/data/whisper_weight_type", + "/set/data/overlay_small_log_settings", + "/set/data/overlay_large_log_settings", + "/set/data/send_message_format_parts", + "/set/data/received_message_format_parts", + "/set/data/websocket_host", + "/set/data/websocket_port", + "/set/data/osc_ip_address", + "/set/data/osc_port", + ] + + self.delete_data_endpoints = [ + "/delete/data/deepl_auth_key", + ] + + self.run_endpoints = { "/run/send_message_box": [ { "data": {"id":"123456", "message":"test"}, @@ -472,256 +182,106 @@ class TestMainloop(unittest.TestCase): # !!!Cant be tested here!!! 
# "/run/update_software": [{"data": None, "status": 200, "result": True}], # "/run/update_cuda_software": [{"data": None, "status": 200, "result": True}], - - # Config Window - # Appearance - "/get/data/version": [{"data": None, "status": 200, "result": "3.2.2"}], - "/get/data/transparency": [{"data": None, "status": 200, "result": 100}], - "/set/data/transparency": [ - {"data": 100, "status": 200, "result": 100}, - {"data": 80, "status": 200, "result": 80}, - {"data": 50, "status": 200, "result": 50}, - {"data": 20, "status": 200, "result": 20}, - {"data": 0, "status": 200, "result": 0}, + "/run/download_ctranslate2_weight": [ + {"data": "small", "status": 200, "result": True}, + {"data": "large", "status": 400, "result": False}, ], - "/get/data/ui_scaling": [{"data": None, "status": 200, "result": 100}], - "/set/data/ui_scaling": [ - {"data": 100, "status": 200, "result": 100}, - {"data": 80, "status": 200, "result": 80}, - {"data": 50, "status": 200, "result": 50}, - {"data": 20, "status": 200, "result": 20}, - {"data": 10, "status": 200, "result": 10}, + "/run/download_whisper_weight": [ + {"data": "tiny", "status": 200, "result": True}, + {"data": "base", "status": 200, "result": True}, + {"data": "small", "status": 200, "result": True}, + {"data": "medium", "status": 200, "result": True}, + {"data": "large-v1", "status": 200, "result": True}, + {"data": "large-v2", "status": 400, "result": False}, + {"data": "large-v3", "status": 400, "result": False}, + {"data": "large-v3-turbo-int8", "status": 400, "result": False}, + {"data": "large-v3-turbo", "status": 400, "result": False} ], - "/get/data/textbox_ui_scaling": [{"data": None, "status": 200, "result": 100}], - "/set/data/textbox_ui_scaling": [ - {"data": 100, "status": 200, "result": 100}, - {"data": 80, "status": 200, "result": 80}, - {"data": 50, "status": 200, "result": 50}, - {"data": 20, "status": 200, "result": 20}, - {"data": 10, "status": 200, "result": 10}, - ], - "/get/data/message_box_ratio": 
[{"data": None, "status": 200, "result": 10}], - "/set/data/message_box_ratio": [ - {"data": 10, "status": 200, "result": 10}, - {"data": 9, "status": 200, "result": 5.5}, - {"data": 1, "status": 200, "result": 1}, - ], - "/get/data/send_message_button_type": [{"data": None, "status": 200, "result": "show"}], - "/set/data/send_message_button_type": [ - {"data": "show", "status": 200, "result": "show"}, - {"data": "hide", "status": 200, "result": "hide"}, - {"data": "show_and_disable_enter_key", "status": 200, "result": "show_and_disable_enter_key"}, - ], - "/get/data/show_resend_button": [{"data": None, "status": 200, "result": False}], - "/set/enable/show_resend_button": [{"data": None, "status": 200, "result": True}], - "/set/disable/show_resend_button": [{"data": None, "status": 200, "result": False}], - "/get/data/font_family": [{"data": None, "status": 200, "result": "Yu Gothic UI"}], - "/set/data/font_family": [{"data": "Yu Gothic UI", "status": 200, "result": "Yu Gothic UI"}], - "/get/data/ui_language": [{"data": None, "status": 200, "result": "en"}], - "/set/data/ui_language": [ - {"data": "en", "status": 200, "result": "en"}, - {"data": "ja", "status": 200, "result": "ja"}, - {"data": "ko", "status": 200, "result": "ko"}, - {"data": "zh-Hant", "status": 200, "result": "zh-Hant"}, - {"data": "zh-Hans", "status": 200, "result": "zh-Hans"}, - ], - "/get/data/main_window_geometry": [{"data": None, "status": 200, "result": {"x_pos": 0, "y_pos": 0, "width": 870, "height": 654}}], - "/set/data/main_window_geometry": [ - { - "data": {"x_pos": 0, "y_pos": 0, "width": 870, "height": 654}, - "status": 200, - "result": {"x_pos": 0, "y_pos": 0, "width": 870, "height": 654} - }, - ], - # Compute device - "/get/data/compute_mode": [{"data": None, "status": 200, "result": "cpu"}], - "/get/data/translation_compute_device_list": [{"data": None, "status": 200, "result": [{"device": "cpu", "device_index": 0, "device_name": "cpu"}]}], - 
"/get/data/selected_translation_compute_device": [{"data": None, "status": 200, "result": {"device": "cpu", "device_index": 0, "device_name": "cpu"}}], - "/set/data/selected_translation_compute_device": [ - { - "data": {"device": "cpu", "device_index": 0, "device_name": "cpu"}, - "status": 200, - "result": {"device": "cpu", "device_index": 0, "device_name": "cpu"} - } - ], - "/get/data/transcription_compute_device_list": [ - { - "data": None, - "status": 200, - "result": [{"device": "cpu", "device_index": 0, "device_name": "cpu"}] - } - ], - "/get/data/selected_transcription_compute_device": [ - { - "data": None, - "status": 200, - "result": {"device": "cpu", "device_index": 0, "device_name": "cpu"} - } - ], - "/set/data/selected_transcription_compute_device": [ - { - "data": {"device": "cpu", "device_index": 0, "device_name": "cpu"}, - "status": 200, - "result": {"device": "cpu", "device_index": 0, "device_name": "cpu"} - }, - ], - # Translation - "/get/data/selectable_ctranslate2_weight_type_dict": [ - { - "data": None, - "status": 200, - "result": {"small": True, "large": False} - }, - ], - "/get/data/ctranslate2_weight_type": [ - { - "data": "small", - "status": 200, - "result": "small" - }, - ], - # "/set/data/ctranslate2_weight_type": {"data": None}, - # "/run/download_ctranslate2_weight": {"data": None}, - # "/get/data/deepl_auth_key": {"data": None}, - # "/set/data/deepl_auth_key": {"data": None}, - # "/delete/data/deepl_auth_key": {"data": None}, - # "/get/data/convert_message_to_romaji": {"data": None}, - # "/set/enable/convert_message_to_romaji": {"data": None}, - # "/set/disable/convert_message_to_romaji": {"data": None}, - # "/get/data/convert_message_to_hiragana": {"data": None}, - # "/set/enable/convert_message_to_hiragana": {"data": None}, - # "/set/disable/convert_message_to_hiragana": {"data": None}, - # # Transcription - # "/get/data/mic_host_list": {"data": None}, - # "/get/data/mic_device_list": {"data": None}, - # 
"/get/data/speaker_device_list": {"data": None}, - # "/get/data/auto_mic_select": {"data": None}, - # "/set/enable/auto_mic_select": {"data": None}, - # "/set/disable/auto_mic_select": {"data": None}, - # "/get/data/selected_mic_host": {"data": None}, - # "/set/data/selected_mic_host": {"data": None}, - # "/get/data/selected_mic_device": {"data": None}, - # "/set/data/selected_mic_device": {"data": None}, - # "/get/data/mic_threshold": {"data": None}, - # "/set/data/mic_threshold": {"data": None}, - # "/get/data/mic_automatic_threshold": {"data": None}, - # "/set/enable/mic_automatic_threshold": {"data": None}, - # "/set/disable/mic_automatic_threshold": {"data": None}, - # "/get/data/mic_record_timeout": {"data": None}, - # "/set/data/mic_record_timeout": {"data": None}, - # "/get/data/mic_phrase_timeout": {"data": None}, - # "/set/data/mic_phrase_timeout": {"data": None}, - # "/get/data/mic_max_phrases": {"data": None}, - # "/set/data/mic_max_phrases": {"data": None}, - # "/get/data/hotkeys": {"data": None}, - # "/set/data/hotkeys": {"data": None}, - # "/get/data/plugins_status": {"data": None}, - # "/set/data/plugins_status": {"data": None}, - # "/get/data/mic_avg_logprob": {"data": None}, - # "/set/data/mic_avg_logprob": {"data": None}, - # "/get/data/mic_no_speech_prob": {"data": None}, - # "/set/data/mic_no_speech_prob": {"data": None}, - # "/set/enable/check_mic_threshold": {"data": None}, - # "/set/disable/check_mic_threshold": {"data": None}, - # "/get/data/mic_word_filter": {"data": None}, - # "/set/data/mic_word_filter": {"data": None}, - # "/get/data/auto_speaker_select": {"data": None}, - # "/set/enable/auto_speaker_select": {"data": None}, - # "/set/disable/auto_speaker_select": {"data": None}, - # "/get/data/selected_speaker_device": {"data": None}, - # "/set/data/selected_speaker_device": {"data": None}, - # "/get/data/speaker_threshold": {"data": None}, - # "/set/data/speaker_threshold": {"data": None}, - # "/get/data/speaker_automatic_threshold": 
{"data": None}, - # "/set/enable/speaker_automatic_threshold": {"data": None}, - # "/set/disable/speaker_automatic_threshold": {"data": None}, - # "/get/data/speaker_record_timeout": {"data": None}, - # "/set/data/speaker_record_timeout": {"data": None}, - # "/get/data/speaker_phrase_timeout": {"data": None}, - # "/set/data/speaker_phrase_timeout": {"data": None}, - # "/get/data/speaker_max_phrases": {"data": None}, - # "/set/data/speaker_max_phrases": {"data": None}, - # "/get/data/speaker_avg_logprob": {"data": None}, - # "/set/data/speaker_avg_logprob": {"data": None}, - # "/get/data/speaker_no_speech_prob": {"data": None}, - # "/set/data/speaker_no_speech_prob": {"data": None}, - # "/set/enable/check_speaker_threshold": {"data": None}, - # "/set/disable/check_speaker_threshold": {"data": None}, - # "/get/data/selectable_whisper_weight_type_dict": {"data": None}, - # "/get/data/whisper_weight_type": {"data": None}, - # "/set/data/whisper_weight_type": {"data": None}, - # "/run/download_whisper_weight": {"data": None}, - # # VR - # "/get/data/overlay_small_log": {"data": None}, - # "/set/enable/overlay_small_log": {"data": None}, - # "/set/disable/overlay_small_log": {"data": None}, - # "/get/data/overlay_small_log_settings": {"data": None}, - # "/set/data/overlay_small_log_settings": {"data": None}, - # "/get/data/overlay_large_log": {"data": None}, - # "/set/enable/overlay_large_log": {"data": None}, - # "/set/disable/overlay_large_log": {"data": None}, - # "/get/data/overlay_large_log_settings": {"data": None}, - # "/set/data/overlay_large_log_settings": {"data": None}, - # "/get/data/overlay_show_only_translated_messages": {"data": None}, - # "/set/enable/overlay_show_only_translated_messages": {"data": None}, - # "/set/disable/overlay_show_only_translated_messages": {"data": None}, - # # Others - # "/get/data/send_message_format_parts": {"data": None}, - # "/set/data/send_message_format_parts": {"data": None}, - # "/get/data/received_message_format_parts": 
{"data": None}, - # "/set/data/received_message_format_parts": {"data": None}, - # "/get/data/auto_clear_message_box": {"data": None}, - # "/set/enable/auto_clear_message_box": {"data": None}, - # "/set/disable/auto_clear_message_box": {"data": None}, - # "/get/data/send_only_translated_messages": {"data": None}, - # "/set/enable/send_only_translated_messages": {"data": None}, - # "/set/disable/send_only_translated_messages": {"data": None}, - # "/get/data/logger_feature": {"data": None}, - # "/set/enable/logger_feature": {"data": None}, - # "/set/disable/logger_feature": {"data": None}, - # "/run/open_filepath_logs": {"data": None}, - # "/get/data/vrc_mic_mute_sync": {"data": None}, - # "/set/enable/vrc_mic_mute_sync": {"data": None}, - # "/set/disable/vrc_mic_mute_sync": {"data": None}, - # "/get/data/send_message_to_vrc": {"data": None}, - # "/set/enable/send_message_to_vrc": {"data": None}, - # "/set/disable/send_message_to_vrc": {"data": None}, - # "/get/data/send_received_message_to_vrc": {"data": None}, - # "/set/enable/send_received_message_to_vrc": {"data": None}, - # "/set/disable/send_received_message_to_vrc": {"data": None}, - # # WebSocket Settings - # "/get/data/websocket_host": {"data": None}, - # "/set/data/websocket_host": {"data": None}, - # "/get/data/websocket_port": {"data": None}, - # "/set/data/websocket_port": {"data": None}, - # "/get/data/websocket_server": {"data": None}, - # "/set/enable/websocket_server": {"data": None}, - # "/set/disable/websocket_server": {"data": None}, - # # Advanced Settings - # "/get/data/osc_ip_address": {"data": None}, - # "/set/data/osc_ip_address": {"data": None}, - # "/get/data/osc_port": {"data": None}, - # "/set/data/osc_port": {"data": None}, - # "/get/data/notification_vrc_sfx": {"data": None}, - # "/set/enable/notification_vrc_sfx": {"data": None}, - # "/set/disable/notification_vrc_sfx": {"data": None}, - # "/run/open_filepath_config_file": {"data": None}, - # "/run/feed_watchdog": {"data": None}, + 
"/run/open_filepath_logs": {"data": None, "status": 200, "result": True}, + "/run/open_filepath_config_file": {"data": None, "status": 200, "result": True}, + "/run/feed_watchdog": {"data": None, "status": 200, "result": True}, } - for endpoint, value in endpoints.items(): - with self.subTest(endpoint=endpoint): - for item in value: - input_data = item["data"] - expected_status = item["status"] - expected_result = item["result"] - result, status = self.main.handleRequest(endpoint, input_data) - print(f"Endpoint: {endpoint}, Status: {status}, Result: {result}") - self.assertEqual(status, expected_status) - self.assertEqual(result, expected_result) + def test_endpoints_on_off_single(self): + print("----ON/OFF系のエンドポイントのテスト----") + for endpoint in self.validity_endpoints: + print(f"Testing endpoint: {endpoint}", end="", flush=True) + if endpoint.startswith("/set/enable/"): + result, status = self.main.handleRequest(endpoint, None) + if result is True and status == 200: + self.config_dict[endpoint.split("/")[-1]] = result + print(f"\t -> {Color.GREEN}[PASS]{Color.RESET} Status: {status}, Result: {result}") + else: + print(f"\t -> {Color.RED}[ERROR]{Color.RESET} Status: {status}, Result: {result}") + print(f"Current config_dict: {self.config_dict}") + break + elif endpoint.startswith("/set/disable/"): + result, status = self.main.handleRequest(endpoint, None) + if result is False and status == 200: + self.config_dict[endpoint.split("/")[-1]] = result + print(f"\t -> {Color.GREEN}[PASS]{Color.RESET} Status: {status}, Result: {result}") + else: + print(f"\t -> {Color.RED}[ERROR]{Color.RESET} Status: {status}, Result: {result}") + print(f"Current config_dict: {self.config_dict}") + break + print("----ON/OFF系のエンドポイントのテスト終了----") - def tearDown(self): - self.main.stop() + def test_endpoints_on_off_random(self): + print("----ON/OFFでのランダムアクセスのテスト----") + for i in range(1000): + endpoint = random.choice(self.validity_endpoints) + print(f"No.{i:04} Testing endpoint: {endpoint}", 
end="", flush=True) + if endpoint.startswith("/set/enable/"): + result, status = self.main.handleRequest(endpoint, None) + expected_result = True + if result == expected_result and status == 200: + self.config_dict[endpoint.split("/")[-1]] = result + print(f"\t -> {Color.GREEN}[PASS]{Color.RESET} Status: {status}, Result: {result}") + else: + print(f"\t -> {Color.RED}[ERROR]{Color.RESET} Status: {status}, Result: {result}, Expected: {expected_result}") + pprint.pprint(self.config_dict) + break + elif endpoint.startswith("/set/disable/"): + result, status = self.main.handleRequest(endpoint, None) + expected_result = False + if result == expected_result and status == 200: + self.config_dict[endpoint.split("/")[-1]] = result + print(f"\t -> {Color.GREEN}[PASS]{Color.RESET} Status: {status}, Result: {result}") + else: + print(f"\t -> {Color.RED}[ERROR]{Color.RESET} Status: {status}, Result: {result}, Expected: {expected_result}") + pprint.pprint(self.config_dict) + break + print("----ON/OFFでのランダムアクセスのテスト終了----") + + def test_endpoints_continuous(self): + print("----連続テスト----") + # endpoints = ["/set/enable/websocket_server", "/set/disable/websocket_server"] + endpoints = ["/set/enable/transcription_receive", "/set/disable/transcription_receive"] + for i in range(1000): + endpoint = random.choice(endpoints) + print(f"No.{i:04} Testing endpoint: {endpoint}", end="", flush=True) + if endpoint.startswith("/set/enable/"): + result, status = self.main.handleRequest(endpoint, None) + expected_result = True + if result == expected_result and status == 200: + self.config_dict[endpoint.split("/")[-1]] = result + print(f"\t -> {Color.GREEN}[PASS]{Color.RESET} Status: {status}, Result: {result}") + else: + print(f"\t -> {Color.RED}[ERROR]{Color.RESET} Status: {status}, Result: {result}, Expected: {expected_result}") + pprint.pprint(self.config_dict) + break + elif endpoint.startswith("/set/disable/"): + result, status = self.main.handleRequest(endpoint, None) + expected_result = 
False + if result == expected_result and status == 200: + self.config_dict[endpoint.split("/")[-1]] = result + print(f"\t -> {Color.GREEN}[PASS]{Color.RESET} Status: {status}, Result: {result}") + else: + print(f"\t -> {Color.RED}[ERROR]{Color.RESET} Status: {status}, Result: {result}, Expected: {expected_result}") + pprint.pprint(self.config_dict) + break + print("----連続テスト終了----") if __name__ == "__main__": - unittest.main() \ No newline at end of file + test = TestMainloop() + test.test_endpoints_continuous() \ No newline at end of file From a4656a50813cf46bf41a224f3c3da675d1ccfd38 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Tue, 23 Sep 2025 16:56:11 +0900 Subject: [PATCH 36/92] =?UTF-8?q?Controller=E3=82=AF=E3=83=A9=E3=82=B9?= =?UTF-8?q?=E3=81=AE=E8=A8=AD=E5=AE=9A=E3=83=A1=E3=82=BD=E3=83=83=E3=83=89?= =?UTF-8?q?=E3=82=92=E4=BF=AE=E6=AD=A3=E3=81=97=E3=80=81=E9=87=8D=E8=A4=87?= =?UTF-8?q?=E3=81=97=E3=81=9F=E8=A8=AD=E5=AE=9A=E3=82=92=E9=98=B2=E6=AD=A2?= =?UTF-8?q?=E3=81=99=E3=82=8B=E6=9D=A1=E4=BB=B6=E3=82=92=E8=BF=BD=E5=8A=A0?= =?UTF-8?q?=E3=80=82=E3=83=86=E3=82=B9=E3=83=88=E3=82=B1=E3=83=BC=E3=82=B9?= =?UTF-8?q?=E3=82=92=E6=9B=B4=E6=96=B0=E3=81=97=E3=80=81ON/OFF=E7=8A=B6?= =?UTF-8?q?=E6=85=8B=E3=81=AE=E3=83=A9=E3=83=B3=E3=83=80=E3=83=A0=E3=82=A2?= =?UTF-8?q?=E3=82=AF=E3=82=BB=E3=82=B9=E3=81=A8=E9=80=A3=E7=B6=9A=E3=83=86?= =?UTF-8?q?=E3=82=B9=E3=83=88=E3=82=92=E5=BC=B7=E5=8C=96=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src-python/controller.py | 233 +++++++++++++++++++++-------------- src-python/test_endpoints.py | 51 ++++++-- 2 files changed, 179 insertions(+), 105 deletions(-) diff --git a/src-python/controller.py b/src-python/controller.py index ec287267..b31a2759 100644 --- a/src-python/controller.py +++ b/src-python/controller.py @@ -795,24 +795,28 @@ class Controller: @staticmethod def setEnableTranslation(*args, **kwargs) -> dict: - if 
model.isLoadedCTranslate2Model() is False: - model.changeTranslatorCTranslate2Model() - config.ENABLE_TRANSLATION = True + if config.ENABLE_TRANSLATION is False: + if model.isLoadedCTranslate2Model() is False: + model.changeTranslatorCTranslate2Model() + config.ENABLE_TRANSLATION = True return {"status":200, "result":config.ENABLE_TRANSLATION} @staticmethod def setDisableTranslation(*args, **kwargs) -> dict: - config.ENABLE_TRANSLATION = False + if config.ENABLE_TRANSLATION is True: + config.ENABLE_TRANSLATION = False return {"status":200, "result":config.ENABLE_TRANSLATION} @staticmethod def setEnableForeground(*args, **kwargs) -> dict: - config.ENABLE_FOREGROUND = True + if config.ENABLE_FOREGROUND is False: + config.ENABLE_FOREGROUND = True return {"status":200, "result":config.ENABLE_FOREGROUND} @staticmethod def setDisableForeground(*args, **kwargs) -> dict: - config.ENABLE_FOREGROUND = False + if config.ENABLE_FOREGROUND is True: + config.ENABLE_FOREGROUND = False return {"status":200, "result":config.ENABLE_FOREGROUND} @staticmethod @@ -906,12 +910,14 @@ class Controller: @staticmethod def setEnableConvertMessageToRomaji(*args, **kwargs) -> dict: - config.CONVERT_MESSAGE_TO_ROMAJI = True + if config.CONVERT_MESSAGE_TO_ROMAJI is False: + config.CONVERT_MESSAGE_TO_ROMAJI = True return {"status":200, "result":config.CONVERT_MESSAGE_TO_ROMAJI} @staticmethod def setDisableConvertMessageToRomaji(*args, **kwargs) -> dict: - config.CONVERT_MESSAGE_TO_ROMAJI = False + if config.CONVERT_MESSAGE_TO_ROMAJI is True: + config.CONVERT_MESSAGE_TO_ROMAJI = False return {"status":200, "result":config.CONVERT_MESSAGE_TO_ROMAJI} @staticmethod @@ -920,12 +926,14 @@ class Controller: @staticmethod def setEnableConvertMessageToHiragana(*args, **kwargs) -> dict: - config.CONVERT_MESSAGE_TO_HIRAGANA = True + if config.CONVERT_MESSAGE_TO_HIRAGANA is False: + config.CONVERT_MESSAGE_TO_HIRAGANA = True return {"status":200, "result":config.CONVERT_MESSAGE_TO_HIRAGANA} @staticmethod def 
setDisableConvertMessageToHiragana(*args, **kwargs) -> dict: - config.CONVERT_MESSAGE_TO_HIRAGANA = False + if config.CONVERT_MESSAGE_TO_HIRAGANA is True: + config.CONVERT_MESSAGE_TO_HIRAGANA = False return {"status":200, "result":config.CONVERT_MESSAGE_TO_HIRAGANA} @staticmethod @@ -934,12 +942,14 @@ class Controller: @staticmethod def setEnableMainWindowSidebarCompactMode(*args, **kwargs) -> dict: - config.MAIN_WINDOW_SIDEBAR_COMPACT_MODE = True + if config.MAIN_WINDOW_SIDEBAR_COMPACT_MODE is False: + config.MAIN_WINDOW_SIDEBAR_COMPACT_MODE = True return {"status":200, "result":config.MAIN_WINDOW_SIDEBAR_COMPACT_MODE} @staticmethod def setDisableMainWindowSidebarCompactMode(*args, **kwargs) -> dict: - config.MAIN_WINDOW_SIDEBAR_COMPACT_MODE = False + if config.MAIN_WINDOW_SIDEBAR_COMPACT_MODE is True: + config.MAIN_WINDOW_SIDEBAR_COMPACT_MODE = False return {"status":200, "result":config.MAIN_WINDOW_SIDEBAR_COMPACT_MODE} @staticmethod @@ -993,12 +1003,14 @@ class Controller: @staticmethod def setEnableShowResendButton(*args, **kwargs) -> dict: - config.SHOW_RESEND_BUTTON = True + if not config.SHOW_RESEND_BUTTON: + config.SHOW_RESEND_BUTTON = True return {"status":200, "result":config.SHOW_RESEND_BUTTON} @staticmethod def setDisableShowResendButton(*args, **kwargs) -> dict: - config.SHOW_RESEND_BUTTON = False + if config.SHOW_RESEND_BUTTON is True: + config.SHOW_RESEND_BUTTON = False return {"status":200, "result":config.SHOW_RESEND_BUTTON} @staticmethod @@ -1033,19 +1045,21 @@ class Controller: return {"status":200, "result":config.AUTO_MIC_SELECT} def setEnableAutoMicSelect(self, *args, **kwargs) -> dict: - config.AUTO_MIC_SELECT = True - device_manager.setCallbackProcessBeforeUpdateDevices(self.stopAccessDevices) - device_manager.setCallbackDefaultMicDevice(self.updateSelectedMicDevice) - device_manager.setCallbackProcessAfterUpdateDevices(self.restartAccessDevices) - device_manager.forceUpdateAndSetMicDevices() + if config.AUTO_MIC_SELECT is False: + 
device_manager.setCallbackProcessBeforeUpdateDevices(self.stopAccessDevices) + device_manager.setCallbackDefaultMicDevice(self.updateSelectedMicDevice) + device_manager.setCallbackProcessAfterUpdateDevices(self.restartAccessDevices) + device_manager.forceUpdateAndSetMicDevices() + config.AUTO_MIC_SELECT = True return {"status":200, "result":config.AUTO_MIC_SELECT} @staticmethod def setDisableAutoMicSelect(*args, **kwargs) -> dict: - device_manager.clearCallbackProcessBeforeUpdateDevices() - device_manager.clearCallbackDefaultMicDevice() - device_manager.clearCallbackProcessAfterUpdateDevices() - config.AUTO_MIC_SELECT = False + if config.AUTO_MIC_SELECT is True: + device_manager.clearCallbackProcessBeforeUpdateDevices() + device_manager.clearCallbackDefaultMicDevice() + device_manager.clearCallbackProcessAfterUpdateDevices() + config.AUTO_MIC_SELECT = False return {"status":200, "result":config.AUTO_MIC_SELECT} @staticmethod @@ -1108,12 +1122,14 @@ class Controller: @staticmethod def setEnableMicAutomaticThreshold(*args, **kwargs) -> dict: - config.MIC_AUTOMATIC_THRESHOLD = True + if config.MIC_AUTOMATIC_THRESHOLD is False: + config.MIC_AUTOMATIC_THRESHOLD = True return {"status":200, "result":config.MIC_AUTOMATIC_THRESHOLD} @staticmethod def setDisableMicAutomaticThreshold(*args, **kwargs) -> dict: - config.MIC_AUTOMATIC_THRESHOLD = False + if config.MIC_AUTOMATIC_THRESHOLD is True: + config.MIC_AUTOMATIC_THRESHOLD = False return {"status":200, "result":config.MIC_AUTOMATIC_THRESHOLD} @staticmethod @@ -1226,20 +1242,21 @@ class Controller: return {"status":200, "result":config.AUTO_SPEAKER_SELECT} def setEnableAutoSpeakerSelect(self, *args, **kwargs) -> dict: - config.AUTO_SPEAKER_SELECT = True - device_manager.setCallbackProcessBeforeUpdateDevices(self.stopAccessDevices) - device_manager.setCallbackDefaultSpeakerDevice(self.updateSelectedSpeakerDevice) - device_manager.setCallbackProcessAfterUpdateDevices(self.restartAccessDevices) - 
device_manager.forceUpdateAndSetSpeakerDevices() - + if config.AUTO_SPEAKER_SELECT is False: + device_manager.setCallbackProcessBeforeUpdateDevices(self.stopAccessDevices) + device_manager.setCallbackDefaultSpeakerDevice(self.updateSelectedSpeakerDevice) + device_manager.setCallbackProcessAfterUpdateDevices(self.restartAccessDevices) + device_manager.forceUpdateAndSetSpeakerDevices() + config.AUTO_SPEAKER_SELECT = True return {"status":200, "result":config.AUTO_SPEAKER_SELECT} @staticmethod def setDisableAutoSpeakerSelect(*args, **kwargs) -> dict: - device_manager.clearCallbackProcessBeforeUpdateDevices() - device_manager.clearCallbackDefaultSpeakerDevice() - device_manager.clearCallbackProcessAfterUpdateDevices() - config.AUTO_SPEAKER_SELECT = False + if config.AUTO_SPEAKER_SELECT is True: + device_manager.clearCallbackProcessBeforeUpdateDevices() + device_manager.clearCallbackDefaultSpeakerDevice() + device_manager.clearCallbackProcessAfterUpdateDevices() + config.AUTO_SPEAKER_SELECT = False return {"status":200, "result":config.AUTO_SPEAKER_SELECT} @staticmethod @@ -1285,12 +1302,14 @@ class Controller: @staticmethod def setEnableSpeakerAutomaticThreshold(*args, **kwargs) -> dict: - config.SPEAKER_AUTOMATIC_THRESHOLD = True + if config.SPEAKER_AUTOMATIC_THRESHOLD is False: + config.SPEAKER_AUTOMATIC_THRESHOLD = True return {"status":200, "result":config.SPEAKER_AUTOMATIC_THRESHOLD} @staticmethod def setDisableSpeakerAutomaticThreshold(*args, **kwargs) -> dict: - config.SPEAKER_AUTOMATIC_THRESHOLD = False + if config.SPEAKER_AUTOMATIC_THRESHOLD is True: + config.SPEAKER_AUTOMATIC_THRESHOLD = False return {"status":200, "result":config.SPEAKER_AUTOMATIC_THRESHOLD} @staticmethod @@ -1459,12 +1478,14 @@ class Controller: @staticmethod def setEnableNotificationVrcSfx(*args, **kwargs) -> dict: - config.NOTIFICATION_VRC_SFX = True + if config.NOTIFICATION_VRC_SFX is False: + config.NOTIFICATION_VRC_SFX = True return {"status":200, "result":config.NOTIFICATION_VRC_SFX} 
@staticmethod def setDisableNotificationVrcSfx(*args, **kwargs) -> dict: - config.NOTIFICATION_VRC_SFX = False + if config.NOTIFICATION_VRC_SFX is True: + config.NOTIFICATION_VRC_SFX = False return {"status":200, "result":config.NOTIFICATION_VRC_SFX} @staticmethod @@ -1571,12 +1592,14 @@ class Controller: @staticmethod def setEnableAutoClearMessageBox(*args, **kwargs) -> dict: - config.AUTO_CLEAR_MESSAGE_BOX = True + if config.AUTO_CLEAR_MESSAGE_BOX is False: + config.AUTO_CLEAR_MESSAGE_BOX = True return {"status":200, "result":config.AUTO_CLEAR_MESSAGE_BOX} @staticmethod def setDisableAutoClearMessageBox(*args, **kwargs) -> dict: - config.AUTO_CLEAR_MESSAGE_BOX = False + if config.AUTO_CLEAR_MESSAGE_BOX is True: + config.AUTO_CLEAR_MESSAGE_BOX = False return {"status":200, "result":config.AUTO_CLEAR_MESSAGE_BOX} @staticmethod @@ -1585,12 +1608,14 @@ class Controller: @staticmethod def setEnableSendOnlyTranslatedMessages(*args, **kwargs) -> dict: - config.SEND_ONLY_TRANSLATED_MESSAGES = True + if config.SEND_ONLY_TRANSLATED_MESSAGES is False: + config.SEND_ONLY_TRANSLATED_MESSAGES = True return {"status":200, "result":config.SEND_ONLY_TRANSLATED_MESSAGES} @staticmethod def setDisableSendOnlyTranslatedMessages(*args, **kwargs) -> dict: - config.SEND_ONLY_TRANSLATED_MESSAGES = False + if config.SEND_ONLY_TRANSLATED_MESSAGES is True: + config.SEND_ONLY_TRANSLATED_MESSAGES = False return {"status":200, "result":config.SEND_ONLY_TRANSLATED_MESSAGES} @staticmethod @@ -1599,17 +1624,19 @@ class Controller: @staticmethod def setEnableOverlaySmallLog(*args, **kwargs) -> dict: - config.OVERLAY_SMALL_LOG = True - if config.OVERLAY_LARGE_LOG is False: - model.startOverlay() + if config.OVERLAY_SMALL_LOG is False: + if config.OVERLAY_LARGE_LOG is False: + model.startOverlay() + config.OVERLAY_SMALL_LOG = True return {"status":200, "result":config.OVERLAY_SMALL_LOG} @staticmethod def setDisableOverlaySmallLog(*args, **kwargs) -> dict: - config.OVERLAY_SMALL_LOG = False - 
model.clearOverlayImageSmallLog() - if config.OVERLAY_LARGE_LOG is False: - model.shutdownOverlay() + if config.OVERLAY_SMALL_LOG is True: + model.clearOverlayImageSmallLog() + if config.OVERLAY_LARGE_LOG is False: + model.shutdownOverlay() + config.OVERLAY_SMALL_LOG = False return {"status":200, "result":config.OVERLAY_SMALL_LOG} @staticmethod @@ -1628,17 +1655,19 @@ class Controller: @staticmethod def setEnableOverlayLargeLog(*args, **kwargs) -> dict: - config.OVERLAY_LARGE_LOG = True - if config.OVERLAY_SMALL_LOG is False: - model.startOverlay() + if config.OVERLAY_LARGE_LOG is False: + if config.OVERLAY_SMALL_LOG is False: + model.startOverlay() + config.OVERLAY_LARGE_LOG = True return {"status":200, "result":config.OVERLAY_LARGE_LOG} @staticmethod def setDisableOverlayLargeLog(*args, **kwargs) -> dict: - config.OVERLAY_LARGE_LOG = False - model.clearOverlayImageLargeLog() - if config.OVERLAY_SMALL_LOG is False: - model.shutdownOverlay() + if config.OVERLAY_LARGE_LOG is True: + model.clearOverlayImageLargeLog() + if config.OVERLAY_SMALL_LOG is False: + model.shutdownOverlay() + config.OVERLAY_LARGE_LOG = False return {"status":200, "result":config.OVERLAY_LARGE_LOG} @staticmethod @@ -1657,12 +1686,14 @@ class Controller: @staticmethod def setEnableOverlayShowOnlyTranslatedMessages(*args, **kwargs) -> dict: - config.OVERLAY_SHOW_ONLY_TRANSLATED_MESSAGES = True + if config.OVERLAY_SHOW_ONLY_TRANSLATED_MESSAGES is False: + config.OVERLAY_SHOW_ONLY_TRANSLATED_MESSAGES = True return {"status":200, "result":config.OVERLAY_SHOW_ONLY_TRANSLATED_MESSAGES} @staticmethod def setDisableOverlayShowOnlyTranslatedMessages(*args, **kwargs) -> dict: - config.OVERLAY_SHOW_ONLY_TRANSLATED_MESSAGES = False + if config.OVERLAY_SHOW_ONLY_TRANSLATED_MESSAGES is True: + config.OVERLAY_SHOW_ONLY_TRANSLATED_MESSAGES = False return {"status":200, "result":config.OVERLAY_SHOW_ONLY_TRANSLATED_MESSAGES} @staticmethod @@ -1671,12 +1702,14 @@ class Controller: @staticmethod def 
setEnableSendMessageToVrc(*args, **kwargs) -> dict: - config.SEND_MESSAGE_TO_VRC = True + if config.SEND_MESSAGE_TO_VRC is False: + config.SEND_MESSAGE_TO_VRC = True return {"status":200, "result":config.SEND_MESSAGE_TO_VRC} @staticmethod def setDisableSendMessageToVrc(*args, **kwargs) -> dict: - config.SEND_MESSAGE_TO_VRC = False + if config.SEND_MESSAGE_TO_VRC is True: + config.SEND_MESSAGE_TO_VRC = False return {"status":200, "result":config.SEND_MESSAGE_TO_VRC} @staticmethod @@ -1685,12 +1718,14 @@ class Controller: @staticmethod def setEnableSendReceivedMessageToVrc(*args, **kwargs) -> dict: - config.SEND_RECEIVED_MESSAGE_TO_VRC = True + if config.SEND_RECEIVED_MESSAGE_TO_VRC is False: + config.SEND_RECEIVED_MESSAGE_TO_VRC = True return {"status":200, "result":config.SEND_RECEIVED_MESSAGE_TO_VRC} @staticmethod def setDisableSendReceivedMessageToVrc(*args, **kwargs) -> dict: - config.SEND_RECEIVED_MESSAGE_TO_VRC = False + if config.SEND_RECEIVED_MESSAGE_TO_VRC is True: + config.SEND_RECEIVED_MESSAGE_TO_VRC = False return {"status":200, "result":config.SEND_RECEIVED_MESSAGE_TO_VRC} @staticmethod @@ -1717,45 +1752,53 @@ class Controller: @staticmethod def setEnableVrcMicMuteSync(*args, **kwargs) -> dict: - if model.getIsOscQueryEnabled() is True: - config.VRC_MIC_MUTE_SYNC = True - model.setMuteSelfStatus() - model.changeMicTranscriptStatus() - response = {"status":200, "result":config.VRC_MIC_MUTE_SYNC} + if config.VRC_MIC_MUTE_SYNC is False: + if model.getIsOscQueryEnabled() is True: + config.VRC_MIC_MUTE_SYNC = True + model.setMuteSelfStatus() + model.changeMicTranscriptStatus() + response = {"status":200, "result":config.VRC_MIC_MUTE_SYNC} + else: + response = { + "status":400, + "result":{ + "message":"Cannot enable VRC mic mute sync while OSC query is disabled", + "data": config.VRC_MIC_MUTE_SYNC + } + } else: - response = { - "status":400, - "result":{ - "message":"Cannot enable VRC mic mute sync while OSC query is disabled", - "data": 
config.VRC_MIC_MUTE_SYNC - } - } + response = {"status":200, "result":config.VRC_MIC_MUTE_SYNC} return response @staticmethod def setDisableVrcMicMuteSync(*args, **kwargs) -> dict: - config.VRC_MIC_MUTE_SYNC = False - model.changeMicTranscriptStatus() + if config.VRC_MIC_MUTE_SYNC is True: + config.VRC_MIC_MUTE_SYNC = False + model.changeMicTranscriptStatus() return {"status":200, "result":config.VRC_MIC_MUTE_SYNC} def setEnableCheckSpeakerThreshold(self, *args, **kwargs) -> dict: - self.startThreadingCheckSpeakerEnergy() - config.ENABLE_CHECK_ENERGY_RECEIVE = True + if config.ENABLE_CHECK_ENERGY_RECEIVE is False: + self.startThreadingCheckSpeakerEnergy() + config.ENABLE_CHECK_ENERGY_RECEIVE = True return {"status":200, "result":config.ENABLE_CHECK_ENERGY_RECEIVE} def setDisableCheckSpeakerThreshold(self, *args, **kwargs) -> dict: - self.stopThreadingCheckSpeakerEnergy() - config.ENABLE_CHECK_ENERGY_RECEIVE = False + if config.ENABLE_CHECK_ENERGY_RECEIVE is True: + self.stopThreadingCheckSpeakerEnergy() + config.ENABLE_CHECK_ENERGY_RECEIVE = False return {"status":200, "result":config.ENABLE_CHECK_ENERGY_RECEIVE} def setEnableCheckMicThreshold(self, *args, **kwargs) -> dict: - self.startThreadingCheckMicEnergy() - config.ENABLE_CHECK_ENERGY_SEND = True + if config.ENABLE_CHECK_ENERGY_SEND is False: + self.startThreadingCheckMicEnergy() + config.ENABLE_CHECK_ENERGY_SEND = True return {"status":200, "result":config.ENABLE_CHECK_ENERGY_SEND} def setDisableCheckMicThreshold(self, *args, **kwargs) -> dict: - self.stopThreadingCheckMicEnergy() - config.ENABLE_CHECK_ENERGY_SEND = False + if config.ENABLE_CHECK_ENERGY_SEND is True: + self.stopThreadingCheckMicEnergy() + config.ENABLE_CHECK_ENERGY_SEND = False return {"status":200, "result":config.ENABLE_CHECK_ENERGY_SEND} @staticmethod @@ -1769,23 +1812,27 @@ class Controller: return {"status":200, "result":True} def setEnableTranscriptionSend(self, *args, **kwargs) -> dict: - self.startThreadingTranscriptionSendMessage() 
- config.ENABLE_TRANSCRIPTION_SEND = True + if config.ENABLE_TRANSCRIPTION_SEND is False: + self.startThreadingTranscriptionSendMessage() + config.ENABLE_TRANSCRIPTION_SEND = True return {"status":200, "result":config.ENABLE_TRANSCRIPTION_SEND} def setDisableTranscriptionSend(self, *args, **kwargs) -> dict: - self.stopThreadingTranscriptionSendMessage() - config.ENABLE_TRANSCRIPTION_SEND = False + if config.ENABLE_TRANSCRIPTION_SEND is True: + self.stopThreadingTranscriptionSendMessage() + config.ENABLE_TRANSCRIPTION_SEND = False return {"status":200, "result":config.ENABLE_TRANSCRIPTION_SEND} def setEnableTranscriptionReceive(self, *args, **kwargs) -> dict: - self.startThreadingTranscriptionReceiveMessage() - config.ENABLE_TRANSCRIPTION_RECEIVE = True + if config.ENABLE_TRANSCRIPTION_RECEIVE is False: + self.startThreadingTranscriptionReceiveMessage() + config.ENABLE_TRANSCRIPTION_RECEIVE = True return {"status":200, "result":config.ENABLE_TRANSCRIPTION_RECEIVE} def setDisableTranscriptionReceive(self, *args, **kwargs) -> dict: - self.stopThreadingTranscriptionReceiveMessage() - config.ENABLE_TRANSCRIPTION_RECEIVE = False + if config.ENABLE_TRANSCRIPTION_RECEIVE is True: + self.stopThreadingTranscriptionReceiveMessage() + config.ENABLE_TRANSCRIPTION_RECEIVE = False return {"status":200, "result":config.ENABLE_TRANSCRIPTION_RECEIVE} def sendMessageBox(self, data, *args, **kwargs) -> dict: diff --git a/src-python/test_endpoints.py b/src-python/test_endpoints.py index b0664b50..499a4bd9 100644 --- a/src-python/test_endpoints.py +++ b/src-python/test_endpoints.py @@ -73,18 +73,18 @@ class TestMainloop(): "/set/disable/convert_message_to_romaji", "/set/enable/convert_message_to_hiragana", "/set/disable/convert_message_to_hiragana", - # "/set/enable/auto_mic_select", - # "/set/disable/auto_mic_select", + "/set/enable/auto_mic_select", + "/set/disable/auto_mic_select", "/set/enable/mic_automatic_threshold", "/set/disable/mic_automatic_threshold", - # 
"/set/enable/check_mic_threshold", - # "/set/disable/check_mic_threshold", - # "/set/enable/auto_speaker_select", - # "/set/disable/auto_speaker_select", + "/set/enable/check_mic_threshold", + "/set/disable/check_mic_threshold", + "/set/enable/auto_speaker_select", + "/set/disable/auto_speaker_select", "/set/enable/speaker_automatic_threshold", "/set/disable/speaker_automatic_threshold", - # "/set/enable/check_speaker_threshold", - # "/set/disable/check_speaker_threshold", + "/set/enable/check_speaker_threshold", + "/set/disable/check_speaker_threshold", "/set/enable/overlay_small_log", "/set/disable/overlay_small_log", "/set/enable/overlay_large_log", @@ -251,12 +251,26 @@ class TestMainloop(): print(f"\t -> {Color.RED}[ERROR]{Color.RESET} Status: {status}, Result: {result}, Expected: {expected_result}") pprint.pprint(self.config_dict) break + time.sleep(0.2) + + # 最後にすべてOFFにして終了 + for endpoint in self.validity_endpoints: + if endpoint.startswith("/set/disable/"): + result, status = self.main.handleRequest(endpoint, None) + time.sleep(0.2) print("----ON/OFFでのランダムアクセスのテスト終了----") - def test_endpoints_continuous(self): + def test_endpoints_on_off_continuous(self): print("----連続テスト----") # endpoints = ["/set/enable/websocket_server", "/set/disable/websocket_server"] - endpoints = ["/set/enable/transcription_receive", "/set/disable/transcription_receive"] + endpoints = [ + "/set/enable/translation", + "/set/disable/translation", + "/set/enable/transcription_send", + "/set/disable/transcription_send", + "/set/enable/transcription_receive", + "/set/disable/transcription_receive", + ] for i in range(1000): endpoint = random.choice(endpoints) print(f"No.{i:04} Testing endpoint: {endpoint}", end="", flush=True) @@ -280,8 +294,21 @@ class TestMainloop(): print(f"\t -> {Color.RED}[ERROR]{Color.RESET} Status: {status}, Result: {result}, Expected: {expected_result}") pprint.pprint(self.config_dict) break + time.sleep(0.2) + + # 最後にすべてOFFにして終了 + for endpoint in 
self.validity_endpoints: + if endpoint.startswith("/set/disable/"): + result, status = self.main.handleRequest(endpoint, None) + time.sleep(0.2) print("----連続テスト終了----") if __name__ == "__main__": - test = TestMainloop() - test.test_endpoints_continuous() \ No newline at end of file + try: + test = TestMainloop() + test.test_endpoints_on_off_random() + # test.test_endpoints_continuous() + except KeyboardInterrupt: + print("Interrupted by user, shutting down...") + except Exception as e: + print(f"An error occurred: {e}") \ No newline at end of file From d9f1dabecb39bd62c5f7415f1ccb57b017b7b2b5 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Tue, 23 Sep 2025 20:51:09 +0900 Subject: [PATCH 37/92] =?UTF-8?q?=E3=83=86=E3=82=B9=E3=83=88=E3=83=A1?= =?UTF-8?q?=E3=82=BD=E3=83=83=E3=83=89=E3=81=AE=E5=87=BA=E5=8A=9B=E3=83=A1?= =?UTF-8?q?=E3=83=83=E3=82=BB=E3=83=BC=E3=82=B8=E3=82=92=E6=94=B9=E5=96=84?= =?UTF-8?q?=E3=81=97=E3=80=81=E3=83=87=E3=83=BC=E3=82=BF=E8=A8=AD=E5=AE=9A?= =?UTF-8?q?=E7=B3=BB=E3=81=AE=E3=82=A8=E3=83=B3=E3=83=89=E3=83=9D=E3=82=A4?= =?UTF-8?q?=E3=83=B3=E3=83=88=E3=83=86=E3=82=B9=E3=83=88=E3=82=92=E8=BF=BD?= =?UTF-8?q?=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src-python/test_endpoints.py | 174 ++++++++++++++++++++++++++++++++++- 1 file changed, 170 insertions(+), 4 deletions(-) diff --git a/src-python/test_endpoints.py b/src-python/test_endpoints.py index 499a4bd9..5fedaae5 100644 --- a/src-python/test_endpoints.py +++ b/src-python/test_endpoints.py @@ -32,7 +32,7 @@ class Color: BG_WHITE = '\033[47m'#(背景)白 BG_DEFAULT = '\033[49m'#背景色をデフォルトに戻す RESET = '\033[0m'#全てリセット - + class TestMainloop(): def __init__(self): self.main = main_instance @@ -261,7 +261,7 @@ class TestMainloop(): print("----ON/OFFでのランダムアクセスのテスト終了----") def test_endpoints_on_off_continuous(self): - print("----連続テスト----") + print("----ON/OFF連続テスト----") # endpoints = 
["/set/enable/websocket_server", "/set/disable/websocket_server"] endpoints = [ "/set/enable/translation", @@ -301,13 +301,179 @@ class TestMainloop(): if endpoint.startswith("/set/disable/"): result, status = self.main.handleRequest(endpoint, None) time.sleep(0.2) - print("----連続テスト終了----") + print("----ON/OFF連続テスト終了----") + + def test_set_data_endpoints(self): + print("----データ設定系のエンドポイントのテスト----") + for endpoint in self.set_data_endpoints: + print(f"Testing endpoint: {endpoint}", end=" ", flush=True) + match endpoint: + case "/set/data/selected_tab_no": + data = random.choice(["1", "2", "3"]) + case "/set/data/selected_translation_engines": + translation_engines = self.config_dict.get("translation_engines", None) + data = {} + for i in ["1", "2", "3"]: + data[i] = random.choice(translation_engines) + case "/set/data/selected_your_languages": + selectable_language_list = self.config_dict.get("selectable_language_list", None) + data = {} + for i in ["1", "2", "3"]: + data[i] = {} + data[i]["1"] = random.choice(selectable_language_list) | {"enable": True} + case "/set/data/selected_target_languages": + selectable_language_list = self.config_dict.get("selectable_language_list", None) + data = {} + for i in ["1", "2", "3"]: + data[i] = {} + for j in ["1", "2", "3"]: + data[i][j] = random.choice(selectable_language_list) | {"enable": random.choice([True, False])} + case "/set/data/selected_transcription_engine": + transcription_engines = self.config_dict.get("transcription_engines", None) + data = random.choice(transcription_engines) + case "/set/data/transparency": + data = random.randint(0, 100) + case "/set/data/ui_scaling": + data = random.randint(50, 200) + case "/set/data/textbox_ui_scaling": + data = random.randint(50, 200) + case "/set/data/message_box_ratio": + data = round(random.uniform(0.1, 0.9), 2) + case "/set/data/send_message_button_type": + data = random.choice(["show", "hide", "show_and_disable_enter_key"]) + case "/set/data/font_family": + data = 
random.choice(["Arial", "Verdana", "Times New Roman"]) + case "/set/data/ui_language": + data = random.choice(["en", "ja", "ko", "zh-Hant", "zh-Hans"]) + case "/set/data/main_window_geometry": + data = { + "x_pos": random.randint(0, 1920), + "y_pos": random.randint(0, 1080), + "width": random.randint(800, 1920), + "height": random.randint(600, 1080) + } + case "/set/data/selected_translation_compute_device": + data = random.choice(self.config_dict["translation_compute_device_list"]) + case "/set/data/selected_transcription_compute_device": + data = random.choice(self.config_dict["transcription_compute_device_list"]) + case "/set/data/ctranslate2_weight_type": + data = random.choice(list(self.config_dict["selectable_ctranslate2_weight_type_dict"].keys())) + case "/set/data/deepl_auth_key": + data = None # Set to None to avoid using a real key + case "/set/data/selected_mic_host": + data = random.choice(self.config_dict["mic_host_list"]) + case "/set/data/selected_mic_device": + data = random.choice(self.config_dict["mic_device_list"]) + case "/set/data/mic_threshold": + data = random.randint(0, 100) + case "/set/data/mic_record_timeout": + data = random.randint(1, 3) + case "/set/data/mic_phrase_timeout": + data = random.randint(5, 10) + case "/set/data/mic_max_phrases": + data = random.randint(1, 10) + case "/set/data/hotkeys": + data = { + 'toggle_vrct_visibility': None, + 'toggle_translation': None, + 'toggle_transcription_send': None, + 'toggle_transcription_receive': None + } + case "/set/data/plugins_status": + data = {plugin: random.choice([True, False]) for plugin in self.config_dict.get("plugins", [])} + case "/set/data/mic_avg_logprob": + data = random.uniform(-5, 0) + case "/set/data/mic_no_speech_prob": + data = random.uniform(0, 1) + case "/set/data/mic_word_filter": + data = random.choice( + [ + ["test_0_0", "test_0_1", "test_0_2", None], + ["test_1_0", "test_1_1", None], + ["test_2_0", None], + [None] + ] + ) + case 
"/set/data/selected_speaker_device": + data = random.choice(self.config_dict["speaker_device_list"]) + case "/set/data/speaker_threshold": + data = random.randint(0, 100) + case "/set/data/speaker_record_timeout": + data = random.randint(1, 3) + case "/set/data/speaker_phrase_timeout": + data = random.randint(5, 10) + case "/set/data/speaker_max_phrases": + data = random.randint(1, 10) + case "/set/data/speaker_avg_logprob": + data = random.uniform(-5, 0) + case "/set/data/speaker_no_speech_prob": + data = random.uniform(0, 1) + case "/set/data/whisper_weight_type": + data = random.choice([key for key, value in self.config_dict["selectable_whisper_weight_type_dict"].items() if value is True]) + case "/set/data/overlay_small_log_settings": + data = { + "x_pos": random.random(), + "y_pos": random.random(), + "z_pos": random.random(), + "x_rotation": random.random(), + "y_rotation": random.random(), + "z_rotation": random.random(), + "display_duration": random.randint(0, 100), + "fadeout_duration": random.randint(0, 100), + "opacity": random.random(), + "ui_scaling": random.random(), + "tracker": random.choice(["HMD", "LeftHand", "RightHand"]), + } + case "/set/data/overlay_large_log_settings": + data = { + "x_pos": random.random(), + "y_pos": random.random(), + "z_pos": random.random(), + "x_rotation": random.random(), + "y_rotation": random.random(), + "z_rotation": random.random(), + "display_duration": random.randint(0, 100), + "fadeout_duration": random.randint(0, 100), + "opacity": random.random(), + "ui_scaling": random.random(), + "tracker": random.choice(["HMD", "LeftHand", "RightHand"]), + } + case "/set/data/send_message_format_parts": + data = self.config_dict["send_message_format_parts"] + case "/set/data/received_message_format_parts": + data = self.config_dict["received_message_format_parts"] + case "/set/data/websocket_host": + data = "127.0.0.1" + case "/set/data/websocket_port": + data = random.randint(1024, 65535) + case "/set/data/osc_ip_address": 
+ data = "127.0.0.1" + case "/set/data/osc_port": + data = random.randint(1024, 65535) + case _: + data = None + + if data is not None: + print(f"data: {data}", end=" ", flush=True) + result, status = self.main.handleRequest(endpoint, data) + if status == 200: + self.config_dict[endpoint.split("/")[-1]] = result + print(f"\t -> {Color.GREEN}[PASS]{Color.RESET} Status: {status}, Result: {result}") + else: + print(f"\t -> {Color.RED}[ERROR]{Color.RESET} Status: {status}, Result: {result}") + print(f" Current config_dict: {self.config_dict}") + break + else: + print(f"\t -> {Color.YELLOW}[SKIP]{Color.RESET} No data to set for this endpoint.") + time.sleep(0.2) + print("----データ設定系のエンドポイントのテスト終了----") if __name__ == "__main__": try: test = TestMainloop() - test.test_endpoints_on_off_random() + # test.test_endpoints_on_off_random() # test.test_endpoints_continuous() + test.test_set_data_endpoints() except KeyboardInterrupt: print("Interrupted by user, shutting down...") except Exception as e: From 95cf247e2ef8e302896c839856894a769d55ec13 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Tue, 23 Sep 2025 23:19:35 +0900 Subject: [PATCH 38/92] =?UTF-8?q?=E5=87=A6=E7=90=86=E3=81=AE=E5=AE=89?= =?UTF-8?q?=E5=AE=9A=E5=8C=96=E3=81=AE=E3=81=9F=E3=82=81=E3=81=AB=E5=BE=85?= =?UTF-8?q?=E6=A9=9F=E6=99=82=E9=96=93=E3=82=92=E8=BF=BD=E5=8A=A0=E3=81=97?= =?UTF-8?q?=E3=80=81=E3=82=A8=E3=83=B3=E3=83=89=E3=83=9D=E3=82=A4=E3=83=B3?= =?UTF-8?q?=E3=83=88=E3=83=86=E3=82=B9=E3=83=88=E3=82=92=E5=BC=B7=E5=8C=96?= =?UTF-8?q?=E3=80=82=E3=83=86=E3=82=B9=E3=83=88=E3=82=B1=E3=83=BC=E3=82=B9?= =?UTF-8?q?=E3=82=92=E6=95=B4=E7=90=86=E3=81=97=E3=80=81=E5=AE=9F=E8=A1=8C?= =?UTF-8?q?=E7=B3=BB=E3=81=AE=E3=82=A8=E3=83=B3=E3=83=89=E3=83=9D=E3=82=A4?= =?UTF-8?q?=E3=83=B3=E3=83=88=E3=83=86=E3=82=B9=E3=83=88=E3=82=92=E6=96=B0?= =?UTF-8?q?=E3=81=9F=E3=81=AB=E8=BF=BD=E5=8A=A0=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 
8bit --- src-python/mainloop.py | 1 + src-python/test_endpoints.py | 275 +++++++++++++++++++++-------------- 2 files changed, 169 insertions(+), 107 deletions(-) diff --git a/src-python/mainloop.py b/src-python/mainloop.py index 32b44dbd..6e5f6548 100644 --- a/src-python/mainloop.py +++ b/src-python/mainloop.py @@ -386,6 +386,7 @@ class Main: response = handler["variable"](data) status = response.get("status", None) result = response.get("result", None) + time.sleep(0.2) # 処理の安定化のために少し待機 except Exception as e: errorLogging() result = str(e) diff --git a/src-python/test_endpoints.py b/src-python/test_endpoints.py index 5fedaae5..0c7c3704 100644 --- a/src-python/test_endpoints.py +++ b/src-python/test_endpoints.py @@ -163,43 +163,55 @@ class TestMainloop(): self.run_endpoints = { "/run/send_message_box": [ { - "data": {"id":"123456", "message":"test"}, + "data": {"id":"000001", "message":"test"}, "status": 200, - "result": { - 'id': '123456', - 'original': { - 'message': 'test', - 'transliteration': [] - }, - 'translations': [] - } - } + }, + { + # 英語 + "data": {"id":"000002", "message":"Hello World!"}, + "status": 200, + }, + { + # 日本語 + "data": {"id":"000003", "message":"こんにちわ 世界!"}, + "status": 200, + }, + { + # 韓国語 + "data": {"id":"000004", "message":"안녕하세요 세계!"}, + "status": 200, + }, + { + # 中国語 繁体字 + "data": {"id":"000005", "message":"你好,世界!"}, + "status": 200, + }, ], "/run/typing_message_box": [{"data": None, "status": 200, "result": True}], "/run/stop_typing_message_box": [{"data": None, "status": 200, "result": True}], "/run/send_text_overlay": [{"data": "test_overlay", "status": 200, "result": "test_overlay"}], - "/run/swap_your_language_and_target_language": [{"data": None, "status": 200, "result": True}], + "/run/swap_your_language_and_target_language": [{"data": None, "status": 200}], # !!!Cant be tested here!!! 
# "/run/update_software": [{"data": None, "status": 200, "result": True}], # "/run/update_cuda_software": [{"data": None, "status": 200, "result": True}], - "/run/download_ctranslate2_weight": [ - {"data": "small", "status": 200, "result": True}, - {"data": "large", "status": 400, "result": False}, - ], - "/run/download_whisper_weight": [ - {"data": "tiny", "status": 200, "result": True}, - {"data": "base", "status": 200, "result": True}, - {"data": "small", "status": 200, "result": True}, - {"data": "medium", "status": 200, "result": True}, - {"data": "large-v1", "status": 200, "result": True}, - {"data": "large-v2", "status": 400, "result": False}, - {"data": "large-v3", "status": 400, "result": False}, - {"data": "large-v3-turbo-int8", "status": 400, "result": False}, - {"data": "large-v3-turbo", "status": 400, "result": False} - ], - "/run/open_filepath_logs": {"data": None, "status": 200, "result": True}, - "/run/open_filepath_config_file": {"data": None, "status": 200, "result": True}, - "/run/feed_watchdog": {"data": None, "status": 200, "result": True}, + # "/run/download_ctranslate2_weight": [ + # {"data": "small", "status": 200, "result": True}, + # {"data": "large", "status": 400, "result": False}, + # ], + # "/run/download_whisper_weight": [ + # {"data": "tiny", "status": 200, "result": True}, + # {"data": "base", "status": 200, "result": True}, + # {"data": "small", "status": 200, "result": True}, + # {"data": "medium", "status": 200, "result": True}, + # {"data": "large-v1", "status": 200, "result": True}, + # {"data": "large-v2", "status": 400, "result": True}, + # {"data": "large-v3", "status": 400, "result": True}, + # {"data": "large-v3-turbo-int8", "status": 400, "result": True}, + # {"data": "large-v3-turbo", "status": 400, "result": True} + # ], + # "/run/open_filepath_logs": {"data": None, "status": 200, "result": True}, + # "/run/open_filepath_config_file": {"data": None, "status": 200, "result": True}, + # "/run/feed_watchdog": {"data": 
None, "status": 200, "result": True}, } def test_endpoints_on_off_single(self): @@ -226,84 +238,81 @@ class TestMainloop(): break print("----ON/OFF系のエンドポイントのテスト終了----") - def test_endpoints_on_off_random(self): - print("----ON/OFFでのランダムアクセスのテスト----") - for i in range(1000): - endpoint = random.choice(self.validity_endpoints) - print(f"No.{i:04} Testing endpoint: {endpoint}", end="", flush=True) - if endpoint.startswith("/set/enable/"): - result, status = self.main.handleRequest(endpoint, None) - expected_result = True - if result == expected_result and status == 200: - self.config_dict[endpoint.split("/")[-1]] = result - print(f"\t -> {Color.GREEN}[PASS]{Color.RESET} Status: {status}, Result: {result}") - else: - print(f"\t -> {Color.RED}[ERROR]{Color.RESET} Status: {status}, Result: {result}, Expected: {expected_result}") - pprint.pprint(self.config_dict) - break - elif endpoint.startswith("/set/disable/"): - result, status = self.main.handleRequest(endpoint, None) - expected_result = False - if result == expected_result and status == 200: - self.config_dict[endpoint.split("/")[-1]] = result - print(f"\t -> {Color.GREEN}[PASS]{Color.RESET} Status: {status}, Result: {result}") - else: - print(f"\t -> {Color.RED}[ERROR]{Color.RESET} Status: {status}, Result: {result}, Expected: {expected_result}") - pprint.pprint(self.config_dict) - break - time.sleep(0.2) + # def test_endpoints_on_off_random(self): + # print("----ON/OFFでのランダムアクセスのテスト----") + # for i in range(1000): + # endpoint = random.choice(self.validity_endpoints) + # print(f"No.{i:04} Testing endpoint: {endpoint}", end="", flush=True) + # if endpoint.startswith("/set/enable/"): + # result, status = self.main.handleRequest(endpoint, None) + # expected_result = True + # if result == expected_result and status == 200: + # self.config_dict[endpoint.split("/")[-1]] = result + # print(f"\t -> {Color.GREEN}[PASS]{Color.RESET} Status: {status}, Result: {result}") + # else: + # print(f"\t -> 
{Color.RED}[ERROR]{Color.RESET} Status: {status}, Result: {result}, Expected: {expected_result}") + # pprint.pprint(self.config_dict) + # break + # elif endpoint.startswith("/set/disable/"): + # result, status = self.main.handleRequest(endpoint, None) + # expected_result = False + # if result == expected_result and status == 200: + # self.config_dict[endpoint.split("/")[-1]] = result + # print(f"\t -> {Color.GREEN}[PASS]{Color.RESET} Status: {status}, Result: {result}") + # else: + # print(f"\t -> {Color.RED}[ERROR]{Color.RESET} Status: {status}, Result: {result}, Expected: {expected_result}") + # pprint.pprint(self.config_dict) + # break - # 最後にすべてOFFにして終了 - for endpoint in self.validity_endpoints: - if endpoint.startswith("/set/disable/"): - result, status = self.main.handleRequest(endpoint, None) - time.sleep(0.2) - print("----ON/OFFでのランダムアクセスのテスト終了----") + # # 最後にすべてOFFにして終了 + # for endpoint in self.validity_endpoints: + # if endpoint.startswith("/set/disable/"): + # result, status = self.main.handleRequest(endpoint, None) + # time.sleep(0.2) + # print("----ON/OFFでのランダムアクセスのテスト終了----") - def test_endpoints_on_off_continuous(self): - print("----ON/OFF連続テスト----") - # endpoints = ["/set/enable/websocket_server", "/set/disable/websocket_server"] - endpoints = [ - "/set/enable/translation", - "/set/disable/translation", - "/set/enable/transcription_send", - "/set/disable/transcription_send", - "/set/enable/transcription_receive", - "/set/disable/transcription_receive", - ] - for i in range(1000): - endpoint = random.choice(endpoints) - print(f"No.{i:04} Testing endpoint: {endpoint}", end="", flush=True) - if endpoint.startswith("/set/enable/"): - result, status = self.main.handleRequest(endpoint, None) - expected_result = True - if result == expected_result and status == 200: - self.config_dict[endpoint.split("/")[-1]] = result - print(f"\t -> {Color.GREEN}[PASS]{Color.RESET} Status: {status}, Result: {result}") - else: - print(f"\t -> 
{Color.RED}[ERROR]{Color.RESET} Status: {status}, Result: {result}, Expected: {expected_result}") - pprint.pprint(self.config_dict) - break - elif endpoint.startswith("/set/disable/"): - result, status = self.main.handleRequest(endpoint, None) - expected_result = False - if result == expected_result and status == 200: - self.config_dict[endpoint.split("/")[-1]] = result - print(f"\t -> {Color.GREEN}[PASS]{Color.RESET} Status: {status}, Result: {result}") - else: - print(f"\t -> {Color.RED}[ERROR]{Color.RESET} Status: {status}, Result: {result}, Expected: {expected_result}") - pprint.pprint(self.config_dict) - break - time.sleep(0.2) + # def test_endpoints_on_off_continuous(self): + # print("----ON/OFF連続テスト----") + # # endpoints = ["/set/enable/websocket_server", "/set/disable/websocket_server"] + # endpoints = [ + # "/set/enable/translation", + # "/set/disable/translation", + # "/set/enable/transcription_send", + # "/set/disable/transcription_send", + # "/set/enable/transcription_receive", + # "/set/disable/transcription_receive", + # ] + # for i in range(1000): + # endpoint = random.choice(endpoints) + # print(f"No.{i:04} Testing endpoint: {endpoint}", end="", flush=True) + # if endpoint.startswith("/set/enable/"): + # result, status = self.main.handleRequest(endpoint, None) + # expected_result = True + # if result == expected_result and status == 200: + # self.config_dict[endpoint.split("/")[-1]] = result + # print(f"\t -> {Color.GREEN}[PASS]{Color.RESET} Status: {status}, Result: {result}") + # else: + # print(f"\t -> {Color.RED}[ERROR]{Color.RESET} Status: {status}, Result: {result}, Expected: {expected_result}") + # pprint.pprint(self.config_dict) + # break + # elif endpoint.startswith("/set/disable/"): + # result, status = self.main.handleRequest(endpoint, None) + # expected_result = False + # if result == expected_result and status == 200: + # self.config_dict[endpoint.split("/")[-1]] = result + # print(f"\t -> {Color.GREEN}[PASS]{Color.RESET} Status: 
{status}, Result: {result}") + # else: + # print(f"\t -> {Color.RED}[ERROR]{Color.RESET} Status: {status}, Result: {result}, Expected: {expected_result}") + # pprint.pprint(self.config_dict) + # break - # 最後にすべてOFFにして終了 - for endpoint in self.validity_endpoints: - if endpoint.startswith("/set/disable/"): - result, status = self.main.handleRequest(endpoint, None) - time.sleep(0.2) - print("----ON/OFF連続テスト終了----") + # # 最後にすべてOFFにして終了 + # for endpoint in self.validity_endpoints: + # if endpoint.startswith("/set/disable/"): + # result, status = self.main.handleRequest(endpoint, None) + # print("----ON/OFF連続テスト終了----") - def test_set_data_endpoints(self): + def test_set_data_endpoints_single(self): print("----データ設定系のエンドポイントのテスト----") for endpoint in self.set_data_endpoints: print(f"Testing endpoint: {endpoint}", end=" ", flush=True) @@ -465,15 +474,67 @@ class TestMainloop(): break else: print(f"\t -> {Color.YELLOW}[SKIP]{Color.RESET} No data to set for this endpoint.") - time.sleep(0.2) print("----データ設定系のエンドポイントのテスト終了----") + def test_run_endpoints_single(self): + print("----実行系のエンドポイントのテスト----") + for endpoint, tests in self.run_endpoints.items(): + print(f"Testing endpoint: {endpoint}", end=" ", flush=True) + match endpoint: + case "/run/send_message_box": + for test in tests: + data = test["data"] + expected_status = test["status"] + result, status = self.main.handleRequest(endpoint, data) + if status == expected_status: + print(f"\t -> {Color.GREEN}[PASS]{Color.RESET} Status: {status}, Result: {result}") + else: + print(f"\t -> {Color.RED}[ERROR]{Color.RESET} Status: {status}, Result: {result}") + print(f" Current config_dict: {self.config_dict}") + break + case "/run/typing_message_box" | "/run/stop_typing_message_box": + for test in tests: + data = test["data"] + expected_status = test["status"] + expected_result = test["result"] + result, status = self.main.handleRequest(endpoint, data) + if status == expected_status and result == expected_result: + print(f"\t 
-> {Color.GREEN}[PASS]{Color.RESET} Status: {status}, Result: {result}") + else: + print(f"\t -> {Color.RED}[ERROR]{Color.RESET} Status: {status}, Result: {result}, Expected: {expected_result}") + print(f" Current config_dict: {self.config_dict}") + break + case "/run/send_text_overlay": + for test in tests: + data = test["data"] + expected_status = test["status"] + expected_result = test["result"] + result, status = self.main.handleRequest(endpoint, data) + if status == expected_status and result == expected_result: + print(f"\t -> {Color.GREEN}[PASS]{Color.RESET} Status: {status}, Result: {result}") + else: + print(f"\t -> {Color.RED}[ERROR]{Color.RESET} Status: {status}, Result: {result}, Expected: {expected_result}") + print(f" Current config_dict: {self.config_dict}") + break + case "/run/swap_your_language_and_target_language": + for test in tests: + data = test["data"] + expected_status = test["status"] + result, status = self.main.handleRequest(endpoint, data) + if status == expected_status: + print(f"\t -> {Color.GREEN}[PASS]{Color.RESET} Status: {status}, Result: {result}") + else: + print(f"\t -> {Color.RED}[ERROR]{Color.RESET} Status: {status}, Result: {result}") + print(f" Current config_dict: {self.config_dict}") + break + print("----実行系のエンドポイントのテスト終了----") + if __name__ == "__main__": try: test = TestMainloop() - # test.test_endpoints_on_off_random() - # test.test_endpoints_continuous() - test.test_set_data_endpoints() + test.test_endpoints_on_off_single() + test.test_set_data_endpoints_single() + test.test_run_endpoints_single() except KeyboardInterrupt: print("Interrupted by user, shutting down...") except Exception as e: From c38e474385c6f18bd80c20669d16a686a707cd27 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Wed, 24 Sep 2025 01:06:11 +0900 Subject: [PATCH 39/92] =?UTF-8?q?=E3=82=A8=E3=83=B3=E3=83=89=E3=83=9D?= =?UTF-8?q?=E3=82=A4=E3=83=B3=E3=83=88=E3=83=86=E3=82=B9=E3=83=88=E3=82=92?= 
=?UTF-8?q?=E5=BC=B7=E5=8C=96=E3=81=97=E3=80=81ON/OFF=E7=8A=B6=E6=85=8B?= =?UTF-8?q?=E3=81=AE=E3=83=86=E3=82=B9=E3=83=88=E3=83=A1=E3=82=BD=E3=83=83?= =?UTF-8?q?=E3=83=89=E3=82=92=E3=83=AA=E3=83=95=E3=82=A1=E3=82=AF=E3=82=BF?= =?UTF-8?q?=E3=83=AA=E3=83=B3=E3=82=B0=E3=80=82=E3=83=87=E3=83=BC=E3=82=BF?= =?UTF-8?q?=E8=A8=AD=E5=AE=9A=E7=B3=BB=E3=81=8A=E3=82=88=E3=81=B3=E5=AE=9F?= =?UTF-8?q?=E8=A1=8C=E7=B3=BB=E3=82=A8=E3=83=B3=E3=83=89=E3=83=9D=E3=82=A4?= =?UTF-8?q?=E3=83=B3=E3=83=88=E3=81=AE=E3=83=86=E3=82=B9=E3=83=88=E3=82=92?= =?UTF-8?q?=E8=BF=BD=E5=8A=A0=E3=81=97=E3=80=81=E3=83=A9=E3=83=B3=E3=83=80?= =?UTF-8?q?=E3=83=A0=E3=82=A2=E3=82=AF=E3=82=BB=E3=82=B9=E3=81=AE=E3=83=86?= =?UTF-8?q?=E3=82=B9=E3=83=88=E3=82=92=E5=AE=9F=E8=A3=85=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src-python/test_endpoints.py | 612 ++++++++++++++++++----------------- 1 file changed, 318 insertions(+), 294 deletions(-) diff --git a/src-python/test_endpoints.py b/src-python/test_endpoints.py index 0c7c3704..e2f1b18f 100644 --- a/src-python/test_endpoints.py +++ b/src-python/test_endpoints.py @@ -214,327 +214,351 @@ class TestMainloop(): # "/run/feed_watchdog": {"data": None, "status": 200, "result": True}, } - def test_endpoints_on_off_single(self): + def test_endpoints_on_off_single(self, endpoint): + success = False + if endpoint.startswith("/set/enable/"): + result, status = self.main.handleRequest(endpoint, None) + if result is True and status == 200: + self.config_dict[endpoint.split("/")[-1]] = result + print(f"\t -> {Color.GREEN}[PASS]{Color.RESET} Status: {status}, Result: {result}") + success = True + else: + print(f"\t -> {Color.RED}[ERROR]{Color.RESET} Status: {status}, Result: {result}") + print(f"Current config_dict: {self.config_dict}") + elif endpoint.startswith("/set/disable/"): + result, status = self.main.handleRequest(endpoint, None) + if result is False and status == 200: + self.config_dict[endpoint.split("/")[-1]] 
= result + print(f"\t -> {Color.GREEN}[PASS]{Color.RESET} Status: {status}, Result: {result}") + success = True + else: + print(f"\t -> {Color.RED}[ERROR]{Color.RESET} Status: {status}, Result: {result}") + print(f"Current config_dict: {self.config_dict}") + return success + + def test_endpoints_on_off_all(self): print("----ON/OFF系のエンドポイントのテスト----") for endpoint in self.validity_endpoints: print(f"Testing endpoint: {endpoint}", end="", flush=True) - if endpoint.startswith("/set/enable/"): - result, status = self.main.handleRequest(endpoint, None) - if result is True and status == 200: - self.config_dict[endpoint.split("/")[-1]] = result - print(f"\t -> {Color.GREEN}[PASS]{Color.RESET} Status: {status}, Result: {result}") - else: - print(f"\t -> {Color.RED}[ERROR]{Color.RESET} Status: {status}, Result: {result}") - print(f"Current config_dict: {self.config_dict}") - break - elif endpoint.startswith("/set/disable/"): - result, status = self.main.handleRequest(endpoint, None) - if result is False and status == 200: - self.config_dict[endpoint.split("/")[-1]] = result - print(f"\t -> {Color.GREEN}[PASS]{Color.RESET} Status: {status}, Result: {result}") - else: - print(f"\t -> {Color.RED}[ERROR]{Color.RESET} Status: {status}, Result: {result}") - print(f"Current config_dict: {self.config_dict}") - break + if self.test_endpoints_on_off_single(endpoint) is False: + break print("----ON/OFF系のエンドポイントのテスト終了----") - # def test_endpoints_on_off_random(self): - # print("----ON/OFFでのランダムアクセスのテスト----") - # for i in range(1000): - # endpoint = random.choice(self.validity_endpoints) - # print(f"No.{i:04} Testing endpoint: {endpoint}", end="", flush=True) - # if endpoint.startswith("/set/enable/"): - # result, status = self.main.handleRequest(endpoint, None) - # expected_result = True - # if result == expected_result and status == 200: - # self.config_dict[endpoint.split("/")[-1]] = result - # print(f"\t -> {Color.GREEN}[PASS]{Color.RESET} Status: {status}, Result: {result}") - # 
else: - # print(f"\t -> {Color.RED}[ERROR]{Color.RESET} Status: {status}, Result: {result}, Expected: {expected_result}") - # pprint.pprint(self.config_dict) - # break - # elif endpoint.startswith("/set/disable/"): - # result, status = self.main.handleRequest(endpoint, None) - # expected_result = False - # if result == expected_result and status == 200: - # self.config_dict[endpoint.split("/")[-1]] = result - # print(f"\t -> {Color.GREEN}[PASS]{Color.RESET} Status: {status}, Result: {result}") - # else: - # print(f"\t -> {Color.RED}[ERROR]{Color.RESET} Status: {status}, Result: {result}, Expected: {expected_result}") - # pprint.pprint(self.config_dict) - # break + def test_endpoints_on_off_random(self): + print("----ON/OFFでのランダムアクセスのテスト----") + for i in range(1000): + endpoint = random.choice(self.validity_endpoints) + print(f"No.{i:04} Testing endpoint: {endpoint}", end="", flush=True) + if self.test_endpoints_on_off_single(endpoint) is False: + break - # # 最後にすべてOFFにして終了 - # for endpoint in self.validity_endpoints: - # if endpoint.startswith("/set/disable/"): - # result, status = self.main.handleRequest(endpoint, None) - # time.sleep(0.2) - # print("----ON/OFFでのランダムアクセスのテスト終了----") + # 最後にすべてOFFにして終了 + for endpoint in self.validity_endpoints: + if endpoint.startswith("/set/disable/"): + result, status = self.main.handleRequest(endpoint, None) + time.sleep(0.2) + print("----ON/OFFでのランダムアクセスのテスト終了----") - # def test_endpoints_on_off_continuous(self): - # print("----ON/OFF連続テスト----") - # # endpoints = ["/set/enable/websocket_server", "/set/disable/websocket_server"] - # endpoints = [ - # "/set/enable/translation", - # "/set/disable/translation", - # "/set/enable/transcription_send", - # "/set/disable/transcription_send", - # "/set/enable/transcription_receive", - # "/set/disable/transcription_receive", - # ] - # for i in range(1000): - # endpoint = random.choice(endpoints) - # print(f"No.{i:04} Testing endpoint: {endpoint}", end="", flush=True) - # if 
endpoint.startswith("/set/enable/"): - # result, status = self.main.handleRequest(endpoint, None) - # expected_result = True - # if result == expected_result and status == 200: - # self.config_dict[endpoint.split("/")[-1]] = result - # print(f"\t -> {Color.GREEN}[PASS]{Color.RESET} Status: {status}, Result: {result}") - # else: - # print(f"\t -> {Color.RED}[ERROR]{Color.RESET} Status: {status}, Result: {result}, Expected: {expected_result}") - # pprint.pprint(self.config_dict) - # break - # elif endpoint.startswith("/set/disable/"): - # result, status = self.main.handleRequest(endpoint, None) - # expected_result = False - # if result == expected_result and status == 200: - # self.config_dict[endpoint.split("/")[-1]] = result - # print(f"\t -> {Color.GREEN}[PASS]{Color.RESET} Status: {status}, Result: {result}") - # else: - # print(f"\t -> {Color.RED}[ERROR]{Color.RESET} Status: {status}, Result: {result}, Expected: {expected_result}") - # pprint.pprint(self.config_dict) - # break + def test_endpoints_on_off_continuous(self): + print("----ON/OFF連続テスト----") + endpoints = ["/set/enable/websocket_server", "/set/disable/websocket_server"] + # endpoints = [ + # "/set/enable/translation", + # "/set/disable/translation", + # "/set/enable/transcription_send", + # "/set/disable/transcription_send", + # "/set/enable/transcription_receive", + # "/set/disable/transcription_receive", + # ] + for i in range(1000): + endpoint = random.choice(endpoints) + print(f"No.{i:04} Testing endpoint: {endpoint}", end="", flush=True) + if self.test_endpoints_on_off_single(endpoint) is False: + break - # # 最後にすべてOFFにして終了 - # for endpoint in self.validity_endpoints: - # if endpoint.startswith("/set/disable/"): - # result, status = self.main.handleRequest(endpoint, None) - # print("----ON/OFF連続テスト終了----") + # 最後にすべてOFFにして終了 + for endpoint in self.validity_endpoints: + if endpoint.startswith("/set/disable/"): + result, status = self.main.handleRequest(endpoint, None) + 
print("----ON/OFF連続テスト終了----") - def test_set_data_endpoints_single(self): + def test_set_data_endpoints_single(self, endpoint): + success = False + match endpoint: + case "/set/data/selected_tab_no": + data = random.choice(["1", "2", "3"]) + case "/set/data/selected_translation_engines": + translation_engines = self.config_dict.get("translation_engines", None) + data = {} + for i in ["1", "2", "3"]: + data[i] = random.choice(translation_engines) + case "/set/data/selected_your_languages": + selectable_language_list = self.config_dict.get("selectable_language_list", None) + data = {} + for i in ["1", "2", "3"]: + data[i] = {} + data[i]["1"] = random.choice(selectable_language_list) | {"enable": True} + case "/set/data/selected_target_languages": + selectable_language_list = self.config_dict.get("selectable_language_list", None) + data = {} + for i in ["1", "2", "3"]: + data[i] = {} + for j in ["1", "2", "3"]: + data[i][j] = random.choice(selectable_language_list) | {"enable": random.choice([True, False])} + case "/set/data/selected_transcription_engine": + transcription_engines = self.config_dict.get("transcription_engines", None) + data = random.choice(transcription_engines) + case "/set/data/transparency": + data = random.randint(0, 100) + case "/set/data/ui_scaling": + data = random.randint(50, 200) + case "/set/data/textbox_ui_scaling": + data = random.randint(50, 200) + case "/set/data/message_box_ratio": + data = round(random.uniform(0.1, 0.9), 2) + case "/set/data/send_message_button_type": + data = random.choice(["show", "hide", "show_and_disable_enter_key"]) + case "/set/data/font_family": + data = random.choice(["Arial", "Verdana", "Times New Roman"]) + case "/set/data/ui_language": + data = random.choice(["en", "ja", "ko", "zh-Hant", "zh-Hans"]) + case "/set/data/main_window_geometry": + data = { + "x_pos": random.randint(0, 1920), + "y_pos": random.randint(0, 1080), + "width": random.randint(800, 1920), + "height": random.randint(600, 1080) + } + case 
"/set/data/selected_translation_compute_device": + data = random.choice(self.config_dict["translation_compute_device_list"]) + case "/set/data/selected_transcription_compute_device": + data = random.choice(self.config_dict["transcription_compute_device_list"]) + case "/set/data/ctranslate2_weight_type": + data = random.choice(list(self.config_dict["selectable_ctranslate2_weight_type_dict"].keys())) + case "/set/data/deepl_auth_key": + data = None # Set to None to avoid using a real key + case "/set/data/selected_mic_host": + data = random.choice(self.config_dict["mic_host_list"]) + case "/set/data/selected_mic_device": + data = random.choice(self.config_dict["mic_device_list"]) + case "/set/data/mic_threshold": + data = random.randint(0, 100) + case "/set/data/mic_record_timeout": + data = random.randint(1, 3) + case "/set/data/mic_phrase_timeout": + data = random.randint(5, 10) + case "/set/data/mic_max_phrases": + data = random.randint(1, 10) + case "/set/data/hotkeys": + data = { + 'toggle_vrct_visibility': None, + 'toggle_translation': None, + 'toggle_transcription_send': None, + 'toggle_transcription_receive': None + } + case "/set/data/plugins_status": + data = {plugin: random.choice([True, False]) for plugin in self.config_dict.get("plugins", [])} + case "/set/data/mic_avg_logprob": + data = random.uniform(-5, 0) + case "/set/data/mic_no_speech_prob": + data = random.uniform(0, 1) + case "/set/data/mic_word_filter": + data = random.choice( + [ + ["test_0_0", "test_0_1", "test_0_2", None], + ["test_1_0", "test_1_1", None], + ["test_2_0", None], + [None] + ] + ) + case "/set/data/selected_speaker_device": + data = random.choice(self.config_dict["speaker_device_list"]) + case "/set/data/speaker_threshold": + data = random.randint(0, 100) + case "/set/data/speaker_record_timeout": + data = random.randint(1, 3) + case "/set/data/speaker_phrase_timeout": + data = random.randint(5, 10) + case "/set/data/speaker_max_phrases": + data = random.randint(1, 10) + case 
"/set/data/speaker_avg_logprob": + data = random.uniform(-5, 0) + case "/set/data/speaker_no_speech_prob": + data = random.uniform(0, 1) + case "/set/data/whisper_weight_type": + data = random.choice([key for key, value in self.config_dict["selectable_whisper_weight_type_dict"].items() if value is True]) + case "/set/data/overlay_small_log_settings": + data = { + "x_pos": random.random(), + "y_pos": random.random(), + "z_pos": random.random(), + "x_rotation": random.random(), + "y_rotation": random.random(), + "z_rotation": random.random(), + "display_duration": random.randint(0, 100), + "fadeout_duration": random.randint(0, 100), + "opacity": random.random(), + "ui_scaling": random.random(), + "tracker": random.choice(["HMD", "LeftHand", "RightHand"]), + } + case "/set/data/overlay_large_log_settings": + data = { + "x_pos": random.random(), + "y_pos": random.random(), + "z_pos": random.random(), + "x_rotation": random.random(), + "y_rotation": random.random(), + "z_rotation": random.random(), + "display_duration": random.randint(0, 100), + "fadeout_duration": random.randint(0, 100), + "opacity": random.random(), + "ui_scaling": random.random(), + "tracker": random.choice(["HMD", "LeftHand", "RightHand"]), + } + case "/set/data/send_message_format_parts": + data = self.config_dict["send_message_format_parts"] + case "/set/data/received_message_format_parts": + data = self.config_dict["received_message_format_parts"] + case "/set/data/websocket_host": + data = "127.0.0.1" + case "/set/data/websocket_port": + data = random.randint(1024, 65535) + case "/set/data/osc_ip_address": + data = "127.0.0.1" + case "/set/data/osc_port": + data = random.randint(1024, 65535) + case _: + data = None + + if data is not None: + print(f"data: {data}", end=" ", flush=True) + result, status = self.main.handleRequest(endpoint, data) + if status == 200: + self.config_dict[endpoint.split("/")[-1]] = result + print(f"\t -> {Color.GREEN}[PASS]{Color.RESET} Status: {status}, Result: 
{result}") + success = True + else: + print(f"\t -> {Color.RED}[ERROR]{Color.RESET} Status: {status}, Result: {result}") + print(f" Current config_dict: {self.config_dict}") + else: + print(f"\t -> {Color.YELLOW}[SKIP]{Color.RESET} No data to set for this endpoint.") + success = True + return success + + def test_set_data_endpoints_all(self): print("----データ設定系のエンドポイントのテスト----") for endpoint in self.set_data_endpoints: print(f"Testing endpoint: {endpoint}", end=" ", flush=True) - match endpoint: - case "/set/data/selected_tab_no": - data = random.choice(["1", "2", "3"]) - case "/set/data/selected_translation_engines": - translation_engines = self.config_dict.get("translation_engines", None) - data = {} - for i in ["1", "2", "3"]: - data[i] = random.choice(translation_engines) - case "/set/data/selected_your_languages": - selectable_language_list = self.config_dict.get("selectable_language_list", None) - data = {} - for i in ["1", "2", "3"]: - data[i] = {} - data[i]["1"] = random.choice(selectable_language_list) | {"enable": True} - case "/set/data/selected_target_languages": - selectable_language_list = self.config_dict.get("selectable_language_list", None) - data = {} - for i in ["1", "2", "3"]: - data[i] = {} - for j in ["1", "2", "3"]: - data[i][j] = random.choice(selectable_language_list) | {"enable": random.choice([True, False])} - case "/set/data/selected_transcription_engine": - transcription_engines = self.config_dict.get("transcription_engines", None) - data = random.choice(transcription_engines) - case "/set/data/transparency": - data = random.randint(0, 100) - case "/set/data/ui_scaling": - data = random.randint(50, 200) - case "/set/data/textbox_ui_scaling": - data = random.randint(50, 200) - case "/set/data/message_box_ratio": - data = round(random.uniform(0.1, 0.9), 2) - case "/set/data/send_message_button_type": - data = random.choice(["show", "hide", "show_and_disable_enter_key"]) - case "/set/data/font_family": - data = random.choice(["Arial", 
"Verdana", "Times New Roman"]) - case "/set/data/ui_language": - data = random.choice(["en", "ja", "ko", "zh-Hant", "zh-Hans"]) - case "/set/data/main_window_geometry": - data = { - "x_pos": random.randint(0, 1920), - "y_pos": random.randint(0, 1080), - "width": random.randint(800, 1920), - "height": random.randint(600, 1080) - } - case "/set/data/selected_translation_compute_device": - data = random.choice(self.config_dict["translation_compute_device_list"]) - case "/set/data/selected_transcription_compute_device": - data = random.choice(self.config_dict["transcription_compute_device_list"]) - case "/set/data/ctranslate2_weight_type": - data = random.choice(list(self.config_dict["selectable_ctranslate2_weight_type_dict"].keys())) - case "/set/data/deepl_auth_key": - data = None # Set to None to avoid using a real key - case "/set/data/selected_mic_host": - data = random.choice(self.config_dict["mic_host_list"]) - case "/set/data/selected_mic_device": - data = random.choice(self.config_dict["mic_device_list"]) - case "/set/data/mic_threshold": - data = random.randint(0, 100) - case "/set/data/mic_record_timeout": - data = random.randint(1, 3) - case "/set/data/mic_phrase_timeout": - data = random.randint(5, 10) - case "/set/data/mic_max_phrases": - data = random.randint(1, 10) - case "/set/data/hotkeys": - data = { - 'toggle_vrct_visibility': None, - 'toggle_translation': None, - 'toggle_transcription_send': None, - 'toggle_transcription_receive': None - } - case "/set/data/plugins_status": - data = {plugin: random.choice([True, False]) for plugin in self.config_dict.get("plugins", [])} - case "/set/data/mic_avg_logprob": - data = random.uniform(-5, 0) - case "/set/data/mic_no_speech_prob": - data = random.uniform(0, 1) - case "/set/data/mic_word_filter": - data = random.choice( - [ - ["test_0_0", "test_0_1", "test_0_2", None], - ["test_1_0", "test_1_1", None], - ["test_2_0", None], - [None] - ] - ) - case "/set/data/selected_speaker_device": - data = 
random.choice(self.config_dict["speaker_device_list"]) - case "/set/data/speaker_threshold": - data = random.randint(0, 100) - case "/set/data/speaker_record_timeout": - data = random.randint(1, 3) - case "/set/data/speaker_phrase_timeout": - data = random.randint(5, 10) - case "/set/data/speaker_max_phrases": - data = random.randint(1, 10) - case "/set/data/speaker_avg_logprob": - data = random.uniform(-5, 0) - case "/set/data/speaker_no_speech_prob": - data = random.uniform(0, 1) - case "/set/data/whisper_weight_type": - data = random.choice([key for key, value in self.config_dict["selectable_whisper_weight_type_dict"].items() if value is True]) - case "/set/data/overlay_small_log_settings": - data = { - "x_pos": random.random(), - "y_pos": random.random(), - "z_pos": random.random(), - "x_rotation": random.random(), - "y_rotation": random.random(), - "z_rotation": random.random(), - "display_duration": random.randint(0, 100), - "fadeout_duration": random.randint(0, 100), - "opacity": random.random(), - "ui_scaling": random.random(), - "tracker": random.choice(["HMD", "LeftHand", "RightHand"]), - } - case "/set/data/overlay_large_log_settings": - data = { - "x_pos": random.random(), - "y_pos": random.random(), - "z_pos": random.random(), - "x_rotation": random.random(), - "y_rotation": random.random(), - "z_rotation": random.random(), - "display_duration": random.randint(0, 100), - "fadeout_duration": random.randint(0, 100), - "opacity": random.random(), - "ui_scaling": random.random(), - "tracker": random.choice(["HMD", "LeftHand", "RightHand"]), - } - case "/set/data/send_message_format_parts": - data = self.config_dict["send_message_format_parts"] - case "/set/data/received_message_format_parts": - data = self.config_dict["received_message_format_parts"] - case "/set/data/websocket_host": - data = "127.0.0.1" - case "/set/data/websocket_port": - data = random.randint(1024, 65535) - case "/set/data/osc_ip_address": - data = "127.0.0.1" - case 
"/set/data/osc_port": - data = random.randint(1024, 65535) - case _: - data = None + if self.test_set_data_endpoints_single(endpoint) is False: + break + print("----データ設定系のエンドポイントのテスト終了----") - if data is not None: - print(f"data: {data}", end=" ", flush=True) + def test_run_endpoints_single(self, endpoint, test): + success = False + match endpoint: + case "/run/send_message_box": + data = test["data"] + expected_status = test["status"] result, status = self.main.handleRequest(endpoint, data) - if status == 200: - self.config_dict[endpoint.split("/")[-1]] = result + if status == expected_status: print(f"\t -> {Color.GREEN}[PASS]{Color.RESET} Status: {status}, Result: {result}") + success = True else: print(f"\t -> {Color.RED}[ERROR]{Color.RESET} Status: {status}, Result: {result}") print(f" Current config_dict: {self.config_dict}") - break - else: - print(f"\t -> {Color.YELLOW}[SKIP]{Color.RESET} No data to set for this endpoint.") - print("----データ設定系のエンドポイントのテスト終了----") + case "/run/typing_message_box" | "/run/stop_typing_message_box": + data = test["data"] + expected_status = test["status"] + expected_result = test["result"] + result, status = self.main.handleRequest(endpoint, data) + if status == expected_status and result == expected_result: + print(f"\t -> {Color.GREEN}[PASS]{Color.RESET} Status: {status}, Result: {result}") + success = True + else: + print(f"\t -> {Color.RED}[ERROR]{Color.RESET} Status: {status}, Result: {result}, Expected: {expected_result}") + print(f" Current config_dict: {self.config_dict}") + case "/run/send_text_overlay": + data = test["data"] + expected_status = test["status"] + expected_result = test["result"] + result, status = self.main.handleRequest(endpoint, data) + if status == expected_status and result == expected_result: + print(f"\t -> {Color.GREEN}[PASS]{Color.RESET} Status: {status}, Result: {result}") + success = True + else: + print(f"\t -> {Color.RED}[ERROR]{Color.RESET} Status: {status}, Result: {result}, Expected: 
{expected_result}") + print(f" Current config_dict: {self.config_dict}") + case "/run/swap_your_language_and_target_language": + data = test["data"] + expected_status = test["status"] + result, status = self.main.handleRequest(endpoint, data) + if status == expected_status: + print(f"\t -> {Color.GREEN}[PASS]{Color.RESET} Status: {status}, Result: {result}") + success = True + else: + print(f"\t -> {Color.RED}[ERROR]{Color.RESET} Status: {status}, Result: {result}") + print(f" Current config_dict: {self.config_dict}") + case _: + print(f"\t -> {Color.YELLOW}[SKIP]{Color.RESET} No tests defined for this endpoint.") + success = True + return success - def test_run_endpoints_single(self): + def test_run_endpoints_all(self): print("----実行系のエンドポイントのテスト----") for endpoint, tests in self.run_endpoints.items(): print(f"Testing endpoint: {endpoint}", end=" ", flush=True) - match endpoint: - case "/run/send_message_box": - for test in tests: - data = test["data"] - expected_status = test["status"] - result, status = self.main.handleRequest(endpoint, data) - if status == expected_status: - print(f"\t -> {Color.GREEN}[PASS]{Color.RESET} Status: {status}, Result: {result}") - else: - print(f"\t -> {Color.RED}[ERROR]{Color.RESET} Status: {status}, Result: {result}") - print(f" Current config_dict: {self.config_dict}") - break - case "/run/typing_message_box" | "/run/stop_typing_message_box": - for test in tests: - data = test["data"] - expected_status = test["status"] - expected_result = test["result"] - result, status = self.main.handleRequest(endpoint, data) - if status == expected_status and result == expected_result: - print(f"\t -> {Color.GREEN}[PASS]{Color.RESET} Status: {status}, Result: {result}") - else: - print(f"\t -> {Color.RED}[ERROR]{Color.RESET} Status: {status}, Result: {result}, Expected: {expected_result}") - print(f" Current config_dict: {self.config_dict}") - break - case "/run/send_text_overlay": - for test in tests: - data = test["data"] - expected_status = 
test["status"] - expected_result = test["result"] - result, status = self.main.handleRequest(endpoint, data) - if status == expected_status and result == expected_result: - print(f"\t -> {Color.GREEN}[PASS]{Color.RESET} Status: {status}, Result: {result}") - else: - print(f"\t -> {Color.RED}[ERROR]{Color.RESET} Status: {status}, Result: {result}, Expected: {expected_result}") - print(f" Current config_dict: {self.config_dict}") - break - case "/run/swap_your_language_and_target_language": - for test in tests: - data = test["data"] - expected_status = test["status"] - result, status = self.main.handleRequest(endpoint, data) - if status == expected_status: - print(f"\t -> {Color.GREEN}[PASS]{Color.RESET} Status: {status}, Result: {result}") - else: - print(f"\t -> {Color.RED}[ERROR]{Color.RESET} Status: {status}, Result: {result}") - print(f" Current config_dict: {self.config_dict}") - break + success = True + for test in tests: + if self.test_run_endpoints_single(endpoint, test) is False: + success = False + break + if success is False: + break print("----実行系のエンドポイントのテスト終了----") + def test_endpoints_all_random(self): + print("----すべてのエンドポイントのランダムアクセスのテスト----") + endpoint_types = [ + "validity", + "set_data", + "run", + ] + + for i in range(1000): + endpoints_type = random.choice(endpoint_types) + match endpoints_type: + case "validity": + endpoint = random.choice(self.validity_endpoints) + print(f"No.{i:04} Testing endpoint: {endpoint}", end="", flush=True) + if self.test_endpoints_on_off_single(endpoint) is False: + break + case "set_data": + endpoint = random.choice(self.set_data_endpoints) + print(f"No.{i:04} Testing endpoint: {endpoint}", end=" ", flush=True) + if self.test_set_data_endpoints_single(endpoint) is False: + break + case "run": + endpoint = random.choice(list(self.run_endpoints.keys())) + test = random.choice(self.run_endpoints[endpoint]) + print(f"No.{i:04} Testing endpoint: {endpoint}", end=" ", flush=True) + if 
self.test_run_endpoints_single(endpoint, test) is False: + break + + # 最後にすべてOFFにして終了 + for endpoint in self.validity_endpoints: + if endpoint.startswith("/set/disable/"): + _, _ = self.main.handleRequest(endpoint, None) + print("----すべてのエンドポイントのランダムアクセスのテスト終了----") + if __name__ == "__main__": try: test = TestMainloop() - test.test_endpoints_on_off_single() - test.test_set_data_endpoints_single() - test.test_run_endpoints_single() + # test.test_endpoints_on_off_all() + # test.test_set_data_endpoints_all() + # test.test_run_endpoints_all() + test.test_endpoints_all_random() + # test.test_endpoints_on_off_continuous() + # test.test_endpoints_on_off_random() except KeyboardInterrupt: print("Interrupted by user, shutting down...") except Exception as e: From 1b0e93b8f86b54c40860fa98c1d81b04d49e9352 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Wed, 24 Sep 2025 21:12:08 +0900 Subject: [PATCH 40/92] =?UTF-8?q?=E7=89=B9=E5=AE=9A=E3=81=AE=E3=82=A8?= =?UTF-8?q?=E3=83=B3=E3=83=89=E3=83=9D=E3=82=A4=E3=83=B3=E3=83=88=E3=81=AB?= =?UTF-8?q?=E5=AF=BE=E3=81=99=E3=82=8B=E3=83=A9=E3=83=B3=E3=83=80=E3=83=A0?= =?UTF-8?q?=E3=82=A2=E3=82=AF=E3=82=BB=E3=82=B9=E3=81=AE=E3=83=86=E3=82=B9?= =?UTF-8?q?=E3=83=88=E3=82=92=E8=BF=BD=E5=8A=A0=E3=81=97=E3=80=81=E5=87=BA?= =?UTF-8?q?=E5=8A=9B=E3=83=A1=E3=83=83=E3=82=BB=E3=83=BC=E3=82=B8=E3=82=92?= =?UTF-8?q?=E6=94=B9=E5=96=84=E3=80=82=E3=83=86=E3=82=B9=E3=83=88=E3=81=AE?= =?UTF-8?q?=E7=AF=84=E5=9B=B2=E3=82=92=E6=8B=A1=E5=A4=A7=E3=81=97=E3=80=81?= =?UTF-8?q?=E7=84=A1=E5=8A=B9=E5=8C=96=E3=82=A8=E3=83=B3=E3=83=89=E3=83=9D?= =?UTF-8?q?=E3=82=A4=E3=83=B3=E3=83=88=E3=81=AE=E5=87=A6=E7=90=86=E3=82=92?= =?UTF-8?q?=E5=BC=B7=E5=8C=96=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src-python/test_endpoints.py | 107 +++++++++++++++++++++++++---------- 1 file changed, 78 insertions(+), 29 deletions(-) diff --git a/src-python/test_endpoints.py 
b/src-python/test_endpoints.py index e2f1b18f..61dfba90 100644 --- a/src-python/test_endpoints.py +++ b/src-python/test_endpoints.py @@ -1,6 +1,5 @@ # 初期化のため、config.jsonの削除 import os -import pprint import time import random if os.path.exists("config.json"): @@ -220,26 +219,26 @@ class TestMainloop(): result, status = self.main.handleRequest(endpoint, None) if result is True and status == 200: self.config_dict[endpoint.split("/")[-1]] = result - print(f"\t -> {Color.GREEN}[PASS]{Color.RESET} Status: {status}, Result: {result}") + print(f"-> {Color.GREEN}[PASS]{Color.RESET} endpoint:{endpoint} Status: {status}, Result: {result}") success = True else: - print(f"\t -> {Color.RED}[ERROR]{Color.RESET} Status: {status}, Result: {result}") + print(f"-> {Color.RED}[ERROR]{Color.RESET} endpoint:{endpoint} Status: {status}, Result: {result}") print(f"Current config_dict: {self.config_dict}") elif endpoint.startswith("/set/disable/"): result, status = self.main.handleRequest(endpoint, None) if result is False and status == 200: self.config_dict[endpoint.split("/")[-1]] = result - print(f"\t -> {Color.GREEN}[PASS]{Color.RESET} Status: {status}, Result: {result}") + print(f"-> {Color.GREEN}[PASS]{Color.RESET} endpoint:{endpoint} Status: {status}, Result: {result}") success = True else: - print(f"\t -> {Color.RED}[ERROR]{Color.RESET} Status: {status}, Result: {result}") + print(f"-> {Color.RED}[ERROR]{Color.RESET} endpoint:{endpoint} Status: {status}, Result: {result}") print(f"Current config_dict: {self.config_dict}") return success def test_endpoints_on_off_all(self): print("----ON/OFF系のエンドポイントのテスト----") for endpoint in self.validity_endpoints: - print(f"Testing endpoint: {endpoint}", end="", flush=True) + print(f"Testing endpoint: {endpoint}", flush=True) if self.test_endpoints_on_off_single(endpoint) is False: break print("----ON/OFF系のエンドポイントのテスト終了----") @@ -248,7 +247,7 @@ class TestMainloop(): print("----ON/OFFでのランダムアクセスのテスト----") for i in range(1000): endpoint = 
random.choice(self.validity_endpoints) - print(f"No.{i:04} Testing endpoint: {endpoint}", end="", flush=True) + print(f"No.{i:04} Testing endpoint: {endpoint}", flush=True) if self.test_endpoints_on_off_single(endpoint) is False: break @@ -272,7 +271,7 @@ class TestMainloop(): # ] for i in range(1000): endpoint = random.choice(endpoints) - print(f"No.{i:04} Testing endpoint: {endpoint}", end="", flush=True) + print(f"No.{i:04} Testing endpoint: {endpoint}", flush=True) if self.test_endpoints_on_off_single(endpoint) is False: break @@ -435,13 +434,13 @@ class TestMainloop(): result, status = self.main.handleRequest(endpoint, data) if status == 200: self.config_dict[endpoint.split("/")[-1]] = result - print(f"\t -> {Color.GREEN}[PASS]{Color.RESET} Status: {status}, Result: {result}") + print(f"-> {Color.GREEN}[PASS]{Color.RESET} endpoint:{endpoint} Status: {status}, Result: {result}") success = True else: - print(f"\t -> {Color.RED}[ERROR]{Color.RESET} Status: {status}, Result: {result}") + print(f"-> {Color.RED}[ERROR]{Color.RESET} endpoint:{endpoint} Status: {status}, Result: {result}") print(f" Current config_dict: {self.config_dict}") else: - print(f"\t -> {Color.YELLOW}[SKIP]{Color.RESET} No data to set for this endpoint.") + print(f"-> {Color.YELLOW}[SKIP]{Color.RESET} No data to set for this endpoint: {endpoint}.") success = True return success @@ -461,45 +460,45 @@ class TestMainloop(): expected_status = test["status"] result, status = self.main.handleRequest(endpoint, data) if status == expected_status: - print(f"\t -> {Color.GREEN}[PASS]{Color.RESET} Status: {status}, Result: {result}") + print(f"-> {Color.GREEN}[PASS]{Color.RESET} endpoint:{endpoint} Status: {status}, Result: {result}") success = True else: - print(f"\t -> {Color.RED}[ERROR]{Color.RESET} Status: {status}, Result: {result}") - print(f" Current config_dict: {self.config_dict}") + print(f"-> {Color.RED}[ERROR]{Color.RESET} endpoint:{endpoint} Status: {status}, Result: {result}") + 
print(f"Current config_dict: {self.config_dict}") case "/run/typing_message_box" | "/run/stop_typing_message_box": data = test["data"] expected_status = test["status"] expected_result = test["result"] result, status = self.main.handleRequest(endpoint, data) if status == expected_status and result == expected_result: - print(f"\t -> {Color.GREEN}[PASS]{Color.RESET} Status: {status}, Result: {result}") + print(f"-> {Color.GREEN}[PASS]{Color.RESET} endpoint:{endpoint} Status: {status}, Result: {result}") success = True else: - print(f"\t -> {Color.RED}[ERROR]{Color.RESET} Status: {status}, Result: {result}, Expected: {expected_result}") - print(f" Current config_dict: {self.config_dict}") + print(f"-> {Color.RED}[ERROR]{Color.RESET} endpoint:{endpoint} Status: {status}, Result: {result}, Expected: {expected_result}") + print(f"Current config_dict: {self.config_dict}") case "/run/send_text_overlay": data = test["data"] expected_status = test["status"] expected_result = test["result"] result, status = self.main.handleRequest(endpoint, data) if status == expected_status and result == expected_result: - print(f"\t -> {Color.GREEN}[PASS]{Color.RESET} Status: {status}, Result: {result}") + print(f"-> {Color.GREEN}[PASS]{Color.RESET} endpoint:{endpoint} Status: {status}, Result: {result}") success = True else: - print(f"\t -> {Color.RED}[ERROR]{Color.RESET} Status: {status}, Result: {result}, Expected: {expected_result}") - print(f" Current config_dict: {self.config_dict}") + print(f"-> {Color.RED}[ERROR]{Color.RESET} endpoint:{endpoint} Status: {status}, Result: {result}, Expected: {expected_result}") + print(f"Current config_dict: {self.config_dict}") case "/run/swap_your_language_and_target_language": data = test["data"] expected_status = test["status"] result, status = self.main.handleRequest(endpoint, data) if status == expected_status: - print(f"\t -> {Color.GREEN}[PASS]{Color.RESET} Status: {status}, Result: {result}") + print(f"-> {Color.GREEN}[PASS]{Color.RESET} 
endpoint:{endpoint} Status: {status}, Result: {result}") success = True else: - print(f"\t -> {Color.RED}[ERROR]{Color.RESET} Status: {status}, Result: {result}") - print(f" Current config_dict: {self.config_dict}") + print(f"-> {Color.RED}[ERROR]{Color.RESET} endpoint:{endpoint} Status: {status}, Result: {result}") + print(f"Current config_dict: {self.config_dict}") case _: - print(f"\t -> {Color.YELLOW}[SKIP]{Color.RESET} No tests defined for this endpoint.") + print(f"-> {Color.YELLOW}[SKIP]{Color.RESET} No tests defined for this endpoint :{endpoint}.") success = True return success @@ -524,23 +523,23 @@ class TestMainloop(): "run", ] - for i in range(1000): + for i in range(10000): endpoints_type = random.choice(endpoint_types) match endpoints_type: case "validity": endpoint = random.choice(self.validity_endpoints) - print(f"No.{i:04} Testing endpoint: {endpoint}", end="", flush=True) + print(f"No.{i:04} Testing endpoint: {endpoint}", flush=True) if self.test_endpoints_on_off_single(endpoint) is False: break case "set_data": endpoint = random.choice(self.set_data_endpoints) - print(f"No.{i:04} Testing endpoint: {endpoint}", end=" ", flush=True) + print(f"No.{i:04} Testing endpoint: {endpoint}", flush=True) if self.test_set_data_endpoints_single(endpoint) is False: break case "run": endpoint = random.choice(list(self.run_endpoints.keys())) test = random.choice(self.run_endpoints[endpoint]) - print(f"No.{i:04} Testing endpoint: {endpoint}", end=" ", flush=True) + print(f"No.{i:04} Testing endpoint: {endpoint}", flush=True) if self.test_run_endpoints_single(endpoint, test) is False: break @@ -550,15 +549,65 @@ class TestMainloop(): _, _ = self.main.handleRequest(endpoint, None) print("----すべてのエンドポイントのランダムアクセスのテスト終了----") + + def test_endpoints_specific_random(self): + print("----特定のエンドポイントのランダムアクセスのテスト----") + + self.validity_specific_endpoints = [ + "/set/enable/websocket_server", + "/set/disable/websocket_server", + ] + + self.set_data_specific_endpoints = [ + 
"/set/data/ctranslate2_weight_type", + "/set/data/websocket_host", + "/set/data/websocket_port", + "/set/data/osc_ip_address", + "/set/data/osc_port", + ] + + endpoint_types = [ + "validity", + "set_data", + # "run", + ] + + for i in range(1000): + endpoints_type = random.choice(endpoint_types) + match endpoints_type: + case "validity": + endpoint = random.choice(self.validity_specific_endpoints) + print(f"No.{i:04} Testing endpoint: {endpoint}", flush=True) + if self.test_endpoints_on_off_single(endpoint) is False: + break + case "set_data": + endpoint = random.choice(self.set_data_specific_endpoints) + print(f"No.{i:04} Testing endpoint: {endpoint}", flush=True) + if self.test_set_data_endpoints_single(endpoint) is False: + break + case "run": + endpoint = random.choice(list(self.run_endpoints.keys())) + test = random.choice(self.run_endpoints[endpoint]) + print(f"No.{i:04} Testing endpoint: {endpoint}", flush=True) + if self.test_run_endpoints_single(endpoint, test) is False: + break + + # 最後にすべてOFFにして終了 + for endpoint in self.validity_endpoints: + if endpoint.startswith("/set/disable/"): + _, _ = self.main.handleRequest(endpoint, None) + print("----特定のエンドポイントのランダムアクセスのテスト終了----") + if __name__ == "__main__": try: test = TestMainloop() # test.test_endpoints_on_off_all() # test.test_set_data_endpoints_all() # test.test_run_endpoints_all() - test.test_endpoints_all_random() + # test.test_endpoints_all_random() # test.test_endpoints_on_off_continuous() # test.test_endpoints_on_off_random() + test.test_endpoints_specific_random() except KeyboardInterrupt: print("Interrupted by user, shutting down...") except Exception as e: From 53f1b958433e308d9de31ec359aa064547de5f76 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Wed, 24 Sep 2025 23:51:22 +0900 Subject: [PATCH 41/92] =?UTF-8?q?=E3=82=A8=E3=83=B3=E3=83=89=E3=83=9D?= =?UTF-8?q?=E3=82=A4=E3=83=B3=E3=83=88=E3=83=86=E3=82=B9=E3=83=88=E3=81=AE?= 
=?UTF-8?q?=E6=9C=9F=E5=BE=85=E3=81=95=E3=82=8C=E3=82=8B=E3=82=B9=E3=83=86?= =?UTF-8?q?=E3=83=BC=E3=82=BF=E3=82=B9=E3=82=92=E5=BC=B7=E5=8C=96=E3=81=97?= =?UTF-8?q?=E3=80=81=E3=83=AA=E3=82=AF=E3=82=A8=E3=82=B9=E3=83=88=E5=87=A6?= =?UTF-8?q?=E7=90=86=E3=81=AE=E7=B5=90=E6=9E=9C=E3=82=92=E6=94=B9=E5=96=84?= =?UTF-8?q?=E3=80=82=E3=83=87=E3=83=BC=E3=82=BF=E8=A8=AD=E5=AE=9A=E7=B3=BB?= =?UTF-8?q?=E3=81=AE=E3=82=A8=E3=83=B3=E3=83=89=E3=83=9D=E3=82=A4=E3=83=B3?= =?UTF-8?q?=E3=83=88=E3=81=AB=E3=81=8A=E3=81=91=E3=82=8B=E3=83=A9=E3=83=B3?= =?UTF-8?q?=E3=83=80=E3=83=A0=E3=83=87=E3=83=BC=E3=82=BF=E3=81=AE=E7=AF=84?= =?UTF-8?q?=E5=9B=B2=E3=82=92=E6=8B=A1=E5=A4=A7=E3=81=97=E3=80=81=E5=87=BA?= =?UTF-8?q?=E5=8A=9B=E3=83=A1=E3=83=83=E3=82=BB=E3=83=BC=E3=82=B8=E3=82=92?= =?UTF-8?q?=E3=83=95=E3=83=A9=E3=83=83=E3=82=B7=E3=83=A5=E3=81=99=E3=82=8B?= =?UTF-8?q?=E3=82=88=E3=81=86=E3=81=AB=E4=BF=AE=E6=AD=A3=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src-python/test_endpoints.py | 99 +++++++++++++++++++++++++++--------- 1 file changed, 76 insertions(+), 23 deletions(-) diff --git a/src-python/test_endpoints.py b/src-python/test_endpoints.py index 61dfba90..988405ad 100644 --- a/src-python/test_endpoints.py +++ b/src-python/test_endpoints.py @@ -50,10 +50,10 @@ class TestMainloop(): self.config_dict = {} for endpoint in self.main.mapping.keys(): if endpoint.startswith("/get/data/"): - self.config_dict[endpoint.split("/")[-1]] = self.main.handleRequest(endpoint, None)[0] + self.config_dict[endpoint.split("/")[-1]], _ = self.main.handleRequest(endpoint, None) elif endpoint.startswith("/set/disable/"): - self.config_dict[endpoint.split("/")[-1]] = self.main.handleRequest(endpoint, None)[0] - print(self.config_dict) + self.config_dict[endpoint.split("/")[-1]], _ = self.main.handleRequest(endpoint, None) + print(self.config_dict, flush=True) self.validity_endpoints = [ "/set/enable/translation", @@ -112,7 +112,7 @@ class TestMainloop(): 
"/set/data/selected_tab_no", "/set/data/selected_translation_engines", "/set/data/selected_your_languages", - "/set/data/selected_target_languages" + "/set/data/selected_target_languages", "/set/data/selected_transcription_engine", "/set/data/transparency", "/set/data/ui_scaling", @@ -215,10 +215,18 @@ class TestMainloop(): def test_endpoints_on_off_single(self, endpoint): success = False + expected_status = [200] if endpoint.startswith("/set/enable/"): + match endpoint: + case "/set/enable/websocket_server": + expected_status = [200, 400] + case _: + pass + result, status = self.main.handleRequest(endpoint, None) - if result is True and status == 200: - self.config_dict[endpoint.split("/")[-1]] = result + if status in expected_status: + if status == 200: + self.config_dict[endpoint.split("/")[-1]] = result print(f"-> {Color.GREEN}[PASS]{Color.RESET} endpoint:{endpoint} Status: {status}, Result: {result}") success = True else: @@ -226,8 +234,9 @@ class TestMainloop(): print(f"Current config_dict: {self.config_dict}") elif endpoint.startswith("/set/disable/"): result, status = self.main.handleRequest(endpoint, None) - if result is False and status == 200: - self.config_dict[endpoint.split("/")[-1]] = result + if status in expected_status: + if status == 200: + self.config_dict[endpoint.split("/")[-1]] = result print(f"-> {Color.GREEN}[PASS]{Color.RESET} endpoint:{endpoint} Status: {status}, Result: {result}") success = True else: @@ -283,6 +292,7 @@ class TestMainloop(): def test_set_data_endpoints_single(self, endpoint): success = False + expected_status = [200] match endpoint: case "/set/data/selected_tab_no": data = random.choice(["1", "2", "3"]) @@ -335,19 +345,36 @@ class TestMainloop(): case "/set/data/ctranslate2_weight_type": data = random.choice(list(self.config_dict["selectable_ctranslate2_weight_type_dict"].keys())) case "/set/data/deepl_auth_key": - data = None # Set to None to avoid using a real key + data = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + 
expected_status = [400] case "/set/data/selected_mic_host": data = random.choice(self.config_dict["mic_host_list"]) case "/set/data/selected_mic_device": data = random.choice(self.config_dict["mic_device_list"]) case "/set/data/mic_threshold": - data = random.randint(0, 100) + data = random.randint(-1000, 3000) + if 0 <= data <= 2000: + expected_status = [200] + else: + expected_status = [400] case "/set/data/mic_record_timeout": - data = random.randint(1, 3) + data = random.randint(-1, 10) + if 0 <= data <= self.config_dict["mic_phrase_timeout"]: + expected_status = [200] + else: + expected_status = [400] case "/set/data/mic_phrase_timeout": - data = random.randint(5, 10) + data = random.randint(-1, 10) + if self.config_dict["mic_record_timeout"] <= data: + expected_status = [200] + else: + expected_status = [400] case "/set/data/mic_max_phrases": - data = random.randint(1, 10) + data = random.randint(-1, 10) + if 0 <= data: + expected_status = [200] + else: + expected_status = [400] case "/set/data/hotkeys": data = { 'toggle_vrct_visibility': None, @@ -373,13 +400,29 @@ class TestMainloop(): case "/set/data/selected_speaker_device": data = random.choice(self.config_dict["speaker_device_list"]) case "/set/data/speaker_threshold": - data = random.randint(0, 100) + data = random.randint(-1000, 5000) + if 0 <= data <= 4000: + expected_status = [200] + else: + expected_status = [400] case "/set/data/speaker_record_timeout": - data = random.randint(1, 3) + data = random.randint(-1, 10) + if 0 <= data <= self.config_dict["speaker_phrase_timeout"]: + expected_status = [200] + else: + expected_status = [400] case "/set/data/speaker_phrase_timeout": - data = random.randint(5, 10) + data = random.randint(-1, 10) + if self.config_dict["speaker_record_timeout"] <= data: + expected_status = [200] + else: + expected_status = [400] case "/set/data/speaker_max_phrases": - data = random.randint(1, 10) + data = random.randint(-1, 10) + if 0 <= data: + expected_status = [200] + 
else: + expected_status = [400] case "/set/data/speaker_avg_logprob": data = random.uniform(-5, 0) case "/set/data/speaker_no_speech_prob": @@ -419,11 +462,20 @@ class TestMainloop(): case "/set/data/received_message_format_parts": data = self.config_dict["received_message_format_parts"] case "/set/data/websocket_host": - data = "127.0.0.1" + data = random.choice(["127.0.0.1", "aaaaadwafasdsd", "0210.1564.845.0"]) + if data == "127.0.0.1": + expected_status = [200, 400] + else: + expected_status = [400] case "/set/data/websocket_port": data = random.randint(1024, 65535) + expected_status = [200, 400] case "/set/data/osc_ip_address": - data = "127.0.0.1" + data = random.choice(["127.0.0.1", "aaaaadwafasdsd", "0210.1564.845.0"]) + if data == "127.0.0.1": + expected_status = [200] + else: + expected_status = [400] case "/set/data/osc_port": data = random.randint(1024, 65535) case _: @@ -432,8 +484,9 @@ class TestMainloop(): if data is not None: print(f"data: {data}", end=" ", flush=True) result, status = self.main.handleRequest(endpoint, data) - if status == 200: - self.config_dict[endpoint.split("/")[-1]] = result + if status in expected_status: + if status == 200: + self.config_dict[endpoint.split("/")[-1]] = result print(f"-> {Color.GREEN}[PASS]{Color.RESET} endpoint:{endpoint} Status: {status}, Result: {result}") success = True else: @@ -604,10 +657,10 @@ if __name__ == "__main__": # test.test_endpoints_on_off_all() # test.test_set_data_endpoints_all() # test.test_run_endpoints_all() - # test.test_endpoints_all_random() + test.test_endpoints_all_random() # test.test_endpoints_on_off_continuous() # test.test_endpoints_on_off_random() - test.test_endpoints_specific_random() + # test.test_endpoints_specific_random() except KeyboardInterrupt: print("Interrupted by user, shutting down...") except Exception as e: From 89a027a8e0b137843525506b3c7578c9d264d97f Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Thu, 25 Sep 2025 
12:30:35 +0900 Subject: [PATCH 42/92] =?UTF-8?q?=E3=82=A8=E3=83=B3=E3=83=89=E3=83=9D?= =?UTF-8?q?=E3=82=A4=E3=83=B3=E3=83=88=E3=83=86=E3=82=B9=E3=83=88=E3=81=AE?= =?UTF-8?q?=E3=83=AA=E3=83=95=E3=82=A1=E3=82=AF=E3=82=BF=E3=83=AA=E3=83=B3?= =?UTF-8?q?=E3=82=B0=E3=82=92=E5=AE=9F=E6=96=BD=E3=81=97=E3=80=81=E5=8B=95?= =?UTF-8?q?=E7=9A=84=E3=81=AB=E3=82=A8=E3=83=B3=E3=83=89=E3=83=9D=E3=82=A4?= =?UTF-8?q?=E3=83=B3=E3=83=88=E3=82=92=E5=8F=96=E5=BE=97=E3=81=99=E3=82=8B?= =?UTF-8?q?=E3=82=88=E3=81=86=E3=81=AB=E5=A4=89=E6=9B=B4=E3=80=82=E3=83=86?= =?UTF-8?q?=E3=82=B9=E3=83=88=E7=B5=90=E6=9E=9C=E3=81=AE=E8=A8=98=E9=8C=B2?= =?UTF-8?q?=E6=A9=9F=E8=83=BD=E3=82=92=E8=BF=BD=E5=8A=A0=E3=81=97=E3=80=81?= =?UTF-8?q?=E5=89=8A=E9=99=A4=E7=B3=BB=E3=82=A8=E3=83=B3=E3=83=89=E3=83=9D?= =?UTF-8?q?=E3=82=A4=E3=83=B3=E3=83=88=E3=81=AE=E3=83=86=E3=82=B9=E3=83=88?= =?UTF-8?q?=E3=82=92=E6=96=B0=E3=81=9F=E3=81=AB=E5=AE=9F=E8=A3=85=E3=80=82?= =?UTF-8?q?=E3=83=86=E3=82=B9=E3=83=88=E7=B5=90=E6=9E=9C=E3=81=AE=E3=82=B5?= =?UTF-8?q?=E3=83=9E=E3=83=AA=E3=83=BC=E8=A1=A8=E7=A4=BA=E6=A9=9F=E8=83=BD?= =?UTF-8?q?=E3=82=82=E8=BF=BD=E5=8A=A0=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src-python/test_endpoints.py | 449 +++++++++++++++++------------------ 1 file changed, 223 insertions(+), 226 deletions(-) diff --git a/src-python/test_endpoints.py b/src-python/test_endpoints.py index 988405ad..d5aa9cb4 100644 --- a/src-python/test_endpoints.py +++ b/src-python/test_endpoints.py @@ -55,162 +55,41 @@ class TestMainloop(): self.config_dict[endpoint.split("/")[-1]], _ = self.main.handleRequest(endpoint, None) print(self.config_dict, flush=True) - self.validity_endpoints = [ - "/set/enable/translation", - "/set/disable/translation", - "/set/enable/transcription_send", - "/set/disable/transcription_send", - "/set/enable/transcription_receive", - "/set/disable/transcription_receive", - "/set/enable/foreground", - "/set/disable/foreground", - 
"/set/enable/main_window_sidebar_compact_mode", - "/set/disable/main_window_sidebar_compact_mode", - "/set/enable/show_resend_button", - "/set/disable/show_resend_button", - "/set/enable/convert_message_to_romaji", - "/set/disable/convert_message_to_romaji", - "/set/enable/convert_message_to_hiragana", - "/set/disable/convert_message_to_hiragana", - "/set/enable/auto_mic_select", - "/set/disable/auto_mic_select", - "/set/enable/mic_automatic_threshold", - "/set/disable/mic_automatic_threshold", - "/set/enable/check_mic_threshold", - "/set/disable/check_mic_threshold", - "/set/enable/auto_speaker_select", - "/set/disable/auto_speaker_select", - "/set/enable/speaker_automatic_threshold", - "/set/disable/speaker_automatic_threshold", - "/set/enable/check_speaker_threshold", - "/set/disable/check_speaker_threshold", - "/set/enable/overlay_small_log", - "/set/disable/overlay_small_log", - "/set/enable/overlay_large_log", - "/set/disable/overlay_large_log", - "/set/enable/overlay_show_only_translated_messages", - "/set/disable/overlay_show_only_translated_messages", - "/set/enable/auto_clear_message_box", - "/set/disable/auto_clear_message_box", - "/set/enable/send_only_translated_messages", - "/set/disable/send_only_translated_messages", - "/set/enable/logger_feature", - "/set/disable/logger_feature", - "/set/enable/vrc_mic_mute_sync", - "/set/disable/vrc_mic_mute_sync", - "/set/enable/send_message_to_vrc", - "/set/disable/send_message_to_vrc", - "/set/enable/send_received_message_to_vrc", - "/set/disable/send_received_message_to_vrc", - "/set/enable/websocket_server", - "/set/disable/websocket_server", - "/set/enable/notification_vrc_sfx", - "/set/disable/notification_vrc_sfx", - ] + self.validity_endpoints = [] + for endpoint in self.main.mapping.keys(): + if endpoint.startswith("/set/enable/") or endpoint.startswith("/set/disable/"): + self.validity_endpoints.append(endpoint) - self.set_data_endpoints = [ - "/set/data/selected_tab_no", - 
"/set/data/selected_translation_engines", - "/set/data/selected_your_languages", - "/set/data/selected_target_languages", - "/set/data/selected_transcription_engine", - "/set/data/transparency", - "/set/data/ui_scaling", - "/set/data/textbox_ui_scaling", - "/set/data/message_box_ratio", - "/set/data/send_message_button_type", - "/set/data/font_family", - "/set/data/ui_language", - "/set/data/main_window_geometry", - "/set/data/selected_translation_compute_device", - "/set/data/selected_transcription_compute_device", - "/set/data/ctranslate2_weight_type", - "/set/data/deepl_auth_key", - "/set/data/selected_mic_host", - "/set/data/selected_mic_device", - "/set/data/mic_threshold", - "/set/data/mic_record_timeout", - "/set/data/mic_phrase_timeout", - "/set/data/mic_max_phrases", - "/set/data/hotkeys", - "/set/data/plugins_status", - "/set/data/mic_avg_logprob", - "/set/data/mic_no_speech_prob", - "/set/data/mic_word_filter", - "/set/data/selected_speaker_device", - "/set/data/speaker_threshold", - "/set/data/speaker_record_timeout", - "/set/data/speaker_phrase_timeout", - "/set/data/speaker_max_phrases", - "/set/data/speaker_avg_logprob", - "/set/data/speaker_no_speech_prob", - "/set/data/whisper_weight_type", - "/set/data/overlay_small_log_settings", - "/set/data/overlay_large_log_settings", - "/set/data/send_message_format_parts", - "/set/data/received_message_format_parts", - "/set/data/websocket_host", - "/set/data/websocket_port", - "/set/data/osc_ip_address", - "/set/data/osc_port", - ] + self.set_data_endpoints = [] + for endpoint in self.main.mapping.keys(): + if endpoint.startswith("/set/data/"): + self.set_data_endpoints.append(endpoint) - self.delete_data_endpoints = [ - "/delete/data/deepl_auth_key", - ] + self.delete_data_endpoints = [] + for endpoint in self.main.mapping.keys(): + if endpoint.startswith("/delete/data/"): + self.delete_data_endpoints.append(endpoint) - self.run_endpoints = { - "/run/send_message_box": [ - { - "data": {"id":"000001", 
"message":"test"}, - "status": 200, - }, - { - # 英語 - "data": {"id":"000002", "message":"Hello World!"}, - "status": 200, - }, - { - # 日本語 - "data": {"id":"000003", "message":"こんにちわ 世界!"}, - "status": 200, - }, - { - # 韓国語 - "data": {"id":"000004", "message":"안녕하세요 세계!"}, - "status": 200, - }, - { - # 中国語 繁体字 - "data": {"id":"000005", "message":"你好,世界!"}, - "status": 200, - }, - ], - "/run/typing_message_box": [{"data": None, "status": 200, "result": True}], - "/run/stop_typing_message_box": [{"data": None, "status": 200, "result": True}], - "/run/send_text_overlay": [{"data": "test_overlay", "status": 200, "result": "test_overlay"}], - "/run/swap_your_language_and_target_language": [{"data": None, "status": 200}], - # !!!Cant be tested here!!! - # "/run/update_software": [{"data": None, "status": 200, "result": True}], - # "/run/update_cuda_software": [{"data": None, "status": 200, "result": True}], - # "/run/download_ctranslate2_weight": [ - # {"data": "small", "status": 200, "result": True}, - # {"data": "large", "status": 400, "result": False}, - # ], - # "/run/download_whisper_weight": [ - # {"data": "tiny", "status": 200, "result": True}, - # {"data": "base", "status": 200, "result": True}, - # {"data": "small", "status": 200, "result": True}, - # {"data": "medium", "status": 200, "result": True}, - # {"data": "large-v1", "status": 200, "result": True}, - # {"data": "large-v2", "status": 400, "result": True}, - # {"data": "large-v3", "status": 400, "result": True}, - # {"data": "large-v3-turbo-int8", "status": 400, "result": True}, - # {"data": "large-v3-turbo", "status": 400, "result": True} - # ], - # "/run/open_filepath_logs": {"data": None, "status": 200, "result": True}, - # "/run/open_filepath_config_file": {"data": None, "status": 200, "result": True}, - # "/run/feed_watchdog": {"data": None, "status": 200, "result": True}, + self.run_endpoints = [] + for endpoint in self.main.mapping.keys(): + if endpoint.startswith("/run/"): + 
self.run_endpoints.append(endpoint) + + self.test_results = {} + + def record_test_result(self, endpoint, status, result, expected_status): + """ + テスト結果を記録する + :param endpoint: テスト対象のエンドポイント + :param status: 実際のステータスコード + :param result: 実際の結果 + :param expected_status: 期待されるステータスコード + """ + self.test_results[endpoint] = { + "status": status, + "result": result, + "expected_status": expected_status, + "success": status in expected_status } def test_endpoints_on_off_single(self, endpoint): @@ -242,14 +121,14 @@ class TestMainloop(): else: print(f"-> {Color.RED}[ERROR]{Color.RESET} endpoint:{endpoint} Status: {status}, Result: {result}") print(f"Current config_dict: {self.config_dict}") + self.record_test_result(endpoint, status, result, expected_status) return success def test_endpoints_on_off_all(self): print("----ON/OFF系のエンドポイントのテスト----") for endpoint in self.validity_endpoints: print(f"Testing endpoint: {endpoint}", flush=True) - if self.test_endpoints_on_off_single(endpoint) is False: - break + self.test_endpoints_on_off_single(endpoint) print("----ON/OFF系のエンドポイントのテスト終了----") def test_endpoints_on_off_random(self): @@ -354,25 +233,25 @@ class TestMainloop(): case "/set/data/mic_threshold": data = random.randint(-1000, 3000) if 0 <= data <= 2000: - expected_status = [200] + pass else: expected_status = [400] case "/set/data/mic_record_timeout": data = random.randint(-1, 10) if 0 <= data <= self.config_dict["mic_phrase_timeout"]: - expected_status = [200] + pass else: expected_status = [400] case "/set/data/mic_phrase_timeout": data = random.randint(-1, 10) if self.config_dict["mic_record_timeout"] <= data: - expected_status = [200] + pass else: expected_status = [400] case "/set/data/mic_max_phrases": data = random.randint(-1, 10) if 0 <= data: - expected_status = [200] + pass else: expected_status = [400] case "/set/data/hotkeys": @@ -402,25 +281,25 @@ class TestMainloop(): case "/set/data/speaker_threshold": data = random.randint(-1000, 5000) if 0 <= data <= 
4000: - expected_status = [200] + pass else: expected_status = [400] case "/set/data/speaker_record_timeout": data = random.randint(-1, 10) if 0 <= data <= self.config_dict["speaker_phrase_timeout"]: - expected_status = [200] + pass else: expected_status = [400] case "/set/data/speaker_phrase_timeout": data = random.randint(-1, 10) if self.config_dict["speaker_record_timeout"] <= data: - expected_status = [200] + pass else: expected_status = [400] case "/set/data/speaker_max_phrases": data = random.randint(-1, 10) if 0 <= data: - expected_status = [200] + pass else: expected_status = [400] case "/set/data/speaker_avg_logprob": @@ -473,7 +352,7 @@ class TestMainloop(): case "/set/data/osc_ip_address": data = random.choice(["127.0.0.1", "aaaaadwafasdsd", "0210.1564.845.0"]) if data == "127.0.0.1": - expected_status = [200] + pass else: expected_status = [400] case "/set/data/osc_port": @@ -495,77 +374,113 @@ class TestMainloop(): else: print(f"-> {Color.YELLOW}[SKIP]{Color.RESET} No data to set for this endpoint: {endpoint}.") success = True + self.record_test_result(endpoint, status, result if data is not None else None, expected_status) # テスト結果を記録 return success def test_set_data_endpoints_all(self): print("----データ設定系のエンドポイントのテスト----") for endpoint in self.set_data_endpoints: print(f"Testing endpoint: {endpoint}", end=" ", flush=True) - if self.test_set_data_endpoints_single(endpoint) is False: - break + self.test_set_data_endpoints_single(endpoint) print("----データ設定系のエンドポイントのテスト終了----") - def test_run_endpoints_single(self, endpoint, test): + def test_run_endpoints_single(self, endpoint): success = False + expected_status = [200] match endpoint: case "/run/send_message_box": - data = test["data"] - expected_status = test["status"] - result, status = self.main.handleRequest(endpoint, data) - if status == expected_status: - print(f"-> {Color.GREEN}[PASS]{Color.RESET} endpoint:{endpoint} Status: {status}, Result: {result}") - success = True - else: - print(f"-> 
{Color.RED}[ERROR]{Color.RESET} endpoint:{endpoint} Status: {status}, Result: {result}") - print(f"Current config_dict: {self.config_dict}") - case "/run/typing_message_box" | "/run/stop_typing_message_box": - data = test["data"] - expected_status = test["status"] - expected_result = test["result"] - result, status = self.main.handleRequest(endpoint, data) - if status == expected_status and result == expected_result: - print(f"-> {Color.GREEN}[PASS]{Color.RESET} endpoint:{endpoint} Status: {status}, Result: {result}") - success = True - else: - print(f"-> {Color.RED}[ERROR]{Color.RESET} endpoint:{endpoint} Status: {status}, Result: {result}, Expected: {expected_result}") - print(f"Current config_dict: {self.config_dict}") + data_list = [ + { + "data": {"id":"000001", "message":"test"}, + "status": [200], + }, + { + # 英語 + "data": {"id":"000002", "message":"Hello World!"}, + "status": [200], + }, + { + # 日本語 + "data": {"id":"000003", "message":"こんにちわ 世界!"}, + "status": [200], + }, + { + # 韓国語 + "data": {"id":"000004", "message":"안녕하세요 세계!"}, + "status": [200], + }, + { + # 中国語 繁体字 + "data": {"id":"000005", "message":"你好,世界!"}, + "status": [200], + }, + ] + choice_data = random.choice(data_list) + data, expected_status = choice_data["data"], choice_data["status"] + case "/run/typing_message_box": + data = None + case "/run/stop_typing_message_box": + data = None case "/run/send_text_overlay": - data = test["data"] - expected_status = test["status"] - expected_result = test["result"] - result, status = self.main.handleRequest(endpoint, data) - if status == expected_status and result == expected_result: - print(f"-> {Color.GREEN}[PASS]{Color.RESET} endpoint:{endpoint} Status: {status}, Result: {result}") - success = True - else: - print(f"-> {Color.RED}[ERROR]{Color.RESET} endpoint:{endpoint} Status: {status}, Result: {result}, Expected: {expected_result}") - print(f"Current config_dict: {self.config_dict}") + data = "test_overlay" case 
"/run/swap_your_language_and_target_language": - data = test["data"] - expected_status = test["status"] - result, status = self.main.handleRequest(endpoint, data) - if status == expected_status: - print(f"-> {Color.GREEN}[PASS]{Color.RESET} endpoint:{endpoint} Status: {status}, Result: {result}") - success = True - else: - print(f"-> {Color.RED}[ERROR]{Color.RESET} endpoint:{endpoint} Status: {status}, Result: {result}") - print(f"Current config_dict: {self.config_dict}") + data = None + case "/run/update_software": + data = None + expected_status = [401] # !!!Cant be tested here!!! + case "/run/update_cuda_software": + data = None + expected_status = [401] # !!!Cant be tested here!!! + case "/run/download_ctranslate2_weight": + data_list = random.choice(["small", "large"]) + data = random.choice(data_list) + expected_status = [401] # !!!Cant be tested here!!! + case "/run/download_whisper_weight": + data_list = [ + "tiny", "base", "small", "medium", + "large-v1", "large-v2", "large-v3", + "large-v3-turbo-int8", "large-v3-turbo" + ] + data = random.choice(data_list) + expected_status = [401] # !!!Cant be tested here!!! + case "/run/open_filepath_logs": + data = None + expected_status = [401] # !!!Cant be tested here!!! + case "/run/open_filepath_config_file": + data = None + expected_status = [401] # !!!Cant be tested here!!! + case "/run/feed_watchdog": + data = None + expected_status = [401] # !!!Cant be tested here!!! 
case _: - print(f"-> {Color.YELLOW}[SKIP]{Color.RESET} No tests defined for this endpoint :{endpoint}.") + data = None + expected_status = [404] success = True + + if expected_status == [401]: + print(f"-> {Color.YELLOW}[SKIP]{Color.RESET} No test available for this endpoint: {endpoint}.") + self.record_test_result(endpoint, None, None, expected_status) # テスト結果を記録 + return success + elif expected_status == [404]: + print(f"-> {Color.RED}[ERROR]{Color.RESET} Unknown endpoint: {endpoint}.") + self.record_test_result(endpoint, None, None, expected_status) # テスト結果を記録 + return False + + result, status = self.main.handleRequest(endpoint, data) + if status == expected_status: + print(f"-> {Color.GREEN}[PASS]{Color.RESET} endpoint:{endpoint} Status: {status}, Result: {result}") + success = True + else: + print(f"-> {Color.RED}[ERROR]{Color.RESET} endpoint:{endpoint} Status: {status}, Result: {result}") + print(f"Current config_dict: {self.config_dict}") + self.record_test_result(endpoint, status, result, expected_status) # テスト結果を記録 return success def test_run_endpoints_all(self): print("----実行系のエンドポイントのテスト----") - for endpoint, tests in self.run_endpoints.items(): + for endpoint in self.run_endpoints: print(f"Testing endpoint: {endpoint}", end=" ", flush=True) - success = True - for test in tests: - if self.test_run_endpoints_single(endpoint, test) is False: - success = False - break - if success is False: - break + self.test_run_endpoints_single(endpoint) print("----実行系のエンドポイントのテスト終了----") def test_endpoints_all_random(self): @@ -574,6 +489,7 @@ class TestMainloop(): "validity", "set_data", "run", + "delete", ] for i in range(10000): @@ -590,10 +506,14 @@ class TestMainloop(): if self.test_set_data_endpoints_single(endpoint) is False: break case "run": - endpoint = random.choice(list(self.run_endpoints.keys())) - test = random.choice(self.run_endpoints[endpoint]) + endpoint = random.choice(self.run_endpoints) print(f"No.{i:04} Testing endpoint: {endpoint}", flush=True) - 
if self.test_run_endpoints_single(endpoint, test) is False: + if self.test_run_endpoints_single(endpoint) is False: + break + case "delete": + endpoint = random.choice(self.delete_data_endpoints) + print(f"No.{i:04} Testing endpoint: {endpoint}", flush=True) + if self.test_delete_data_endpoints_single(endpoint) is False: break # 最後にすべてOFFにして終了 @@ -602,7 +522,6 @@ class TestMainloop(): _, _ = self.main.handleRequest(endpoint, None) print("----すべてのエンドポイントのランダムアクセスのテスト終了----") - def test_endpoints_specific_random(self): print("----特定のエンドポイントのランダムアクセスのテスト----") @@ -619,10 +538,14 @@ class TestMainloop(): "/set/data/osc_port", ] + self.run_specific_endpoints = [] + self.delete_data_endpoints = [] + endpoint_types = [ "validity", "set_data", # "run", + # "delete", ] for i in range(1000): @@ -639,10 +562,14 @@ class TestMainloop(): if self.test_set_data_endpoints_single(endpoint) is False: break case "run": - endpoint = random.choice(list(self.run_endpoints.keys())) - test = random.choice(self.run_endpoints[endpoint]) + endpoint = random.choice(self.run_specific_endpoints) print(f"No.{i:04} Testing endpoint: {endpoint}", flush=True) - if self.test_run_endpoints_single(endpoint, test) is False: + if self.test_run_endpoints_single(endpoint) is False: + break + case "delete": + endpoint = random.choice(self.delete_data_endpoints) + print(f"No.{i:04} Testing endpoint: {endpoint}", flush=True) + if self.test_delete_data_endpoints_single(endpoint) is False: break # 最後にすべてOFFにして終了 @@ -651,17 +578,87 @@ class TestMainloop(): _, _ = self.main.handleRequest(endpoint, None) print("----特定のエンドポイントのランダムアクセスのテスト終了----") + def test_delete_data_endpoints_single(self, endpoint): + success = False + expected_status = [200] + match endpoint: + case "/delete/data/deepl_auth_key": + data = None + case _: + data = None + expected_status = [404] + success = True + + if expected_status == [404]: + print(f"-> {Color.RED}[ERROR]{Color.RESET} Unknown endpoint: {endpoint}.") + 
self.record_test_result(endpoint, None, None, expected_status) # テスト結果を記録 + return False + + result, status = self.main.handleRequest(endpoint, data) + if status == expected_status: + print(f"-> {Color.GREEN}[PASS]{Color.RESET} endpoint:{endpoint} Status: {status}, Result: {result}") + success = True + else: + print(f"-> {Color.RED}[ERROR]{Color.RESET} endpoint:{endpoint} Status: {status}, Result: {result}") + print(f"Current config_dict: {self.config_dict}") + self.record_test_result(endpoint, status, result, expected_status) # テスト結果を記録 + return success + + def test_delete_data_endpoints_all(self): + print("----データ削除系のエンドポイントのテスト----") + for endpoint in self.delete_data_endpoints: + print(f"Testing endpoint: {endpoint}", flush=True) + self.test_delete_data_endpoints_single(endpoint) + print("----データ削除系のエンドポイントのテスト終了----") + + def generate_summary(self): + """ + テスト結果のサマリーを生成して表示する + """ + total_tests = len(self.test_results) + passed_tests = sum(1 for result in self.test_results.values() if result["success"]) + untested_tests = sum(1 for result in self.test_results.values() if result["expected_status"] == [401]) + invalid_tests = sum(1 for result in self.test_results.values() if result["expected_status"] == [404]) + failed_tests = total_tests - passed_tests - untested_tests - invalid_tests + + print("\n---- テスト結果のサマリー ----") + print(f"総テスト数: {total_tests}") + print(f"成功したテスト数: {passed_tests}") + print(f"失敗したテスト数: {failed_tests}") + print(f"テストをしなかったテスト数: {untested_tests}") + print(f"無効なテスト数: {invalid_tests}\n") + + if untested_tests > 0: + print("テストをしなかったテストの詳細:") + for endpoint, result in self.test_results.items(): + if result["expected_status"] == [401]: + print(f"- エンドポイント: {endpoint}") + print(f" ステータス: {result['status']}") + print(f" 結果: {result['result']}\n") + if failed_tests > 0: + print("失敗したテストの詳細:") + for endpoint, result in self.test_results.items(): + if result["success"] != [200]: + print(f"- エンドポイント: {endpoint}") + print(f" ステータス: 
{result['status']} (期待されるステータス: {result['expected_status']})") + print(f" 結果: {result['result']}\n") + print("---- サマリー終了 ----\n") + if __name__ == "__main__": + import traceback try: test = TestMainloop() - # test.test_endpoints_on_off_all() - # test.test_set_data_endpoints_all() - # test.test_run_endpoints_all() - test.test_endpoints_all_random() + test.test_endpoints_on_off_all() + test.test_set_data_endpoints_all() + test.test_run_endpoints_all() + test.test_delete_data_endpoints_all() + # test.test_endpoints_all_random() # test.test_endpoints_on_off_continuous() # test.test_endpoints_on_off_random() # test.test_endpoints_specific_random() + test.generate_summary() except KeyboardInterrupt: print("Interrupted by user, shutting down...") except Exception as e: + traceback.print_exc() print(f"An error occurred: {e}") \ No newline at end of file From 93bdec7922e3d11ab15df1ca724e91070054a5f2 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Thu, 25 Sep 2025 12:37:13 +0900 Subject: [PATCH 43/92] =?UTF-8?q?=E3=82=A8=E3=83=B3=E3=83=89=E3=83=9D?= =?UTF-8?q?=E3=82=A4=E3=83=B3=E3=83=88=E3=83=86=E3=82=B9=E3=83=88=E3=81=AB?= =?UTF-8?q?=E3=81=8A=E3=81=84=E3=81=A6=E3=80=81401=E3=81=8A=E3=82=88?= =?UTF-8?q?=E3=81=B3404=E3=82=B9=E3=83=86=E3=83=BC=E3=82=BF=E3=82=B9?= =?UTF-8?q?=E3=81=AE=E5=87=A6=E7=90=86=E3=82=92=E8=BF=BD=E5=8A=A0=E3=81=97?= =?UTF-8?q?=E3=80=81=E8=A9=B2=E5=BD=93=E3=81=99=E3=82=8B=E3=82=A8=E3=83=B3?= =?UTF-8?q?=E3=83=89=E3=83=9D=E3=82=A4=E3=83=B3=E3=83=88=E3=81=AB=E5=AF=BE?= =?UTF-8?q?=E3=81=99=E3=82=8B=E3=83=86=E3=82=B9=E3=83=88=E7=B5=90=E6=9E=9C?= =?UTF-8?q?=E3=82=92=E8=A8=98=E9=8C=B2=E3=81=99=E3=82=8B=E6=A9=9F=E8=83=BD?= =?UTF-8?q?=E3=82=92=E5=AE=9F=E8=A3=85=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src-python/test_endpoints.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src-python/test_endpoints.py b/src-python/test_endpoints.py 
index d5aa9cb4..b0ca93be 100644 --- a/src-python/test_endpoints.py +++ b/src-python/test_endpoints.py @@ -359,6 +359,16 @@ class TestMainloop(): data = random.randint(1024, 65535) case _: data = None + expected_status = [404] + + if expected_status == [401]: + print(f"-> {Color.YELLOW}[SKIP]{Color.RESET} No test available for this endpoint: {endpoint}.") + self.record_test_result(endpoint, None, None, expected_status) # テスト結果を記録 + return success + elif expected_status == [404]: + print(f"-> {Color.RED}[ERROR]{Color.RESET} Unknown endpoint: {endpoint}.") + self.record_test_result(endpoint, None, None, expected_status) # テスト結果を記録 + return False if data is not None: print(f"data: {data}", end=" ", flush=True) From 96ffd5509f3f55688d47317dac43980eb88aae84 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Thu, 25 Sep 2025 12:43:23 +0900 Subject: [PATCH 44/92] =?UTF-8?q?=E3=83=86=E3=82=B9=E3=83=88=E3=82=B3?= =?UTF-8?q?=E3=83=BC=E3=83=89=E3=81=AA=E5=90=8D=E5=89=8D=E3=82=92=E5=A4=89?= =?UTF-8?q?=E6=9B=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src-python/{test_endpoints.py => backend_test.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src-python/{test_endpoints.py => backend_test.py} (100%) diff --git a/src-python/test_endpoints.py b/src-python/backend_test.py similarity index 100% rename from src-python/test_endpoints.py rename to src-python/backend_test.py From 272ed8629f13ddd2be5f8f26f6ee230692197e49 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Thu, 25 Sep 2025 16:39:18 +0900 Subject: [PATCH 45/92] bugfix test case --- src-python/backend_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src-python/backend_test.py b/src-python/backend_test.py index b0ca93be..2550ff92 100644 --- a/src-python/backend_test.py +++ b/src-python/backend_test.py @@ -477,7 +477,7 @@ class 
TestMainloop(): return False result, status = self.main.handleRequest(endpoint, data) - if status == expected_status: + if status in expected_status: print(f"-> {Color.GREEN}[PASS]{Color.RESET} endpoint:{endpoint} Status: {status}, Result: {result}") success = True else: @@ -605,7 +605,7 @@ class TestMainloop(): return False result, status = self.main.handleRequest(endpoint, data) - if status == expected_status: + if status in expected_status: print(f"-> {Color.GREEN}[PASS]{Color.RESET} endpoint:{endpoint} Status: {status}, Result: {result}") success = True else: From 9e2c91905c7217c13325fa8ccaa97bf1fd309a90 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Thu, 25 Sep 2025 17:27:08 +0900 Subject: [PATCH 46/92] =?UTF-8?q?SHOW=5FRESEND=5FBUTTON=E3=81=AE=E6=9D=A1?= =?UTF-8?q?=E4=BB=B6=E3=82=92=E4=BF=AE=E6=AD=A3=E3=81=97=E3=80=81main=5Fin?= =?UTF-8?q?stance=E3=81=AE=E8=B5=B7=E5=8B=95=E5=87=A6=E7=90=86=E3=82=92?= =?UTF-8?q?=E7=B0=A1=E7=B4=A0=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src-python/controller.py | 2 +- src-python/mainloop.py | 214 +-------------------------------------- 2 files changed, 2 insertions(+), 214 deletions(-) diff --git a/src-python/controller.py b/src-python/controller.py index b31a2759..ebe3c9bc 100644 --- a/src-python/controller.py +++ b/src-python/controller.py @@ -1003,7 +1003,7 @@ class Controller: @staticmethod def setEnableShowResendButton(*args, **kwargs) -> dict: - if not config.SHOW_RESEND_BUTTON: + if config.SHOW_RESEND_BUTTON is False: config.SHOW_RESEND_BUTTON = True return {"status":200, "result":config.SHOW_RESEND_BUTTON} diff --git a/src-python/mainloop.py b/src-python/mainloop.py index 6e5f6548..802ce501 100644 --- a/src-python/mainloop.py +++ b/src-python/mainloop.py @@ -438,216 +438,4 @@ if __name__ == "__main__": for key in mapping.keys(): mapping[key]["status"] = True - process = "main" - match process: - case 
"main": - main_instance.start() - - case "test": - for _ in range(100): - time.sleep(0.5) - endpoint = "/get/data/mic_host_list" - result, status = main_instance.handleRequest(endpoint) - printResponse(status, endpoint, result) - - case "test_all": - import time - for endpoint, value in mapping.items(): - printLog("endpoint", endpoint) - - match endpoint: - case "/run/send_message_box": - # handleRequest("/set/enable/translation") - # handleRequest("/set/enable/convert_message_to_romaji") - data = {"id":"123456", "message":"テスト"} - case "/set/data/selected_translation_engines": - data = { - "1":"CTranslate2", - "2":"CTranslate2", - "3":"CTranslate2", - } - case "/set/data/selected_your_languages": - data = { - "1":{ - "1":{ - "language": "English", - "country": "Hong Kong" - }, - }, - "2":{ - "1":{ - "language":"Japanese", - "country":"Japan" - }, - }, - "3":{ - "1":{ - "language":"Japanese", - "country":"Japan" - }, - }, - } - case "/set/data/selected_target_languages": - data ={ - "1":{ - "1": { - "language": "Japanese", - "country": "Japan", - "enabled": True, - }, - "secondary": { - "language": "English", - "country": "United States", - "enabled": True, - }, - "tertiary": { - "language": "Chinese Simplified", - "country": "China", - "enabled": True, - } - }, - "2":{ - "1":{ - "language":"English", - "country":"United States", - "enabled": True, - }, - "secondary":{ - "language":"English", - "country":"United States", - "enabled": True, - }, - "tertiary":{ - "language":"English", - "country":"United States", - "enabled": True, - }, - }, - "3":{ - "1":{ - "language":"English", - "country":"United States", - "enabled": True, - }, - "secondary":{ - "language":"English", - "country":"United States", - "enabled": True, - }, - "tertiary":{ - "language":"English", - "country":"United States", - "enabled": True, - }, - }, - } - case "/set/data/transparency": - data = 0.5 - case "/set/appearance": - data = "Dark" - case "/set/data/ui_scaling": - data = 1.5 - case 
"/set/data/appearance_theme": - data = "Dark" - case "/set/data/textbox_ui_scaling": - data = 1.5 - case "/set/data/message_box_ratio": - data = 0.5 - case "/set/data/send_message_button_type": - data = "show" - case "/set/data/font_family": - data = "Yu Gothic UI" - case "/set/data/ui_language": - data = "ja" - case "/set/data/ctranslate2_weight_type": - data = "small" - case "/set/data/deepl_auth_key": - data = "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee:fx" - case "/set/data/selected_mic_host": - data = "MME" - case "/set/data/selected_mic_device": - data = "マイク (Realtek High Definition Audio)" - case "/set/data/mic_threshold": - data = 0.5 - case "/set/data/mic_record_timeout": - data = 1 - case "/set/data/mic_phrase_timeout": - data = 5 - case "/set/data/mic_max_phrases": - data = 5 - case "/set/data/mic_word_filter": - data = "test0, test1, test2" - case "/set/data/selected_speaker_device": - data = "スピーカー (Realtek High Definition Audio)" - case "/set/data/speaker_threshold": - data = 0.5 - case "/set/data/speaker_record_timeout": - data = 5 - case "/set/data/speaker_phrase_timeout": - data = 5 - case "/set/data/speaker_max_phrases": - data = 5 - case "/set/data/whisper_weight_type": - data = "base" - case "/set/data/overlay_settings": - data = { - "opacity": 0.5, - "ui_scaling": 1.5, - } - case "/set/data/overlay_small_log_settings": - data = { - "x_pos": 0, - "y_pos": 0, - "z_pos": 0, - "x_rotation": 0, - "y_rotation": 0, - "z_rotation": 0, - "display_duration": 5, - "fadeout_duration": 0.5, - } - case "/set/data/send_message_format_parts": - data = { - "message": { - "prefix": "", - "suffix": "" - }, - "between_separator": "\n", - "translation": { - "prefix": "(", - "separator": "\\", - "suffix": ")" - }, - "translation_first": False, - } - case "/set/data/received_message_format_parts": - data = { - "message": { - "prefix": "", - "suffix": "" - }, - "between_separator": "\n", - "translation": { - "prefix": "(", - "separator": "\\", - "suffix": ")" - }, - 
"translation_first": True, - } - case "/set/data/osc_ip_address": - data = "127.0.0.1" - case "/set/data/osc_port": - data = 8000 - case "/set/data/speaker_no_speech_prob": - data = 0.5 - case "/set/data/speaker_avg_logprob": - data = 0.5 - case "/set/data/mic_no_speech_prob": - data = 0.5 - case "/set/data/mic_avg_logprob": - data = 0.5 - case _: - data = None - - result, status = main_instance.handleRequest(endpoint, data) - printResponse(status, endpoint, result) - time.sleep(0.5) \ No newline at end of file + main_instance.start() \ No newline at end of file From 92f9d645f86fa0d1b38ea940a5bc09619a380fad Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Thu, 25 Sep 2025 22:56:16 +0900 Subject: [PATCH 47/92] [Update] Refactor compute type management: rename CTranslate2 and Whisper compute types to Translation and Transcription --- src-python/config.py | 32 ++++++++++++++++---------------- src-python/controller.py | 26 ++++++++++++++++---------- src-python/mainloop.py | 8 ++++---- src-python/model.py | 6 +++--- 4 files changed, 39 insertions(+), 33 deletions(-) diff --git a/src-python/config.py b/src-python/config.py index bd3af244..07926324 100644 --- a/src-python/config.py +++ b/src-python/config.py @@ -815,15 +815,15 @@ class Config: self.saveConfig(inspect.currentframe().f_code.co_name, value) @property - @json_serializable('CTRANSLATE2_COMPUTE_TYPE') - def CTRANSLATE2_COMPUTE_TYPE(self): - return self._CTRANSLATE2_COMPUTE_TYPE + @json_serializable('TRANSLATION_COMPUTE_TYPE') + def TRANSLATION_COMPUTE_TYPE(self): + return self._TRANSLATION_COMPUTE_TYPE - @CTRANSLATE2_COMPUTE_TYPE.setter - def CTRANSLATE2_COMPUTE_TYPE(self, value): + @TRANSLATION_COMPUTE_TYPE.setter + def TRANSLATION_COMPUTE_TYPE(self, value): if isinstance(value, str): - if value in self.SELECTED_TRANSLATION_COMPUTE_DEVICE["compute_type"]: - self._CTRANSLATE2_COMPUTE_TYPE = value + if value in 
self.SELECTED_TRANSLATION_COMPUTE_DEVICE["compute_types"]: + self._TRANSLATION_COMPUTE_TYPE = value self.saveConfig(inspect.currentframe().f_code.co_name, value) @property @@ -839,15 +839,15 @@ class Config: self.saveConfig(inspect.currentframe().f_code.co_name, value) @property - @json_serializable('WHISPER_COMPUTE_TYPE') - def WHISPER_COMPUTE_TYPE(self): - return self._WHISPER_COMPUTE_TYPE + @json_serializable('TRANSCRIPTION_COMPUTE_TYPE') + def TRANSCRIPTION_COMPUTE_TYPE(self): + return self._TRANSCRIPTION_COMPUTE_TYPE - @WHISPER_COMPUTE_TYPE.setter - def WHISPER_COMPUTE_TYPE(self, value): + @TRANSCRIPTION_COMPUTE_TYPE.setter + def TRANSCRIPTION_COMPUTE_TYPE(self, value): if isinstance(value, str): - if value in self.SELECTED_TRANSCRIPTION_COMPUTE_DEVICE["compute_type"]: - self._WHISPER_COMPUTE_TYPE = value + if value in self.SELECTED_TRANSCRIPTION_COMPUTE_DEVICE["compute_types"]: + self._TRANSCRIPTION_COMPUTE_TYPE = value self.saveConfig(inspect.currentframe().f_code.co_name, value) @property @@ -1209,9 +1209,9 @@ class Config: self._SELECTED_TRANSLATION_COMPUTE_DEVICE = copy.deepcopy(self.SELECTABLE_COMPUTE_DEVICE_LIST[0]) self._SELECTED_TRANSCRIPTION_COMPUTE_DEVICE = copy.deepcopy(self.SELECTABLE_COMPUTE_DEVICE_LIST[0]) self._CTRANSLATE2_WEIGHT_TYPE = "small" - self._CTRANSLATE2_COMPUTE_TYPE = "auto" + self._TRANSLATION_COMPUTE_TYPE = "auto" self._WHISPER_WEIGHT_TYPE = "base" - self._WHISPER_COMPUTE_TYPE = "auto" + self._TRANSCRIPTION_COMPUTE_TYPE = "auto" self._AUTO_CLEAR_MESSAGE_BOX = True self._SEND_ONLY_TRANSLATED_MESSAGES = False self._OVERLAY_SMALL_LOG = False diff --git a/src-python/controller.py b/src-python/controller.py index 5ea86430..a746ecee 100644 --- a/src-python/controller.py +++ b/src-python/controller.py @@ -1437,6 +1437,7 @@ class Controller: @staticmethod def setCtranslate2WeightType(data, *args, **kwargs) -> dict: + pre_weight_type = config.CTRANSLATE2_WEIGHT_TYPE config.CTRANSLATE2_WEIGHT_TYPE = str(data) if 
model.checkTranslatorCTranslate2ModelWeight(config.CTRANSLATE2_WEIGHT_TYPE): def callback(): @@ -1445,15 +1446,18 @@ class Controller: th_callback.daemon = True th_callback.start() th_callback.join() + else: + config.CTRANSLATE2_WEIGHT_TYPE = pre_weight_type return {"status":200, "result":config.CTRANSLATE2_WEIGHT_TYPE} @staticmethod - def getCtranslate2ComputeType(*args, **kwargs) -> dict: - return {"status":200, "result":config.CTRANSLATE2_COMPUTE_TYPE} + def getTranslationComputeType(*args, **kwargs) -> dict: + return {"status":200, "result":config.TRANSLATION_COMPUTE_TYPE} @staticmethod - def setCtranslate2ComputeType(data, *args, **kwargs) -> dict: - config.CTRANSLATE2_COMPUTE_TYPE = str(data) + def setTranslationComputeType(data, *args, **kwargs) -> dict: + pre_compute_type = config.TRANSLATION_COMPUTE_TYPE + config.TRANSLATION_COMPUTE_TYPE = str(data) if model.checkTranslatorCTranslate2ModelWeight(config.CTRANSLATE2_WEIGHT_TYPE): def callback(): model.changeTranslatorCTranslate2Model() @@ -1461,7 +1465,9 @@ class Controller: th_callback.daemon = True th_callback.start() th_callback.join() - return {"status":200, "result":config.CTRANSLATE2_COMPUTE_TYPE} + else: + config.TRANSLATION_COMPUTE_TYPE = pre_compute_type + return {"status":200, "result":config.TRANSLATION_COMPUTE_TYPE} @staticmethod def getWhisperWeightType(*args, **kwargs) -> dict: @@ -1473,13 +1479,13 @@ class Controller: return {"status":200, "result": config.WHISPER_WEIGHT_TYPE} @staticmethod - def getWhisperComputeType(*args, **kwargs) -> dict: - return {"status":200, "result":config.WHISPER_COMPUTE_TYPE} + def getTranscriptionComputeType(*args, **kwargs) -> dict: + return {"status":200, "result":config.TRANSCRIPTION_COMPUTE_TYPE} @staticmethod - def setWhisperComputeType(data, *args, **kwargs) -> dict: - config.WHISPER_COMPUTE_TYPE = str(data) - return {"status":200, "result":config.WHISPER_COMPUTE_TYPE} + def setTranscriptionComputeType(data, *args, **kwargs) -> dict: + 
config.TRANSCRIPTION_COMPUTE_TYPE = str(data) + return {"status":200, "result":config.TRANSCRIPTION_COMPUTE_TYPE} @staticmethod def getSendMessageFormatParts(*args, **kwargs) -> dict: diff --git a/src-python/mainloop.py b/src-python/mainloop.py index 73e75594..a32fef8a 100644 --- a/src-python/mainloop.py +++ b/src-python/mainloop.py @@ -162,8 +162,8 @@ mapping = { "/get/data/ctranslate2_weight_type": {"status": True, "variable":controller.getCtranslate2WeightType}, "/set/data/ctranslate2_weight_type": {"status": True, "variable":controller.setCtranslate2WeightType}, - "/get/data/ctranslate2_compute_type": {"status": True, "variable":controller.getCtranslate2ComputeType}, - "/set/data/ctranslate2_compute_type": {"status": True, "variable":controller.setCtranslate2ComputeType}, + "/get/data/translation_compute_type": {"status": True, "variable":controller.getTranslationComputeType}, + "/set/data/translation_compute_type": {"status": True, "variable":controller.setTranslationComputeType}, "/run/download_ctranslate2_weight": {"status": True, "variable":controller.downloadCtranslate2Weight}, @@ -268,8 +268,8 @@ mapping = { "/get/data/whisper_weight_type": {"status": True, "variable":controller.getWhisperWeightType}, "/set/data/whisper_weight_type": {"status": True, "variable":controller.setWhisperWeightType}, - "/get/data/whisper_compute_type": {"status": True, "variable":controller.getWhisperComputeType}, - "/set/data/whisper_compute_type": {"status": True, "variable":controller.setWhisperComputeType}, + "/get/data/transcription_compute_type": {"status": True, "variable":controller.getTranscriptionComputeType}, + "/set/data/transcription_compute_type": {"status": True, "variable":controller.setTranscriptionComputeType}, "/run/download_whisper_weight": {"status": True, "variable":controller.downloadWhisperWeight}, diff --git a/src-python/model.py b/src-python/model.py index 445b0a5e..639d375f 100644 --- a/src-python/model.py +++ b/src-python/model.py @@ -116,7 +116,7 @@ 
class Model: model_type=config.CTRANSLATE2_WEIGHT_TYPE, device=config.SELECTED_TRANSLATION_COMPUTE_DEVICE["device"], device_index=config.SELECTED_TRANSLATION_COMPUTE_DEVICE["device_index"], - compute_type=config.CTRANSLATE2_COMPUTE_TYPE + compute_type=config.TRANSLATION_COMPUTE_TYPE ) def downloadCTranslate2ModelWeight(self, weight_type, callback=None, end_callback=None): @@ -440,7 +440,7 @@ class Model: whisper_weight_type=config.WHISPER_WEIGHT_TYPE, device=config.SELECTED_TRANSCRIPTION_COMPUTE_DEVICE["device"], device_index=config.SELECTED_TRANSCRIPTION_COMPUTE_DEVICE["device_index"], - compute_type=config.WHISPER_COMPUTE_TYPE, + compute_type=config.TRANSCRIPTION_COMPUTE_TYPE, ) def sendMicTranscript(): try: @@ -624,7 +624,7 @@ class Model: whisper_weight_type=config.WHISPER_WEIGHT_TYPE, device=config.SELECTED_TRANSCRIPTION_COMPUTE_DEVICE["device"], device_index=config.SELECTED_TRANSCRIPTION_COMPUTE_DEVICE["device_index"], - compute_type=config.WHISPER_COMPUTE_TYPE, + compute_type=config.TRANSCRIPTION_COMPUTE_TYPE, ) def sendSpeakerTranscript(): try: From 8c5f1b5db2eb04c0b211d5014b3c6fb78877d277 Mon Sep 17 00:00:00 2001 From: Sakamoto Shiina <68018796+ShiinaSakamoto@users.noreply.github.com> Date: Fri, 26 Sep 2025 17:07:19 +0900 Subject: [PATCH 48/92] [Update/Chore] Config Page: Put the selectors 'Compute Device' and 'Compute Type' to the same section. Rename: UI: CTranslate2/Whisper Compute Type to Translation/Transcription Compute Type. 
--- locales/en.yml | 4 +- locales/ja.yml | 4 +- locales/ko.yml | 4 +- locales/zh-Hans.yml | 4 +- locales/zh-Hant.yml | 4 +- .../setting_box/device/Device.jsx | 1 + .../transcription/Transcription.jsx | 124 +++++++++-------- .../transcription/Transcription.module.scss | 116 ++++++++++++++++ .../setting_box/translation/Translation.jsx | 127 +++++++++--------- .../translation/Translation.module.scss | 106 +++++++++++++++ .../configs/transcription/useTranscription.js | 48 +++---- .../configs/translation/useTranslation.js | 48 +++---- src-ui/logics/useReceiveRoutes.js | 10 +- src-ui/store.js | 10 +- 14 files changed, 402 insertions(+), 208 deletions(-) diff --git a/locales/en.yml b/locales/en.yml index 25b04e62..76cfd208 100644 --- a/locales/en.yml +++ b/locales/en.yml @@ -133,7 +133,7 @@ config_page: desc: "You can choose the translation model when using the {{ctranslate2}} translation engine." small: "Basic Model ({{capacity}})" large: "High Accuracy Model ({{capacity}})" - ctranslate2_compute_type: + translation_compute_type: label: "Processing type for AI translation {{ctranslate2}}" translation_compute_device: label: "Processing device for AI translation" @@ -179,7 +179,7 @@ config_page: desc: "Larger models have higher accuracy, but they also consume more CPU or GPU resources.\nEspecially for models larger than medium, it may be difficult or even impossible to use them depending on the performance of your CPU/GPU." 
model_template: "{{model_name}} model ({{capacity}})" recommended_model_template: "{{model_name}} model ({{capacity}}) (Recommended)" - whisper_compute_type: + transcription_compute_type: label: "Processing type for AI transcription {{whisper}}" transcription_compute_device: label: "Processing Device Used For AI transcription" diff --git a/locales/ja.yml b/locales/ja.yml index 39542767..ab47d45c 100644 --- a/locales/ja.yml +++ b/locales/ja.yml @@ -133,7 +133,7 @@ config_page: desc: "翻訳エンジン「{{ctranslate2}}」で翻訳する際に、使用する翻訳モデルを選択できます。" small: "通常モデル ({{capacity}})" large: "高精度モデル ({{capacity}})" - ctranslate2_compute_type: + translation_compute_type: label: "AI翻訳 {{ctranslate2}} の処理タイプ" translation_compute_device: label: "AI翻訳の処理デバイス" @@ -179,7 +179,7 @@ config_page: desc: "容量が大きいモデルほど精度は高いですが、その分CPUやGPUを占有します。\n※特にmediumより容量の大きいモデルは、CPU/GPUの性能によっては使用すらも困難です。" model_template: "{{model_name}} モデル ({{capacity}})" recommended_model_template: "{{model_name}} モデル ({{capacity}}) [推奨]" - whisper_compute_type: + transcription_compute_type: label: "AI音声認識 {{whisper}} の処理タイプ" transcription_compute_device: label: "AI音声認識で使用する処理デバイス" diff --git a/locales/ko.yml b/locales/ko.yml index 288c63e3..af169b93 100644 --- a/locales/ko.yml +++ b/locales/ko.yml @@ -133,7 +133,7 @@ config_page: desc: "오프라인 번역 시의 번역 모델을 변경합니다." small: "일반 모델 ({{capacity}})" large: "정밀 모델 ({{capacity}})" - ctranslate2_compute_type: + translation_compute_type: label: translation_compute_device: label: "AI 번역 처리 장치" @@ -179,7 +179,7 @@ config_page: desc: "용량이 큰 모델일수록 정확도는 높지만, 그만큼 CPU나 GPU를 많이 차지합니다. * 특히 medium보다 용량이 큰 모델은 CPU/GPU 성능에 따라 사용 자체가 어려울 수 있습니다." 
model_template: "{{model_name}} 모델 ({{capacity}})" recommended_model_template: "{{model_name}} 모델 ({{capacity}}) (권장)" - whisper_compute_type: + transcription_compute_type: label: transcription_compute_device: label: diff --git a/locales/zh-Hans.yml b/locales/zh-Hans.yml index 7cfc594e..8322c96f 100644 --- a/locales/zh-Hans.yml +++ b/locales/zh-Hans.yml @@ -133,7 +133,7 @@ config_page: desc: "可以选择用于离线翻译的翻译模型" small: "普通模型 ({{capacity}})" large: "高精度模型 ({{capacity}})" - ctranslate2_compute_type: + translation_compute_type: label: translation_compute_device: label: @@ -179,7 +179,7 @@ config_page: desc: model_template: "{{model_name}} 模型 ({{capacity}})" recommended_model_template: "{{model_name}} 模型 ({{capacity}}) (推荐)" - whisper_compute_type: + transcription_compute_type: label: transcription_compute_device: label: diff --git a/locales/zh-Hant.yml b/locales/zh-Hant.yml index 039b60ba..07b79b51 100644 --- a/locales/zh-Hant.yml +++ b/locales/zh-Hant.yml @@ -133,7 +133,7 @@ config_page: desc: "你可以選擇用於離線翻譯引擎的翻譯模型。" small: "基本模型({{capacity}})" large: "高準確率模型({{capacity}})" - ctranslate2_compute_type: + translation_compute_type: label: translation_compute_device: label: @@ -179,7 +179,7 @@ config_page: desc: model_template: "{{model_name}}模型({{capacity}})" recommended_model_template: "{{model_name}}模型({{capacity}})(推薦)" - whisper_compute_type: + transcription_compute_type: label: transcription_compute_device: label: diff --git a/src-ui/app/config_page/setting_section/setting_box/device/Device.jsx b/src-ui/app/config_page/setting_section/setting_box/device/Device.jsx index 28240452..ab5bff02 100644 --- a/src-ui/app/config_page/setting_section/setting_box/device/Device.jsx +++ b/src-ui/app/config_page/setting_section/setting_box/device/Device.jsx @@ -53,6 +53,7 @@ const Mic_Container = () => { setSelectedMicDevice(selected_data.selected_id); }; + // [Fix me] currentEnableAutoMicSelect.data === "pending"; ? 
not currentEnableAutoMicSelect.state === "pending"; ??(.state) const is_disabled_selector = currentEnableAutoMicSelect.data === true || currentEnableAutoMicSelect.data === "pending"; const getLabels = () => { diff --git a/src-ui/app/config_page/setting_section/setting_box/transcription/Transcription.jsx b/src-ui/app/config_page/setting_section/setting_box/transcription/Transcription.jsx index 2e0daf92..48639292 100644 --- a/src-ui/app/config_page/setting_section/setting_box/transcription/Transcription.jsx +++ b/src-ui/app/config_page/setting_section/setting_box/transcription/Transcription.jsx @@ -1,7 +1,8 @@ import { useEffect, useState } from "react"; import { useI18n } from "@useI18n"; import styles from "./Transcription.module.scss"; -import { updateLabelsById, genNumObjArray } from "@utils"; +import { updateLabelsById, genNumObjArray, arrayToObject } from "@utils"; +import { useStore_IsBreakPoint } from "@store"; import { useTranscription, @@ -12,11 +13,14 @@ import { DownloadModelsContainer, RadioButtonContainer, DropdownMenuContainer, - ComputeDeviceContainer, SliderContainer, + + useOnMouseLeaveDropdownMenu, } from "../_templates/Templates"; import { + DropdownMenu, + LabelComponent, SectionLabelComponent, } from "../_components/"; @@ -201,7 +205,6 @@ const TranscriptionEngine_Container = () => { - ); @@ -275,71 +278,76 @@ const WhisperWeightType_Box = () => { ); }; -const WhisperComputeType_Box = () => { - const { t } = useI18n(); - const { currentSelectableWhisperComputeTypeList } = useTranscription(); - const { currentSelectedWhisperComputeType, setSelectedWhisperComputeType } = useTranscription(); - const selectFunction = (selected_data) => { - setSelectedWhisperComputeType(selected_data.selected_id); - }; - - const whisper_compute_type_label = t("config_page.transcription.whisper_compute_type.label", { - whisper: "Whisper" - }); - - return ( - - ); -}; - -// Duplicate -import { useComputeMode } from "@logics_common"; const TranscriptionComputeDevice_Box 
= () => { const { t } = useI18n(); - const { currentSelectedTranscriptionComputeDevice, setSelectedTranscriptionComputeDevice } = useTranscription(); - const { currentSelectableTranscriptionComputeDeviceList } = useTranscription(); - - const selectFunction = (selected_data) => { - const target_obj = currentSelectableTranscriptionComputeDeviceList.data[selected_data.selected_id]; - setSelectedTranscriptionComputeDevice(target_obj); - }; + const { + currentSelectableTranscriptionComputeDeviceList, + currentSelectedTranscriptionComputeDevice, + setSelectedTranscriptionComputeDevice, + currentSelectedTranscriptionComputeType, + setSelectedTranscriptionComputeType, + } = useTranscription(); + const { onMouseLeaveFunction } = useOnMouseLeaveDropdownMenu(); + const { currentIsBreakPoint } = useStore_IsBreakPoint(); const list_for_ui = transformDeviceArray(currentSelectableTranscriptionComputeDeviceList.data); const target_index = findKeyByDeviceValue(currentSelectableTranscriptionComputeDeviceList.data, currentSelectedTranscriptionComputeDevice.data); + const selectable_compute_types = arrayToObject(currentSelectableTranscriptionComputeDeviceList.data[target_index].compute_types); - const { currentComputeMode } = useComputeMode(); - if (currentComputeMode.data === "cpu") { - return ( - - ) - } + + const selectFunction_ComputeDevice = (selected_data) => { + const target_obj = currentSelectableTranscriptionComputeDeviceList.data[selected_data.selected_id]; + setSelectedTranscriptionComputeDevice(target_obj); + }; + + const selectFunction_ComputeType = (selected_data) => { + setSelectedTranscriptionComputeType(selected_data.selected_id); + }; + + const device_container_class = clsx(styles.device_container, { + [styles.is_break_point]: currentIsBreakPoint.data, + }); + + const is_disabled_selector = currentSelectedTranscriptionComputeDevice.state === "pending" || currentSelectedTranscriptionComputeType.state === "pending"; return ( - +
+
+ +
+ +
+
+

{t("config_page.transcription.transcription_compute_device.label")}

+ +
+ +
+

{t("config_page.transcription.transcription_compute_type.label")}

+ +
+
+
+
+
); }; @@ -399,8 +407,6 @@ const Advanced_Container = () => { ); - - }; export const MicAvgLogprobContainer = () => { diff --git a/src-ui/app/config_page/setting_section/setting_box/transcription/Transcription.module.scss b/src-ui/app/config_page/setting_section/setting_box/transcription/Transcription.module.scss index 1170a41c..8eed1df9 100644 --- a/src-ui/app/config_page/setting_section/setting_box/transcription/Transcription.module.scss +++ b/src-ui/app/config_page/setting_section/setting_box/transcription/Transcription.module.scss @@ -2,4 +2,120 @@ display: flex; flex-direction: column; gap: 6.4rem; +} + + + + + + + + + + +// [Fix me] Need refactor. +.mic_container { + display: flex; + flex-direction: column; + border-bottom: solid 0.1rem var(--dark_800_color); + padding-bottom: 1rem; +} + +.speaker_container { + padding-top: 0rem; +} + +.device_container { + display: flex; + width: 100%; + justify-content: space-between; + align-items: center; + padding: 2rem; + margin-bottom: 0rem; + &.is_break_point { + flex-direction: column; + gap: 2rem; + align-items: start; + & .device_contents { + display: flex; + width: 100%; + justify-content: space-between; + padding-left: 0rem; + } + } +} + +.threshold_container { + padding: 2rem; +} + + + +.threshold_container { + display: flex; + width: 100%; + flex-direction: column; + justify-content: space-between; + align-items: center; + gap: 2rem; +} + +.threshold_switch_section { + display: flex; + width: 100%; + justify-content: space-between; + align-items: center; + flex-shrink: 0; +} + +.threshold_section { + width: 100%; +} + + + + +.device_label { + font-size: 1.8rem; +} + +.device_contents { + display: flex; + width: 100%; + justify-content: end; + padding-left: 2rem; + gap: 2rem; +} + +.device_auto_select_wrapper { + display: flex; + flex-direction: column; + gap: 1.2rem; + justify-content: center; + align-items: center; +} + +.device_dropdown_wrapper { + display: flex; + flex-direction: row; + gap: 2.8rem; +} + 
+.device_dropdown { + display: flex; + flex-direction: column; + gap: 0.6rem; + white-space: nowrap; + max-width: 24rem; + &.is_disabled { + pointer-events: none; + } +} + +.device_secondary_label { + padding-left: 0.2rem; + padding-right: 0.4rem; + font-size: 1.4rem; + color: var(--dark_500_color); + white-space: nowrap; } \ No newline at end of file diff --git a/src-ui/app/config_page/setting_section/setting_box/translation/Translation.jsx b/src-ui/app/config_page/setting_section/setting_box/translation/Translation.jsx index cdb8d351..a69da769 100644 --- a/src-ui/app/config_page/setting_section/setting_box/translation/Translation.jsx +++ b/src-ui/app/config_page/setting_section/setting_box/translation/Translation.jsx @@ -1,7 +1,8 @@ import { useEffect, useState } from "react"; import { useI18n } from "@useI18n"; import styles from "./Translation.module.scss"; -import { updateLabelsById } from "@utils"; +import { updateLabelsById, arrayToObject } from "@utils"; +import { useStore_IsBreakPoint } from "@store"; import { useTranslation, @@ -10,15 +11,19 @@ import { import { DownloadModelsContainer, DeeplAuthKeyContainer, - DropdownMenuContainer, - ComputeDeviceContainer, + + useOnMouseLeaveDropdownMenu, } from "../_templates/Templates"; +import { + DropdownMenu, + LabelComponent, +} from "../_components/"; + export const Translation = () => { return ( <> - @@ -73,73 +78,75 @@ const CTranslate2WeightType_Box = () => { ); }; -const CTranslate2ComputeType_Box = () => { - const { t } = useI18n(); - const { currentSelectableCTranslate2ComputeTypeList } = useTranslation(); - const { currentSelectedCTranslate2ComputeType, setSelectedCTranslate2ComputeType } = useTranslation(); - - const selectFunction = (selected_data) => { - setSelectedCTranslate2ComputeType(selected_data.selected_id); - }; - - const ctranslate2_compute_type_label = t("config_page.translation.ctranslate2_compute_type.label", { - ctranslate2: "CTranslate2" - }); - - return ( - - ); -}; - -// Duplicate 
-import { useComputeMode } from "@logics_common"; const TranslationComputeDevice_Box = () => { const { t } = useI18n(); - const { currentSelectedTranslationComputeDevice, setSelectedTranslationComputeDevice } = useTranslation(); - const { currentSelectableTranslationComputeDeviceList } = useTranslation(); - - const selectFunction = (selected_data) => { - const target_obj = currentSelectableTranslationComputeDeviceList.data[selected_data.selected_id]; - setSelectedTranslationComputeDevice(target_obj); - }; + const { + currentSelectableTranslationComputeDeviceList, + currentSelectedTranslationComputeDevice, + setSelectedTranslationComputeDevice, + currentSelectedTranslationComputeType, + setSelectedTranslationComputeType, + } = useTranslation(); + const { onMouseLeaveFunction } = useOnMouseLeaveDropdownMenu(); + const { currentIsBreakPoint } = useStore_IsBreakPoint(); const list_for_ui = transformDeviceArray(currentSelectableTranslationComputeDeviceList.data); const target_index = findKeyByDeviceValue(currentSelectableTranslationComputeDeviceList.data, currentSelectedTranslationComputeDevice.data); + const selectable_compute_types = arrayToObject(currentSelectableTranslationComputeDeviceList.data[target_index].compute_types); - const { currentComputeMode } = useComputeMode(); - const translation_compute_device_label = t("config_page.translation.translation_compute_device.label", { - ctranslate2: "CTranslate2" + + const selectFunction_ComputeDevice = (selected_data) => { + const target_obj = currentSelectableTranslationComputeDeviceList.data[selected_data.selected_id]; + setSelectedTranslationComputeDevice(target_obj); + }; + + const selectFunction_ComputeType = (selected_data) => { + setSelectedTranslationComputeType(selected_data.selected_id); + }; + + const device_container_class = clsx(styles.device_container, { + [styles.is_break_point]: currentIsBreakPoint.data, }); - if (currentComputeMode.data === "cpu") { - return ( - - ) - } + + const is_disabled_selector = 
currentSelectedTranslationComputeDevice.state === "pending" || currentSelectedTranslationComputeType.state === "pending"; return ( - +
+
+ +
+ +
+
+

{t("config_page.translation.translation_compute_device.label")}

+ +
+ +
+

{t("config_page.translation.translation_compute_type.label")}

+ +
+
+
+
+
); }; diff --git a/src-ui/app/config_page/setting_section/setting_box/translation/Translation.module.scss b/src-ui/app/config_page/setting_section/setting_box/translation/Translation.module.scss index e69de29b..7486c466 100644 --- a/src-ui/app/config_page/setting_section/setting_box/translation/Translation.module.scss +++ b/src-ui/app/config_page/setting_section/setting_box/translation/Translation.module.scss @@ -0,0 +1,106 @@ +// [Fix me] Need refactor. +.mic_container { + display: flex; + flex-direction: column; + border-bottom: solid 0.1rem var(--dark_800_color); + padding-bottom: 1rem; +} + +.speaker_container { + padding-top: 0rem; +} + +.device_container { + display: flex; + width: 100%; + justify-content: space-between; + align-items: center; + padding: 2rem; + margin-bottom: 0rem; + &.is_break_point { + flex-direction: column; + gap: 2rem; + align-items: start; + & .device_contents { + display: flex; + width: 100%; + justify-content: space-between; + padding-left: 0rem; + } + } +} + +.threshold_container { + padding: 2rem; +} + + + +.threshold_container { + display: flex; + width: 100%; + flex-direction: column; + justify-content: space-between; + align-items: center; + gap: 2rem; +} + +.threshold_switch_section { + display: flex; + width: 100%; + justify-content: space-between; + align-items: center; + flex-shrink: 0; +} + +.threshold_section { + width: 100%; +} + + + + +.device_label { + font-size: 1.8rem; +} + +.device_contents { + display: flex; + width: 100%; + justify-content: end; + padding-left: 2rem; + gap: 2rem; +} + +.device_auto_select_wrapper { + display: flex; + flex-direction: column; + gap: 1.2rem; + justify-content: center; + align-items: center; +} + +.device_dropdown_wrapper { + display: flex; + flex-direction: row; + gap: 2.8rem; +} + +.device_dropdown { + display: flex; + flex-direction: column; + gap: 0.6rem; + white-space: nowrap; + max-width: 24rem; + &.is_disabled { + pointer-events: none; + } +} + +.device_secondary_label { + 
padding-left: 0.2rem; + padding-right: 0.4rem; + font-size: 1.4rem; + color: var(--dark_500_color); + white-space: nowrap; +} \ No newline at end of file diff --git a/src-ui/logics/configs/transcription/useTranscription.js b/src-ui/logics/configs/transcription/useTranscription.js index 4af6a28c..1511d77a 100644 --- a/src-ui/logics/configs/transcription/useTranscription.js +++ b/src-ui/logics/configs/transcription/useTranscription.js @@ -14,8 +14,7 @@ import { useStore_WhisperWeightTypeStatus, useStore_SelectedWhisperWeightType, - useStore_SelectedWhisperComputeType, - useStore_SelectableWhisperComputeTypeList, + useStore_SelectedTranscriptionComputeType, useStore_MicAvgLogprob, useStore_MicNoSpeechProb, @@ -48,8 +47,7 @@ export const useTranscription = () => { const { currentSelectedWhisperWeightType, updateSelectedWhisperWeightType, pendingSelectedWhisperWeightType } = useStore_SelectedWhisperWeightType(); - const { currentSelectableWhisperComputeTypeList, updateSelectableWhisperComputeTypeList, pendingSelectableWhisperComputeTypeList } = useStore_SelectableWhisperComputeTypeList(); - const { currentSelectedWhisperComputeType, updateSelectedWhisperComputeType, pendingSelectedWhisperComputeType } = useStore_SelectedWhisperComputeType(); + const { currentSelectedTranscriptionComputeType, updateSelectedTranscriptionComputeType, pendingSelectedTranscriptionComputeType } = useStore_SelectedTranscriptionComputeType(); const { currentSelectableTranscriptionComputeDeviceList, updateSelectableTranscriptionComputeDeviceList, pendingSelectableTranscriptionComputeDeviceList } = useStore_SelectableTranscriptionComputeDeviceList(); const { currentSelectedTranscriptionComputeDevice, updateSelectedTranscriptionComputeDevice, pendingSelectedTranscriptionComputeDevice } = useStore_SelectedTranscriptionComputeDevice(); @@ -255,28 +253,19 @@ export const useTranscription = () => { }; - const getSelectableWhisperComputeTypeList = () => { - pendingSelectableWhisperComputeTypeList(); - 
asyncStdoutToPython("/get/data/whisper_compute_type_list"); + + const getSelectedTranscriptionComputeType = () => { + pendingSelectedTranscriptionComputeType(); + asyncStdoutToPython("/get/data/transcription_compute_type"); }; - const updateSelectableWhisperComputeTypeList_FromBackend = (payload) => { - updateSelectableWhisperComputeTypeList(arrayToObject(payload)); + const setSelectedTranscriptionComputeType = (selected_transcription_compute_type) => { + pendingSelectedTranscriptionComputeType(); + asyncStdoutToPython("/set/data/transcription_compute_type", selected_transcription_compute_type); }; - - const getSelectedWhisperComputeType = () => { - pendingSelectedWhisperComputeType(); - asyncStdoutToPython("/get/data/whisper_compute_type"); - }; - - const setSelectedWhisperComputeType = (selected_whisper_compute_type) => { - pendingSelectedWhisperComputeType(); - asyncStdoutToPython("/set/data/whisper_compute_type", selected_whisper_compute_type); - }; - - const setSuccessSelectedWhisperComputeType = (selected_whisper_compute_type) => { - updateSelectedWhisperComputeType(selected_whisper_compute_type); + const setSuccessSelectedTranscriptionComputeType = (selected_transcription_compute_type) => { + updateSelectedTranscriptionComputeType(selected_transcription_compute_type); showNotification_SaveSuccess(); }; @@ -452,16 +441,11 @@ export const useTranscription = () => { setSuccessSelectedWhisperWeightType, - currentSelectableWhisperComputeTypeList, - getSelectableWhisperComputeTypeList, - updateSelectableWhisperComputeTypeList, - updateSelectableWhisperComputeTypeList_FromBackend, - - currentSelectedWhisperComputeType, - getSelectedWhisperComputeType, - updateSelectedWhisperComputeType, - setSelectedWhisperComputeType, - setSuccessSelectedWhisperComputeType, + currentSelectedTranscriptionComputeType, + getSelectedTranscriptionComputeType, + updateSelectedTranscriptionComputeType, + setSelectedTranscriptionComputeType, + setSuccessSelectedTranscriptionComputeType, 
currentSelectableTranscriptionComputeDeviceList, diff --git a/src-ui/logics/configs/translation/useTranslation.js b/src-ui/logics/configs/translation/useTranslation.js index 28ab10a4..5c3bb83e 100644 --- a/src-ui/logics/configs/translation/useTranslation.js +++ b/src-ui/logics/configs/translation/useTranslation.js @@ -1,8 +1,7 @@ import { useStore_CTranslate2WeightTypeStatus, useStore_SelectedCTranslate2WeightType, - useStore_SelectableCTranslate2ComputeTypeList, - useStore_SelectedCTranslate2ComputeType, + useStore_SelectedTranslationComputeType, useStore_SelectableTranslationComputeDeviceList, useStore_SelectedTranslationComputeDevice, useStore_DeepLAuthKey, @@ -20,8 +19,7 @@ export const useTranslation = () => { const { currentCTranslate2WeightTypeStatus, updateCTranslate2WeightTypeStatus, pendingCTranslate2WeightTypeStatus } = useStore_CTranslate2WeightTypeStatus(); const { currentSelectedCTranslate2WeightType, updateSelectedCTranslate2WeightType, pendingSelectedCTranslate2WeightType } = useStore_SelectedCTranslate2WeightType(); - const { currentSelectableCTranslate2ComputeTypeList, updateSelectableCTranslate2ComputeTypeList, pendingSelectableCTranslate2ComputeTypeList } = useStore_SelectableCTranslate2ComputeTypeList(); - const { currentSelectedCTranslate2ComputeType, updateSelectedCTranslate2ComputeType, pendingSelectedCTranslate2ComputeType } = useStore_SelectedCTranslate2ComputeType(); + const { currentSelectedTranslationComputeType, updateSelectedTranslationComputeType, pendingSelectedTranslationComputeType } = useStore_SelectedTranslationComputeType(); const { currentSelectableTranslationComputeDeviceList, updateSelectableTranslationComputeDeviceList, pendingSelectableTranslationComputeDeviceList } = useStore_SelectableTranslationComputeDeviceList(); const { currentSelectedTranslationComputeDevice, updateSelectedTranslationComputeDevice, pendingSelectedTranslationComputeDevice } = useStore_SelectedTranslationComputeDevice(); @@ -87,29 +85,18 @@ export 
const useTranslation = () => { }; - - const getSelectableCTranslate2ComputeTypeList = () => { - pendingSelectableCTranslate2ComputeTypeList(); - asyncStdoutToPython("/get/data/ctranslate2_compute_type_list"); + const getSelectedTranslationComputeType = () => { + pendingSelectedTranslationComputeType(); + asyncStdoutToPython("/get/data/translation_compute_type"); }; - const updateSelectableCTranslate2ComputeTypeList_FromBackend = (payload) => { - updateSelectableCTranslate2ComputeTypeList(arrayToObject(payload)); + const setSelectedTranslationComputeType = (selected_translation_compute_type) => { + pendingSelectedTranslationComputeType(); + asyncStdoutToPython("/set/data/translation_compute_type", selected_translation_compute_type); }; - - const getSelectedCTranslate2ComputeType = () => { - pendingSelectedCTranslate2ComputeType(); - asyncStdoutToPython("/get/data/ctranslate2_compute_type"); - }; - - const setSelectedCTranslate2ComputeType = (selected_ctranslate2_compute_type) => { - pendingSelectedCTranslate2ComputeType(); - asyncStdoutToPython("/set/data/ctranslate2_compute_type", selected_ctranslate2_compute_type); - }; - - const setSuccessSelectedCTranslate2ComputeType = (selected_ctranslate2_compute_type) => { - updateSelectedCTranslate2ComputeType(selected_ctranslate2_compute_type); + const setSuccessSelectedTranslationComputeType = (selected_translation_compute_type) => { + updateSelectedTranslationComputeType(selected_translation_compute_type); showNotification_SaveSuccess(); }; @@ -182,16 +169,11 @@ export const useTranslation = () => { setSuccessSelectedCTranslate2WeightType, - currentSelectableCTranslate2ComputeTypeList, - getSelectableCTranslate2ComputeTypeList, - updateSelectableCTranslate2ComputeTypeList, - updateSelectableCTranslate2ComputeTypeList_FromBackend, - - currentSelectedCTranslate2ComputeType, - getSelectedCTranslate2ComputeType, - updateSelectedCTranslate2ComputeType, - setSelectedCTranslate2ComputeType, - 
setSuccessSelectedCTranslate2ComputeType, + currentSelectedTranslationComputeType, + getSelectedTranslationComputeType, + updateSelectedTranslationComputeType, + setSelectedTranslationComputeType, + setSuccessSelectedTranslationComputeType, currentSelectableTranslationComputeDeviceList, diff --git a/src-ui/logics/useReceiveRoutes.js b/src-ui/logics/useReceiveRoutes.js index a4b92e6e..049697a3 100644 --- a/src-ui/logics/useReceiveRoutes.js +++ b/src-ui/logics/useReceiveRoutes.js @@ -172,9 +172,8 @@ export const ROUTE_META_LIST = [ { endpoint: "/get/data/ctranslate2_weight_type", ns: configs, hook_name: "useTranslation", method_name: "updateSelectedCTranslate2WeightType" }, { endpoint: "/set/data/ctranslate2_weight_type", ns: configs, hook_name: "useTranslation", method_name: "setSuccessSelectedCTranslate2WeightType" }, - { endpoint: "/get/data/ctranslate2_compute_type_list", ns: configs, hook_name: "useTranslation", method_name: "updateSelectableCTranslate2ComputeTypeList_FromBackend" }, - { endpoint: "/get/data/ctranslate2_compute_type", ns: configs, hook_name: "useTranslation", method_name: "updateSelectedCTranslate2ComputeType" }, - { endpoint: "/set/data/ctranslate2_compute_type", ns: configs, hook_name: "useTranslation", method_name: "setSuccessSelectedCTranslate2ComputeType" }, + { endpoint: "/get/data/translation_compute_type", ns: configs, hook_name: "useTranslation", method_name: "updateSelectedTranslationComputeType" }, + { endpoint: "/set/data/translation_compute_type", ns: configs, hook_name: "useTranslation", method_name: "setSuccessSelectedTranslationComputeType" }, { endpoint: "/run/downloaded_ctranslate2_weight", ns: configs, hook_name: "useTranslation", method_name: "downloadedCTranslate2WeightType" }, { endpoint: "/run/download_ctranslate2_weight", ns: null, hook_name: null, method_name: null }, @@ -218,9 +217,8 @@ export const ROUTE_META_LIST = [ { endpoint: "/get/data/whisper_weight_type", ns: configs, hook_name: "useTranscription", method_name: 
"updateSelectedWhisperWeightType" }, { endpoint: "/set/data/whisper_weight_type", ns: configs, hook_name: "useTranscription", method_name: "setSuccessSelectedWhisperWeightType" }, - { endpoint: "/get/data/whisper_compute_type_list", ns: configs, hook_name: "useTranscription", method_name: "updateSelectableWhisperComputeTypeList_FromBackend" }, - { endpoint: "/get/data/whisper_compute_type", ns: configs, hook_name: "useTranscription", method_name: "updateSelectedWhisperComputeType" }, - { endpoint: "/set/data/whisper_compute_type", ns: configs, hook_name: "useTranscription", method_name: "setSuccessSelectedWhisperComputeType" }, + { endpoint: "/get/data/transcription_compute_type", ns: configs, hook_name: "useTranscription", method_name: "updateSelectedTranscriptionComputeType" }, + { endpoint: "/set/data/transcription_compute_type", ns: configs, hook_name: "useTranscription", method_name: "setSuccessSelectedTranscriptionComputeType" }, { endpoint: "/run/downloaded_whisper_weight", ns: configs, hook_name: "useTranscription", method_name: "downloadedWhisperWeightType" }, diff --git a/src-ui/store.js b/src-ui/store.js index 6d110bce..3286b165 100644 --- a/src-ui/store.js +++ b/src-ui/store.js @@ -220,12 +220,9 @@ export const { atomInstance: Atom_DeepLAuthKey, useHook: useStore_DeepLAuthKey } export const { atomInstance: Atom_SelectedCTranslate2WeightType, useHook: useStore_SelectedCTranslate2WeightType } = createAtomWithHook("", "SelectedCTranslate2WeightType"); export const { atomInstance: Atom_CTranslate2WeightTypeStatus, useHook: useStore_CTranslate2WeightTypeStatus } = createAtomWithHook(ctranslate2_weight_type_status, "CTranslate2WeightTypeStatus"); -export const { atomInstance: Atom_SelectableCTranslate2ComputeTypeList, useHook: useStore_SelectableCTranslate2ComputeTypeList } = createAtomWithHook({}, "SelectableCTranslate2ComputeTypeList"); -export const { atomInstance: Atom_SelectedCTranslate2ComputeType, useHook: useStore_SelectedCTranslate2ComputeType } = 
createAtomWithHook("", "SelectedCTranslate2ComputeType"); - - export const { atomInstance: Atom_SelectableTranslationComputeDeviceList, useHook: useStore_SelectableTranslationComputeDeviceList } = createAtomWithHook({}, "SelectableTranslationComputeDeviceList"); export const { atomInstance: Atom_SelectedTranslationComputeDevice, useHook: useStore_SelectedTranslationComputeDevice } = createAtomWithHook("", "SelectedTranslationComputeDevice"); +export const { atomInstance: Atom_SelectedTranslationComputeType, useHook: useStore_SelectedTranslationComputeType } = createAtomWithHook("", "SelectedTranslationComputeType"); // Transcription export const { atomInstance: Atom_MicRecordTimeout, useHook: useStore_MicRecordTimeout } = createAtomWithHook(0, "MicRecordTimeout"); @@ -240,12 +237,9 @@ export const { atomInstance: Atom_SelectedWhisperWeightType, useHook: useStore_S export const { atomInstance: Atom_WhisperWeightTypeStatus, useHook: useStore_WhisperWeightTypeStatus } = createAtomWithHook(whisper_weight_type_status, "WhisperWeightTypeStatus"); export const { atomInstance: Atom_SelectedTranscriptionEngine, useHook: useStore_SelectedTranscriptionEngine } = createAtomWithHook(whisper_weight_type_status, "SelectedTranscriptionEngine"); -export const { atomInstance: Atom_SelectableWhisperComputeTypeList, useHook: useStore_SelectableWhisperComputeTypeList } = createAtomWithHook({}, "SelectableWhisperComputeTypeList"); -export const { atomInstance: Atom_SelectedWhisperComputeType, useHook: useStore_SelectedWhisperComputeType } = createAtomWithHook("", "SelectedWhisperComputeType"); - - export const { atomInstance: Atom_SelectableTranscriptionComputeDeviceList, useHook: useStore_SelectableTranscriptionComputeDeviceList } = createAtomWithHook({}, "SelectableTranscriptionComputeDeviceList"); export const { atomInstance: Atom_SelectedTranscriptionComputeDevice, useHook: useStore_SelectedTranscriptionComputeDevice } = createAtomWithHook("", "SelectedTranscriptionComputeDevice"); 
+export const { atomInstance: Atom_SelectedTranscriptionComputeType, useHook: useStore_SelectedTranscriptionComputeType } = createAtomWithHook("", "SelectedTranscriptionComputeType"); export const { atomInstance: Atom_MicAvgLogprob, useHook: useStore_MicAvgLogprob } = createAtomWithHook(-0.8, "MicAvgLogprob"); export const { atomInstance: Atom_MicNoSpeechProb, useHook: useStore_MicNoSpeechProb } = createAtomWithHook(0.6, "MicNoSpeechProb"); From 5366622fca924099609e07e7bbf4a9731cec53f3 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Fri, 26 Sep 2025 23:30:39 +0900 Subject: [PATCH 49/92] [Update] Refactor compute device management: change methods to instance methods and set compute types to "auto" --- src-python/controller.py | 12 ++++++++---- src-python/mainloop.py | 3 +++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src-python/controller.py b/src-python/controller.py index a746ecee..20bfbe88 100644 --- a/src-python/controller.py +++ b/src-python/controller.py @@ -656,13 +656,15 @@ class Controller: def getSelectedTranslationComputeDevice(*args, **kwargs) -> dict: return {"status":200, "result":config.SELECTED_TRANSLATION_COMPUTE_DEVICE} - @staticmethod - def setSelectedTranslationComputeDevice(device:str, *args, **kwargs) -> dict: + def setSelectedTranslationComputeDevice(self, device:str, *args, **kwargs) -> dict: printLog("setSelectedTranslationComputeDevice", device) pre_device = config.SELECTED_TRANSLATION_COMPUTE_DEVICE + pre_compute_type = config.TRANSLATION_COMPUTE_TYPE config.SELECTED_TRANSLATION_COMPUTE_DEVICE = device + config.TRANSLATION_COMPUTE_TYPE = "auto" try: model.changeTranslatorCTranslate2Model() + self.run(200, self.run_mapping["translation_compute_type"], config.TRANSLATION_COMPUTE_TYPE) except Exception as e: # VRAM不足エラーの検出(デバイス切り替え時) is_vram_error, error_message = model.detectVRAMError(e) @@ -670,6 +672,7 @@ class Controller: # 前のデバイス設定に戻す printLog("VRAM error detected, 
reverting device setting") config.SELECTED_TRANSLATION_COMPUTE_DEVICE = pre_device + config.TRANSLATION_COMPUTE_TYPE = pre_compute_type model.changeTranslatorCTranslate2Model() else: # その他のエラーは通常通り処理 @@ -684,10 +687,11 @@ class Controller: def getSelectedTranscriptionComputeDevice(*args, **kwargs) -> dict: return {"status":200, "result":config.SELECTED_TRANSCRIPTION_COMPUTE_DEVICE} - @staticmethod - def setSelectedTranscriptionComputeDevice(device:str, *args, **kwargs) -> dict: + def setSelectedTranscriptionComputeDevice(self, device:str, *args, **kwargs) -> dict: printLog("setSelectedTranscriptionComputeDevice", device) config.SELECTED_TRANSCRIPTION_COMPUTE_DEVICE = device + config.TRANSCRIPTION_COMPUTE_TYPE = "auto" + self.run(200, self.run_mapping["transcription_compute_type"], config.TRANSCRIPTION_COMPUTE_TYPE) return {"status":200,"result":config.SELECTED_TRANSCRIPTION_COMPUTE_DEVICE} @staticmethod diff --git a/src-python/mainloop.py b/src-python/mainloop.py index a32fef8a..2df4d53e 100644 --- a/src-python/mainloop.py +++ b/src-python/mainloop.py @@ -48,6 +48,9 @@ run_mapping = { "selected_translation_engines":"/run/selected_translation_engines", "translation_engines":"/run/translation_engines", + "translation_compute_type":"/run/translation_compute_type", + "transcription_compute_type":"/run/transcription_compute_type", + "mic_host_list":"/run/mic_host_list", "mic_device_list":"/run/mic_device_list", "speaker_device_list":"/run/speaker_device_list", From 6effedcce2c2f349ef06b7dbb31fbed043ccac13 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Sat, 27 Sep 2025 07:07:54 +0900 Subject: [PATCH 50/92] [Update] Refactor compute type management: rename properties to 'SELECTED_TRANSLATION_COMPUTE_TYPE' and 'SELECTED_TRANSCRIPTION_COMPUTE_TYPE' --- src-python/config.py | 28 ++++++++++++++-------------- src-python/controller.py | 36 ++++++++++++++++++------------------ src-python/mainloop.py | 12 ++++++------ 
src-python/model.py | 6 +++--- 4 files changed, 41 insertions(+), 41 deletions(-) diff --git a/src-python/config.py b/src-python/config.py index 07926324..71e97af7 100644 --- a/src-python/config.py +++ b/src-python/config.py @@ -815,15 +815,15 @@ class Config: self.saveConfig(inspect.currentframe().f_code.co_name, value) @property - @json_serializable('TRANSLATION_COMPUTE_TYPE') - def TRANSLATION_COMPUTE_TYPE(self): - return self._TRANSLATION_COMPUTE_TYPE + @json_serializable('SELECTED_TRANSLATION_COMPUTE_TYPE') + def SELECTED_TRANSLATION_COMPUTE_TYPE(self): + return self._SELECTED_TRANSLATION_COMPUTE_TYPE - @TRANSLATION_COMPUTE_TYPE.setter - def TRANSLATION_COMPUTE_TYPE(self, value): + @SELECTED_TRANSLATION_COMPUTE_TYPE.setter + def SELECTED_TRANSLATION_COMPUTE_TYPE(self, value): if isinstance(value, str): if value in self.SELECTED_TRANSLATION_COMPUTE_DEVICE["compute_types"]: - self._TRANSLATION_COMPUTE_TYPE = value + self._SELECTED_TRANSLATION_COMPUTE_TYPE = value self.saveConfig(inspect.currentframe().f_code.co_name, value) @property @@ -839,15 +839,15 @@ class Config: self.saveConfig(inspect.currentframe().f_code.co_name, value) @property - @json_serializable('TRANSCRIPTION_COMPUTE_TYPE') - def TRANSCRIPTION_COMPUTE_TYPE(self): - return self._TRANSCRIPTION_COMPUTE_TYPE + @json_serializable('SELECTED_TRANSCRIPTION_COMPUTE_TYPE') + def SELECTED_TRANSCRIPTION_COMPUTE_TYPE(self): + return self._SELECTED_TRANSCRIPTION_COMPUTE_TYPE - @TRANSCRIPTION_COMPUTE_TYPE.setter - def TRANSCRIPTION_COMPUTE_TYPE(self, value): + @SELECTED_TRANSCRIPTION_COMPUTE_TYPE.setter + def SELECTED_TRANSCRIPTION_COMPUTE_TYPE(self, value): if isinstance(value, str): if value in self.SELECTED_TRANSCRIPTION_COMPUTE_DEVICE["compute_types"]: - self._TRANSCRIPTION_COMPUTE_TYPE = value + self._SELECTED_TRANSCRIPTION_COMPUTE_TYPE = value self.saveConfig(inspect.currentframe().f_code.co_name, value) @property @@ -1209,9 +1209,9 @@ class Config: self._SELECTED_TRANSLATION_COMPUTE_DEVICE = 
copy.deepcopy(self.SELECTABLE_COMPUTE_DEVICE_LIST[0]) self._SELECTED_TRANSCRIPTION_COMPUTE_DEVICE = copy.deepcopy(self.SELECTABLE_COMPUTE_DEVICE_LIST[0]) self._CTRANSLATE2_WEIGHT_TYPE = "small" - self._TRANSLATION_COMPUTE_TYPE = "auto" + self._SELECTED_TRANSLATION_COMPUTE_TYPE = "auto" self._WHISPER_WEIGHT_TYPE = "base" - self._TRANSCRIPTION_COMPUTE_TYPE = "auto" + self._SELECTED_TRANSCRIPTION_COMPUTE_TYPE = "auto" self._AUTO_CLEAR_MESSAGE_BOX = True self._SEND_ONLY_TRANSLATED_MESSAGES = False self._OVERLAY_SMALL_LOG = False diff --git a/src-python/controller.py b/src-python/controller.py index 20bfbe88..d03ae106 100644 --- a/src-python/controller.py +++ b/src-python/controller.py @@ -659,12 +659,12 @@ class Controller: def setSelectedTranslationComputeDevice(self, device:str, *args, **kwargs) -> dict: printLog("setSelectedTranslationComputeDevice", device) pre_device = config.SELECTED_TRANSLATION_COMPUTE_DEVICE - pre_compute_type = config.TRANSLATION_COMPUTE_TYPE + pre_compute_type = config.SELECTED_TRANSLATION_COMPUTE_TYPE config.SELECTED_TRANSLATION_COMPUTE_DEVICE = device - config.TRANSLATION_COMPUTE_TYPE = "auto" + config.SELECTED_TRANSLATION_COMPUTE_TYPE = "auto" try: model.changeTranslatorCTranslate2Model() - self.run(200, self.run_mapping["translation_compute_type"], config.TRANSLATION_COMPUTE_TYPE) + self.run(200, self.run_mapping["selected_translation_compute_type"], config.SELECTED_TRANSLATION_COMPUTE_TYPE) except Exception as e: # VRAM不足エラーの検出(デバイス切り替え時) is_vram_error, error_message = model.detectVRAMError(e) @@ -672,7 +672,7 @@ class Controller: # 前のデバイス設定に戻す printLog("VRAM error detected, reverting device setting") config.SELECTED_TRANSLATION_COMPUTE_DEVICE = pre_device - config.TRANSLATION_COMPUTE_TYPE = pre_compute_type + config.SELECTED_TRANSLATION_COMPUTE_TYPE = pre_compute_type model.changeTranslatorCTranslate2Model() else: # その他のエラーは通常通り処理 @@ -690,8 +690,8 @@ class Controller: def setSelectedTranscriptionComputeDevice(self, device:str, *args, 
**kwargs) -> dict: printLog("setSelectedTranscriptionComputeDevice", device) config.SELECTED_TRANSCRIPTION_COMPUTE_DEVICE = device - config.TRANSCRIPTION_COMPUTE_TYPE = "auto" - self.run(200, self.run_mapping["transcription_compute_type"], config.TRANSCRIPTION_COMPUTE_TYPE) + config.SELECTED_TRANSCRIPTION_COMPUTE_TYPE = "auto" + self.run(200, self.run_mapping["selected_transcription_compute_type"], config.SELECTED_TRANSCRIPTION_COMPUTE_TYPE) return {"status":200,"result":config.SELECTED_TRANSCRIPTION_COMPUTE_DEVICE} @staticmethod @@ -1455,13 +1455,13 @@ class Controller: return {"status":200, "result":config.CTRANSLATE2_WEIGHT_TYPE} @staticmethod - def getTranslationComputeType(*args, **kwargs) -> dict: - return {"status":200, "result":config.TRANSLATION_COMPUTE_TYPE} + def getSelectedTranslationComputeType(*args, **kwargs) -> dict: + return {"status":200, "result":config.SELECTED_TRANSLATION_COMPUTE_TYPE} @staticmethod - def setTranslationComputeType(data, *args, **kwargs) -> dict: - pre_compute_type = config.TRANSLATION_COMPUTE_TYPE - config.TRANSLATION_COMPUTE_TYPE = str(data) + def setSelectedTranslationComputeType(data, *args, **kwargs) -> dict: + pre_compute_type = config.SELECTED_TRANSLATION_COMPUTE_TYPE + config.SELECTED_TRANSLATION_COMPUTE_TYPE = str(data) if model.checkTranslatorCTranslate2ModelWeight(config.CTRANSLATE2_WEIGHT_TYPE): def callback(): model.changeTranslatorCTranslate2Model() @@ -1470,8 +1470,8 @@ class Controller: th_callback.start() th_callback.join() else: - config.TRANSLATION_COMPUTE_TYPE = pre_compute_type - return {"status":200, "result":config.TRANSLATION_COMPUTE_TYPE} + config.SELECTED_TRANSLATION_COMPUTE_TYPE = pre_compute_type + return {"status":200, "result":config.SELECTED_TRANSLATION_COMPUTE_TYPE} @staticmethod def getWhisperWeightType(*args, **kwargs) -> dict: @@ -1483,13 +1483,13 @@ class Controller: return {"status":200, "result": config.WHISPER_WEIGHT_TYPE} @staticmethod - def getTranscriptionComputeType(*args, **kwargs) -> 
dict: - return {"status":200, "result":config.TRANSCRIPTION_COMPUTE_TYPE} + def getSelectedTranscriptionComputeType(*args, **kwargs) -> dict: + return {"status":200, "result":config.SELECTED_TRANSCRIPTION_COMPUTE_TYPE} @staticmethod - def setTranscriptionComputeType(data, *args, **kwargs) -> dict: - config.TRANSCRIPTION_COMPUTE_TYPE = str(data) - return {"status":200, "result":config.TRANSCRIPTION_COMPUTE_TYPE} + def setSelectedTranscriptionComputeType(data, *args, **kwargs) -> dict: + config.SELECTED_TRANSCRIPTION_COMPUTE_TYPE = str(data) + return {"status":200, "result":config.SELECTED_TRANSCRIPTION_COMPUTE_TYPE} @staticmethod def getSendMessageFormatParts(*args, **kwargs) -> dict: diff --git a/src-python/mainloop.py b/src-python/mainloop.py index 2df4d53e..bcb808b1 100644 --- a/src-python/mainloop.py +++ b/src-python/mainloop.py @@ -48,8 +48,8 @@ run_mapping = { "selected_translation_engines":"/run/selected_translation_engines", "translation_engines":"/run/translation_engines", - "translation_compute_type":"/run/translation_compute_type", - "transcription_compute_type":"/run/transcription_compute_type", + "selected_translation_compute_type":"/run/selected_translation_compute_type", + "selected_transcription_compute_type":"/run/selected_transcription_compute_type", "mic_host_list":"/run/mic_host_list", "mic_device_list":"/run/mic_device_list", @@ -165,8 +165,8 @@ mapping = { "/get/data/ctranslate2_weight_type": {"status": True, "variable":controller.getCtranslate2WeightType}, "/set/data/ctranslate2_weight_type": {"status": True, "variable":controller.setCtranslate2WeightType}, - "/get/data/translation_compute_type": {"status": True, "variable":controller.getTranslationComputeType}, - "/set/data/translation_compute_type": {"status": True, "variable":controller.setTranslationComputeType}, + "/get/data/selected_translation_compute_type": {"status": True, "variable":controller.getSelectedTranslationComputeType}, + "/set/data/selected_translation_compute_type": 
{"status": True, "variable":controller.setSelectedTranslationComputeType}, "/run/download_ctranslate2_weight": {"status": True, "variable":controller.downloadCtranslate2Weight}, @@ -271,8 +271,8 @@ mapping = { "/get/data/whisper_weight_type": {"status": True, "variable":controller.getWhisperWeightType}, "/set/data/whisper_weight_type": {"status": True, "variable":controller.setWhisperWeightType}, - "/get/data/transcription_compute_type": {"status": True, "variable":controller.getTranscriptionComputeType}, - "/set/data/transcription_compute_type": {"status": True, "variable":controller.setTranscriptionComputeType}, + "/get/data/selected_transcription_compute_type": {"status": True, "variable":controller.getSelectedTranscriptionComputeType}, + "/set/data/selected_transcription_compute_type": {"status": True, "variable":controller.setSelectedTranscriptionComputeType}, "/run/download_whisper_weight": {"status": True, "variable":controller.downloadWhisperWeight}, diff --git a/src-python/model.py b/src-python/model.py index 639d375f..70687119 100644 --- a/src-python/model.py +++ b/src-python/model.py @@ -116,7 +116,7 @@ class Model: model_type=config.CTRANSLATE2_WEIGHT_TYPE, device=config.SELECTED_TRANSLATION_COMPUTE_DEVICE["device"], device_index=config.SELECTED_TRANSLATION_COMPUTE_DEVICE["device_index"], - compute_type=config.TRANSLATION_COMPUTE_TYPE + compute_type=config.SELECTED_TRANSLATION_COMPUTE_TYPE ) def downloadCTranslate2ModelWeight(self, weight_type, callback=None, end_callback=None): @@ -440,7 +440,7 @@ class Model: whisper_weight_type=config.WHISPER_WEIGHT_TYPE, device=config.SELECTED_TRANSCRIPTION_COMPUTE_DEVICE["device"], device_index=config.SELECTED_TRANSCRIPTION_COMPUTE_DEVICE["device_index"], - compute_type=config.TRANSCRIPTION_COMPUTE_TYPE, + compute_type=config.SELECTED_TRANSCRIPTION_COMPUTE_TYPE, ) def sendMicTranscript(): try: @@ -624,7 +624,7 @@ class Model: whisper_weight_type=config.WHISPER_WEIGHT_TYPE, 
device=config.SELECTED_TRANSCRIPTION_COMPUTE_DEVICE["device"], device_index=config.SELECTED_TRANSCRIPTION_COMPUTE_DEVICE["device_index"], - compute_type=config.TRANSCRIPTION_COMPUTE_TYPE, + compute_type=config.SELECTED_TRANSCRIPTION_COMPUTE_TYPE, ) def sendSpeakerTranscript(): try: From e9067c05c641f41e463ce9c9c2f2047c1f39b90c Mon Sep 17 00:00:00 2001 From: Sakamoto Shiina <68018796+ShiinaSakamoto@users.noreply.github.com> Date: Sat, 27 Sep 2025 07:55:45 +0900 Subject: [PATCH 51/92] [Update/Chore] UI: Compute Type: Rename and add endpoint '/run/'. --- .../logics/configs/transcription/useTranscription.js | 4 ++-- src-ui/logics/configs/translation/useTranslation.js | 4 ++-- src-ui/logics/useReceiveRoutes.js | 10 ++++++---- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src-ui/logics/configs/transcription/useTranscription.js b/src-ui/logics/configs/transcription/useTranscription.js index 1511d77a..008e4b10 100644 --- a/src-ui/logics/configs/transcription/useTranscription.js +++ b/src-ui/logics/configs/transcription/useTranscription.js @@ -256,12 +256,12 @@ export const useTranscription = () => { const getSelectedTranscriptionComputeType = () => { pendingSelectedTranscriptionComputeType(); - asyncStdoutToPython("/get/data/transcription_compute_type"); + asyncStdoutToPython("/get/data/selected_transcription_compute_type"); }; const setSelectedTranscriptionComputeType = (selected_transcription_compute_type) => { pendingSelectedTranscriptionComputeType(); - asyncStdoutToPython("/set/data/transcription_compute_type", selected_transcription_compute_type); + asyncStdoutToPython("/set/data/selected_transcription_compute_type", selected_transcription_compute_type); }; const setSuccessSelectedTranscriptionComputeType = (selected_transcription_compute_type) => { diff --git a/src-ui/logics/configs/translation/useTranslation.js b/src-ui/logics/configs/translation/useTranslation.js index 5c3bb83e..ea288a02 100644 --- 
a/src-ui/logics/configs/translation/useTranslation.js +++ b/src-ui/logics/configs/translation/useTranslation.js @@ -87,12 +87,12 @@ export const useTranslation = () => { const getSelectedTranslationComputeType = () => { pendingSelectedTranslationComputeType(); - asyncStdoutToPython("/get/data/translation_compute_type"); + asyncStdoutToPython("/get/data/selected_translation_compute_type"); }; const setSelectedTranslationComputeType = (selected_translation_compute_type) => { pendingSelectedTranslationComputeType(); - asyncStdoutToPython("/set/data/translation_compute_type", selected_translation_compute_type); + asyncStdoutToPython("/set/data/selected_translation_compute_type", selected_translation_compute_type); }; const setSuccessSelectedTranslationComputeType = (selected_translation_compute_type) => { diff --git a/src-ui/logics/useReceiveRoutes.js b/src-ui/logics/useReceiveRoutes.js index 049697a3..bb21acfd 100644 --- a/src-ui/logics/useReceiveRoutes.js +++ b/src-ui/logics/useReceiveRoutes.js @@ -172,8 +172,9 @@ export const ROUTE_META_LIST = [ { endpoint: "/get/data/ctranslate2_weight_type", ns: configs, hook_name: "useTranslation", method_name: "updateSelectedCTranslate2WeightType" }, { endpoint: "/set/data/ctranslate2_weight_type", ns: configs, hook_name: "useTranslation", method_name: "setSuccessSelectedCTranslate2WeightType" }, - { endpoint: "/get/data/translation_compute_type", ns: configs, hook_name: "useTranslation", method_name: "updateSelectedTranslationComputeType" }, - { endpoint: "/set/data/translation_compute_type", ns: configs, hook_name: "useTranslation", method_name: "setSuccessSelectedTranslationComputeType" }, + { endpoint: "/run/selected_translation_compute_type", ns: configs, hook_name: "useTranslation", method_name: "updateSelectedTranslationComputeType" }, + { endpoint: "/get/data/selected_translation_compute_type", ns: configs, hook_name: "useTranslation", method_name: "updateSelectedTranslationComputeType" }, + { endpoint: 
"/set/data/selected_translation_compute_type", ns: configs, hook_name: "useTranslation", method_name: "setSuccessSelectedTranslationComputeType" }, { endpoint: "/run/downloaded_ctranslate2_weight", ns: configs, hook_name: "useTranslation", method_name: "downloadedCTranslate2WeightType" }, { endpoint: "/run/download_ctranslate2_weight", ns: null, hook_name: null, method_name: null }, @@ -217,8 +218,9 @@ export const ROUTE_META_LIST = [ { endpoint: "/get/data/whisper_weight_type", ns: configs, hook_name: "useTranscription", method_name: "updateSelectedWhisperWeightType" }, { endpoint: "/set/data/whisper_weight_type", ns: configs, hook_name: "useTranscription", method_name: "setSuccessSelectedWhisperWeightType" }, - { endpoint: "/get/data/transcription_compute_type", ns: configs, hook_name: "useTranscription", method_name: "updateSelectedTranscriptionComputeType" }, - { endpoint: "/set/data/transcription_compute_type", ns: configs, hook_name: "useTranscription", method_name: "setSuccessSelectedTranscriptionComputeType" }, + { endpoint: "/run/selected_transcription_compute_type", ns: configs, hook_name: "useTranscription", method_name: "updateSelectedTranscriptionComputeType" }, + { endpoint: "/get/data/selected_transcription_compute_type", ns: configs, hook_name: "useTranscription", method_name: "updateSelectedTranscriptionComputeType" }, + { endpoint: "/set/data/selected_transcription_compute_type", ns: configs, hook_name: "useTranscription", method_name: "setSuccessSelectedTranscriptionComputeType" }, { endpoint: "/run/downloaded_whisper_weight", ns: configs, hook_name: "useTranscription", method_name: "downloadedWhisperWeightType" }, From 9a11c6ff9fdc76dd4f1876e4a1e31ef72e83481d Mon Sep 17 00:00:00 2001 From: Sakamoto Shiina <68018796+ShiinaSakamoto@users.noreply.github.com> Date: Sun, 28 Sep 2025 02:07:47 +0900 Subject: [PATCH 52/92] [Update] Config Page: Compute Device/Type: Add localization and make compute types order properly. 
--- locales/en.yml | 17 ++-- locales/ja.yml | 17 ++-- locales/ko.yml | 17 ++-- locales/zh-Hans.yml | 17 ++-- locales/zh-Hant.yml | 17 ++-- .../_download_button/_DownloadButton.jsx | 2 +- .../transcription/Transcription.jsx | 79 +++++++++++++++++-- .../setting_box/translation/Translation.jsx | 79 +++++++++++++++++-- .../version_label/VersionLabel.jsx | 6 +- 9 files changed, 205 insertions(+), 46 deletions(-) diff --git a/locales/en.yml b/locales/en.yml index 76cfd208..dfcc1d43 100644 --- a/locales/en.yml +++ b/locales/en.yml @@ -73,8 +73,17 @@ update_modal: is_current_compute_device: "The version currently in use" config_page: - version: "Version {{version}}" - model_download_button_label: "Download" + common: + version: "Version {{version}}" + model_download_button_label: "Download" + compute_device: + desc: "The accuracy and speed of each processing type may vary depending on your machine specs, and the compatibility with calculation methods may differ from the displayed order. Please use this as a general guideline." + label_device: "Processing Device" + label_type: "Processing Type" + type_template_auto: "Automatic" + type_template_low: "{{type_name}} (Lower accuracy, faster processing)" + type_template_high: "{{type_name}} (Higher accuracy, slower processing)" + side_menu_labels: device: "Device" appearance: "Appearance" @@ -133,8 +142,6 @@ config_page: desc: "You can choose the translation model when using the {{ctranslate2}} translation engine." 
small: "Basic Model ({{capacity}})" large: "High Accuracy Model ({{capacity}})" - translation_compute_type: - label: "Processing type for AI translation {{ctranslate2}}" translation_compute_device: label: "Processing device for AI translation" deepl_auth_key: @@ -179,8 +186,6 @@ config_page: desc: "Larger models have higher accuracy, but they also consume more CPU or GPU resources.\nEspecially for models larger than medium, it may be difficult or even impossible to use them depending on the performance of your CPU/GPU." model_template: "{{model_name}} model ({{capacity}})" recommended_model_template: "{{model_name}} model ({{capacity}}) (Recommended)" - transcription_compute_type: - label: "Processing type for AI transcription {{whisper}}" transcription_compute_device: label: "Processing Device Used For AI transcription" diff --git a/locales/ja.yml b/locales/ja.yml index ab47d45c..46cd24aa 100644 --- a/locales/ja.yml +++ b/locales/ja.yml @@ -73,8 +73,17 @@ update_modal: is_current_compute_device: "現在使用中のバージョン" config_page: - version: "バージョン {{version}}" - model_download_button_label: "ダウンロード" + common: + version: "バージョン {{version}}" + model_download_button_label: "ダウンロード" + compute_device: + desc: "各処理タイプの精度・速度は、マシンスペックによって計算方法に相性があり、表示順とは異なる事があるため、大まかな目安としてください。" + label_device: "処理デバイス" + label_type: "処理タイプ" + type_template_auto: "自動" + type_template_low: "{{type_name}} (精度が悪く、処理は早い)" + type_template_high: "{{type_name}} (精度が良く、処理は遅い)" + side_menu_labels: device: "デバイス" appearance: "デザイン" @@ -133,8 +142,6 @@ config_page: desc: "翻訳エンジン「{{ctranslate2}}」で翻訳する際に、使用する翻訳モデルを選択できます。" small: "通常モデル ({{capacity}})" large: "高精度モデル ({{capacity}})" - translation_compute_type: - label: "AI翻訳 {{ctranslate2}} の処理タイプ" translation_compute_device: label: "AI翻訳の処理デバイス" deepl_auth_key: @@ -179,8 +186,6 @@ config_page: desc: "容量が大きいモデルほど精度は高いですが、その分CPUやGPUを占有します。\n※特にmediumより容量の大きいモデルは、CPU/GPUの性能によっては使用すらも困難です。" model_template: "{{model_name}} モデル ({{capacity}})" 
recommended_model_template: "{{model_name}} モデル ({{capacity}}) [推奨]" - transcription_compute_type: - label: "AI音声認識 {{whisper}} の処理タイプ" transcription_compute_device: label: "AI音声認識で使用する処理デバイス" diff --git a/locales/ko.yml b/locales/ko.yml index af169b93..e24cf6cb 100644 --- a/locales/ko.yml +++ b/locales/ko.yml @@ -73,8 +73,17 @@ update_modal: is_current_compute_device: "현재 사용 중인 버전" config_page: - version: "버전 {{version}}" - model_download_button_label: "다운로드" + common: + version: "버전 {{version}}" + model_download_button_label: "다운로드" + compute_device: + desc: + label_device: + label_type: + type_template_auto: + type_template_low: + type_template_high: + side_menu_labels: device: "장치" appearance: "모양" @@ -133,8 +142,6 @@ config_page: desc: "오프라인 번역 시의 번역 모델을 변경합니다." small: "일반 모델 ({{capacity}})" large: "정밀 모델 ({{capacity}})" - translation_compute_type: - label: translation_compute_device: label: "AI 번역 처리 장치" deepl_auth_key: @@ -179,8 +186,6 @@ config_page: desc: "용량이 큰 모델일수록 정확도는 높지만, 그만큼 CPU나 GPU를 많이 차지합니다. * 특히 medium보다 용량이 큰 모델은 CPU/GPU 성능에 따라 사용 자체가 어려울 수 있습니다." 
model_template: "{{model_name}} 모델 ({{capacity}})" recommended_model_template: "{{model_name}} 모델 ({{capacity}}) (권장)" - transcription_compute_type: - label: transcription_compute_device: label: diff --git a/locales/zh-Hans.yml b/locales/zh-Hans.yml index 8322c96f..1c76d6ad 100644 --- a/locales/zh-Hans.yml +++ b/locales/zh-Hans.yml @@ -73,8 +73,17 @@ update_modal: is_current_compute_device: config_page: - version: "版本 {{version}}" - model_download_button_label: + common: + version: "版本 {{version}}" + model_download_button_label: + compute_device: + desc: + label_device: + label_type: + type_template_auto: + type_template_low: + type_template_high: + side_menu_labels: device: appearance: "外观" @@ -133,8 +142,6 @@ config_page: desc: "可以选择用于离线翻译的翻译模型" small: "普通模型 ({{capacity}})" large: "高精度模型 ({{capacity}})" - translation_compute_type: - label: translation_compute_device: label: deepl_auth_key: @@ -179,8 +186,6 @@ config_page: desc: model_template: "{{model_name}} 模型 ({{capacity}})" recommended_model_template: "{{model_name}} 模型 ({{capacity}}) (推荐)" - transcription_compute_type: - label: transcription_compute_device: label: diff --git a/locales/zh-Hant.yml b/locales/zh-Hant.yml index 07b79b51..f1c59e5e 100644 --- a/locales/zh-Hant.yml +++ b/locales/zh-Hant.yml @@ -73,8 +73,17 @@ update_modal: is_current_compute_device: config_page: - version: "版本 {{version}}" - model_download_button_label: + common: + version: "版本 {{version}}" + model_download_button_label: + compute_device: + desc: + label_device: + label_type: + type_template_auto: + type_template_low: + type_template_high: + side_menu_labels: device: appearance: "外觀" @@ -133,8 +142,6 @@ config_page: desc: "你可以選擇用於離線翻譯引擎的翻譯模型。" small: "基本模型({{capacity}})" large: "高準確率模型({{capacity}})" - translation_compute_type: - label: translation_compute_device: label: deepl_auth_key: @@ -179,8 +186,6 @@ config_page: desc: model_template: "{{model_name}}模型({{capacity}})" recommended_model_template: 
"{{model_name}}模型({{capacity}})(推薦)" - transcription_compute_type: - label: transcription_compute_device: label: diff --git a/src-ui/app/config_page/setting_section/setting_box/_components/_atoms/_download_button/_DownloadButton.jsx b/src-ui/app/config_page/setting_section/setting_box/_components/_atoms/_download_button/_DownloadButton.jsx index ac17b759..1cad4958 100644 --- a/src-ui/app/config_page/setting_section/setting_box/_components/_atoms/_download_button/_DownloadButton.jsx +++ b/src-ui/app/config_page/setting_section/setting_box/_components/_atoms/_download_button/_DownloadButton.jsx @@ -29,7 +29,7 @@ export const _DownloadButton = ({option, ...props}) => { className={styles.download_button} onClick={() => props.downloadStartFunction(option.id)} > -

{t("config_page.model_download_button_label")}

+

{t("config_page.common.model_download_button_label")}

); case option.update_button: diff --git a/src-ui/app/config_page/setting_section/setting_box/transcription/Transcription.jsx b/src-ui/app/config_page/setting_section/setting_box/transcription/Transcription.jsx index 48639292..1bc5cd07 100644 --- a/src-ui/app/config_page/setting_section/setting_box/transcription/Transcription.jsx +++ b/src-ui/app/config_page/setting_section/setting_box/transcription/Transcription.jsx @@ -278,7 +278,7 @@ const WhisperWeightType_Box = () => { ); }; - +// Duplicate const TranscriptionComputeDevice_Box = () => { const { t } = useI18n(); const { @@ -295,8 +295,72 @@ const TranscriptionComputeDevice_Box = () => { const target_index = findKeyByDeviceValue(currentSelectableTranscriptionComputeDeviceList.data, currentSelectedTranscriptionComputeDevice.data); - const selectable_compute_types = arrayToObject(currentSelectableTranscriptionComputeDeviceList.data[target_index].compute_types); + const DEFAULT_ORDER = [ + "auto", + "int8", + "int8_bfloat16", + "int8_float16", + "int8_float32", + "bfloat16", + "float16", + "int16", + "float32" + ]; + const sortComputeTypesArray = (compute_types_array = [], order) => { + const src_set = new Set(compute_types_array); + + const from_order = order.filter((id) => src_set.has(id)); + + const invalid_ids = compute_types_array.filter((id) => !order.includes(id)); + if (invalid_ids.length > 0) { + console.error("[sortComputeTypesArray] Unsupported compute types ignored:", invalid_ids); + } + + return from_order; + }; + + + const buildSimpleLabels = (ordered_array = []) => { + const n = ordered_array.length; + if (n === 0) return {}; + + const labels = {}; + + ordered_array.forEach((id, idx) => { + if (idx === 0 && id === "auto") { + labels[id] = t("config_page.common.compute_device.type_template_auto"); + return; + } + + if (idx === 1) { + labels[id] = t( + "config_page.common.compute_device.type_template_low", + { type_name: id } + ); + return; + } + + if (idx === n - 1) { + labels[id] = t( + 
"config_page.common.compute_device.type_template_high", + { type_name: id } + ); + return; + } + + labels[id] = id; + }); + + return labels; + }; + + + const computeTypesArray = currentSelectableTranscriptionComputeDeviceList.data[target_index].compute_types; + + const ordered_array = sortComputeTypesArray(computeTypesArray, DEFAULT_ORDER); + + const new_compute_types_labels = buildSimpleLabels(ordered_array); const selectFunction_ComputeDevice = (selected_data) => { const target_obj = currentSelectableTranscriptionComputeDeviceList.data[selected_data.selected_id]; @@ -316,12 +380,15 @@ const TranscriptionComputeDevice_Box = () => { return (
- +
-

{t("config_page.transcription.transcription_compute_device.label")}

+

{t("config_page.common.compute_device.label_device")}

{
-

{t("config_page.transcription.transcription_compute_type.label")}

+

{t("config_page.common.compute_device.label_type")}

{ ); }; - +// Duplicate const TranslationComputeDevice_Box = () => { const { t } = useI18n(); const { @@ -94,8 +94,72 @@ const TranslationComputeDevice_Box = () => { const target_index = findKeyByDeviceValue(currentSelectableTranslationComputeDeviceList.data, currentSelectedTranslationComputeDevice.data); - const selectable_compute_types = arrayToObject(currentSelectableTranslationComputeDeviceList.data[target_index].compute_types); + const DEFAULT_ORDER = [ + "auto", + "int8", + "int8_bfloat16", + "int8_float16", + "int8_float32", + "bfloat16", + "float16", + "int16", + "float32" + ]; + const sortComputeTypesArray = (compute_types_array = [], order) => { + const src_set = new Set(compute_types_array); + + const from_order = order.filter((id) => src_set.has(id)); + + const invalid_ids = compute_types_array.filter((id) => !order.includes(id)); + if (invalid_ids.length > 0) { + console.error("[sortComputeTypesArray] Unsupported compute types ignored:", invalid_ids); + } + + return from_order; + }; + + + const buildSimpleLabels = (ordered_array = []) => { + const n = ordered_array.length; + if (n === 0) return {}; + + const labels = {}; + + ordered_array.forEach((id, idx) => { + if (idx === 0 && id === "auto") { + labels[id] = t("config_page.common.compute_device.type_template_auto"); + return; + } + + if (idx === 1) { + labels[id] = t( + "config_page.common.compute_device.type_template_low", + { type_name: id } + ); + return; + } + + if (idx === n - 1) { + labels[id] = t( + "config_page.common.compute_device.type_template_high", + { type_name: id } + ); + return; + } + + labels[id] = id; + }); + + return labels; + }; + + + const computeTypesArray = currentSelectableTranslationComputeDeviceList.data[target_index].compute_types; + + const ordered_array = sortComputeTypesArray(computeTypesArray, DEFAULT_ORDER); + + const new_compute_types_labels = buildSimpleLabels(ordered_array); const selectFunction_ComputeDevice = (selected_data) => { const target_obj = 
currentSelectableTranslationComputeDeviceList.data[selected_data.selected_id]; @@ -115,12 +179,15 @@ const TranslationComputeDevice_Box = () => { return (
- +
-

{t("config_page.translation.translation_compute_device.label")}

+

{t("config_page.common.compute_device.label_device")}

{
-

{t("config_page.translation.translation_compute_type.label")}

+

{t("config_page.common.compute_device.label_type")}

{ const { currentComputeMode } = useComputeMode(); const version_label = currentComputeMode.data === "cpu" - ? t("config_page.version", { version: currentSoftwareVersion.data }) + ? t("config_page.common.version", { version: currentSoftwareVersion.data }) : currentComputeMode.data === "cuda" - ? t("config_page.version", { version: currentSoftwareVersion.data }) + " CUDA" - : t("config_page.version", { version: currentSoftwareVersion.data }); + ? t("config_page.common.version", { version: currentSoftwareVersion.data }) + " CUDA" + : t("config_page.common.version", { version: currentSoftwareVersion.data }); const is_cpu = currentComputeMode.data === "cpu"; From cde8b4b806c23ab337b9860dcc49d02a4f0a9983 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Sun, 28 Sep 2025 06:57:58 +0900 Subject: [PATCH 53/92] =?UTF-8?q?selected=5Ftranslation=5Fcompute=5Ftype/s?= =?UTF-8?q?elected=5Ftranscription=5Fcompute=5Ftype=E3=81=AE=E3=83=86?= =?UTF-8?q?=E3=82=B9=E3=83=88=E3=82=B1=E3=83=BC=E3=82=B9=E3=82=92=E8=BF=BD?= =?UTF-8?q?=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src-python/backend_test.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src-python/backend_test.py b/src-python/backend_test.py index 2550ff92..6f1de883 100644 --- a/src-python/backend_test.py +++ b/src-python/backend_test.py @@ -357,6 +357,10 @@ class TestMainloop(): expected_status = [400] case "/set/data/osc_port": data = random.randint(1024, 65535) + case "/set/data/selected_translation_compute_type": + data = random.choice(self.config_dict["selected_translation_compute_device"]["compute_types"]) + case "/set/data/selected_transcription_compute_type": + data = random.choice(self.config_dict["selected_transcription_compute_device"]["compute_types"]) case _: data = None expected_status = [404] @@ -645,6 +649,13 @@ class TestMainloop(): print(f"- エンドポイント: {endpoint}") print(f" ステータス: 
{result['status']}") print(f" 結果: {result['result']}\n") + if invalid_tests > 0: + print("無効なテストの詳細:") + for endpoint, result in self.test_results.items(): + if result["expected_status"] == [404]: + print(f"- エンドポイント: {endpoint}") + print(f" ステータス: {result['status']}") + print(f" 結果: {result['result']}\n") if failed_tests > 0: print("失敗したテストの詳細:") for endpoint, result in self.test_results.items(): From e32e5c1b5f8c0079434a8ef9e238f46a86818ca1 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Mon, 29 Sep 2025 10:15:03 +0900 Subject: [PATCH 54/92] [Update] Transliterator: Change tokenizer mode to SplitMode.C and enhance analyze method for better reading assignment --- .../transliteration_transliterator.py | 54 +++++++++++++++---- 1 file changed, 44 insertions(+), 10 deletions(-) diff --git a/src-python/models/transliteration/transliteration_transliterator.py b/src-python/models/transliteration/transliteration_transliterator.py index f2a9780f..9d395a13 100644 --- a/src-python/models/transliteration/transliteration_transliterator.py +++ b/src-python/models/transliteration/transliteration_transliterator.py @@ -8,7 +8,7 @@ except ImportError: class Transliterator: def __init__(self): self.tokenizer_obj = dictionary.Dictionary().create() - self.mode = tokenizer.Tokenizer.SplitMode.A + self.mode = tokenizer.Tokenizer.SplitMode.C @staticmethod def is_kanji(ch: str) -> bool: @@ -101,7 +101,21 @@ class Transliterator: for t in tokens: surface = t.surface() reading = t.reading_form() - + pos = t.part_of_speech() + print("surface:", surface, " reading:", reading, " pos:", pos) + + if pos and pos[0] in ["記号", "補助記号"]: + reading = surface + + if surface == reading: + results.append({ + "orig": surface, + "kana": reading, + "hira": surface, + "hepburn": surface, + }) + continue + # 単純に1文字ずつ処理 if len(surface) == 1: # 1文字の場合はそのまま @@ -134,32 +148,48 @@ class Transliterator: else: # 最後の漢字ブロックの場合 kanji_reading = reading[reading_pos:] - + + 
# 空の読みを避ける + if not kanji_reading and reading_pos < len(reading): + kanji_reading = reading[reading_pos:] + if not kanji_reading and kanji_block: + # 読みが空だが漢字ブロックがある場合、残りの読みを全て割り当てる + kanji_reading = reading[reading_pos:] + + # reading_posの更新を正確に行うために、割り当てられた読みの長さをチェック + len_allocated_reading = len(kanji_reading) + if reading_pos + len_allocated_reading > len(reading): + len_allocated_reading = len(reading) - reading_pos + results.append({ "orig": kanji_block, "kana": kanji_reading, "hira": self.kata_to_hira(kanji_reading), "hepburn": katakana_to_hepburn(kanji_reading, use_macron=use_macron) }) - reading_pos += len(kanji_reading) + reading_pos += len_allocated_reading else: # 非漢字の場合 non_kanji_block = "" while i < len(surface) and not self.is_kanji(surface[i]): non_kanji_block += surface[i] i += 1 - - # 非漢字部分の読み(通常は文字数分) - non_kanji_reading = reading[reading_pos:reading_pos + len(non_kanji_block)] - + + # 非漢字部分の読み(通常は文字数分、または残りの読みの分だけ) + len_block = len(non_kanji_block) + non_kanji_reading = reading[reading_pos:reading_pos + len_block] + + # 割り当てられた読みの長さ + len_allocated_reading = len(non_kanji_reading) + results.append({ "orig": non_kanji_block, "kana": non_kanji_reading, "hira": self.kata_to_hira(non_kanji_reading), "hepburn": katakana_to_hepburn(non_kanji_reading, use_macron=use_macron) }) - reading_pos += len(non_kanji_reading) - + reading_pos += len_allocated_reading + return results # --- テスト --- @@ -180,6 +210,10 @@ if __name__ == "__main__": "取り敢えず検索してみる", "見知らぬ土地で冒険する", "彼は優れたエンジニアです", + " ".join(list("[]<>!@#$%^&*()_+-={}|\;:'\",.<>/?`~")), + " ".join(list("「」<>!@#$%^&*()_+-={}|\;:'",./?`~")), + " ".join(list("♪♫♬♭♮♯°℃℉№Å®©™✓✔✕✖★☆○●◎◇◆□■△▲▽▼※→←↑↓↔︎↕︎⇄⇅∞∴∵∷≪≫≦≧±×÷≠≈≡⊂⊃⊆⊇⊄⊅∪∩∈∋∅∀∃∠⊥⌒∂∇√∫∬∮∑∏∧∨¬⇒⇔∀∃∠⊥⌒∂∇√∫∬∮∑∏")), + " ".join(list("😀😃😄😁😆😅😂🤣😊😇🙂")) ] transliterator = Transliterator() From 957cddfe1882c8219e820e9ebd3cb0992a8aa563 Mon Sep 17 00:00:00 2001 From: Sakamoto Shiina <68018796+ShiinaSakamoto@users.noreply.github.com> Date: Tue, 30 Sep 2025 04:55:23 +0900 
Subject: [PATCH 55/92] [bugfix] UI: Transliteration: Fix the bug that isn't shown when only hiragana or only hepburn is provided. --- .../message_container/MessageContainer.jsx | 23 ++++++++++++------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/src-ui/app/main_page/main_section/message_container/log_box/message_container/MessageContainer.jsx b/src-ui/app/main_page/main_section/message_container/log_box/message_container/MessageContainer.jsx index d53ccaa6..cb38e838 100644 --- a/src-ui/app/main_page/main_section/message_container/log_box/message_container/MessageContainer.jsx +++ b/src-ui/app/main_page/main_section/message_container/log_box/message_container/MessageContainer.jsx @@ -94,6 +94,7 @@ const MessageWithTransliteration = ({ item }) => { const hira = token.hira ?? ""; const hepburn = token.hepburn ?? ""; + // Only hovered romaji if it exists. (No ruby cuz 'orig' and 'hira' are same.) if (hira && hira === orig && hepburn) { return ( @@ -102,7 +103,8 @@ const MessageWithTransliteration = ({ item }) => { ); } - if (hira && hira !== orig && hepburn) { + // Ruby hiragana and hovered romaji. + if (hira && hepburn) { return ( {orig} @@ -111,15 +113,20 @@ const MessageWithTransliteration = ({ item }) => { ); } - if (hepburn && hepburn !== orig) { - return ( - - {orig} - {hepburn} - - ); + // Ruby romaji or hiragana. + if (hepburn || hira) { + const ruby = hepburn ? hepburn : hira; + if (ruby !== orig) { + return ( + + {orig} + {ruby} + + ); + }; } + // Nothing. Original only. 
return ( {orig} From fe3fea34fffe0c108e7419c23b647819040cb8c8 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Thu, 2 Oct 2025 22:58:13 +0900 Subject: [PATCH 56/92] [Cleanup] Transliterator: Remove debug print statement from analyze method --- .../models/transliteration/transliteration_transliterator.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src-python/models/transliteration/transliteration_transliterator.py b/src-python/models/transliteration/transliteration_transliterator.py index 9d395a13..7c85ebee 100644 --- a/src-python/models/transliteration/transliteration_transliterator.py +++ b/src-python/models/transliteration/transliteration_transliterator.py @@ -102,7 +102,6 @@ class Transliterator: surface = t.surface() reading = t.reading_form() pos = t.part_of_speech() - print("surface:", surface, " reading:", reading, " pos:", pos) if pos and pos[0] in ["記号", "補助記号"]: reading = surface From 4b55a9bca2e62c2a6f796712a292a78212d64176 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Thu, 2 Oct 2025 23:05:06 +0900 Subject: [PATCH 57/92] [Fix] Controller: Remove redundant assignments in auto mic/speaker select methods --- src-python/controller.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src-python/controller.py b/src-python/controller.py index 16856ddc..26276856 100644 --- a/src-python/controller.py +++ b/src-python/controller.py @@ -1050,20 +1050,20 @@ class Controller: def setEnableAutoMicSelect(self, *args, **kwargs) -> dict: if config.AUTO_MIC_SELECT is False: + config.AUTO_MIC_SELECT = True device_manager.setCallbackProcessBeforeUpdateDevices(self.stopAccessDevices) device_manager.setCallbackDefaultMicDevice(self.updateSelectedMicDevice) device_manager.setCallbackProcessAfterUpdateDevices(self.restartAccessDevices) device_manager.forceUpdateAndSetMicDevices() - config.AUTO_MIC_SELECT = True return {"status":200, 
"result":config.AUTO_MIC_SELECT} @staticmethod def setDisableAutoMicSelect(*args, **kwargs) -> dict: if config.AUTO_MIC_SELECT is True: + config.AUTO_MIC_SELECT = False device_manager.clearCallbackProcessBeforeUpdateDevices() device_manager.clearCallbackDefaultMicDevice() device_manager.clearCallbackProcessAfterUpdateDevices() - config.AUTO_MIC_SELECT = False return {"status":200, "result":config.AUTO_MIC_SELECT} @staticmethod @@ -1247,20 +1247,20 @@ class Controller: def setEnableAutoSpeakerSelect(self, *args, **kwargs) -> dict: if config.AUTO_SPEAKER_SELECT is False: + config.AUTO_SPEAKER_SELECT = True device_manager.setCallbackProcessBeforeUpdateDevices(self.stopAccessDevices) device_manager.setCallbackDefaultSpeakerDevice(self.updateSelectedSpeakerDevice) device_manager.setCallbackProcessAfterUpdateDevices(self.restartAccessDevices) device_manager.forceUpdateAndSetSpeakerDevices() - config.AUTO_SPEAKER_SELECT = True return {"status":200, "result":config.AUTO_SPEAKER_SELECT} @staticmethod def setDisableAutoSpeakerSelect(*args, **kwargs) -> dict: if config.AUTO_SPEAKER_SELECT is True: + config.AUTO_SPEAKER_SELECT = False device_manager.clearCallbackProcessBeforeUpdateDevices() device_manager.clearCallbackDefaultSpeakerDevice() device_manager.clearCallbackProcessAfterUpdateDevices() - config.AUTO_SPEAKER_SELECT = False return {"status":200, "result":config.AUTO_SPEAKER_SELECT} @staticmethod From 741cfed8c3c71635c8bfc8a8cead3a3a1bf5af3a Mon Sep 17 00:00:00 2001 From: Sakamoto Shiina <68018796+ShiinaSakamoto@users.noreply.github.com> Date: Fri, 3 Oct 2025 01:10:03 +0900 Subject: [PATCH 58/92] [Update/Chore] Localizations: Add texts to Romaji/Hiragana settings and adjust all locales languages. 
--- locales/en.yml | 10 ++++--- locales/ja.yml | 30 ++++++++++--------- locales/ko.yml | 8 +++++ locales/zh-Hans.yml | 8 +++++ locales/zh-Hant.yml | 8 +++++ .../setting_box/others/Others.jsx | 19 ++++++++---- 6 files changed, 60 insertions(+), 23 deletions(-) diff --git a/locales/en.yml b/locales/en.yml index b89361ff..8215ce6a 100644 --- a/locales/en.yml +++ b/locales/en.yml @@ -259,12 +259,14 @@ config_page: received_message_format: label: "Message Format (Speaker2Chatbox)" desc: "Currently, it is used in Speaker2Chatbox." + common_convert_message_hiragana_romaji: + desc_1: "Supported only when Japanese is selected as the translation language." + desc_2: "Due to the complexity of Japanese, it has limitations on accuracy." convert_message_to_romaji: - label: Show Romaji - desc: Supported only when Japanese is selected as the translation language. When enabled along with '{{convert_message_to_hiragana}}', romaji will be shown on mouse hover. + label: "Show Romaji" + desc: "When enabled along with '{{convert_message_to_hiragana}}', romaji will be shown on mouse hover." convert_message_to_hiragana: - label: Show Hiragana - desc: Supported only when Japanese is selected as the translation language. 
+ label: "Show Hiragana" hotkeys: diff --git a/locales/ja.yml b/locales/ja.yml index b4038d5b..90117091 100644 --- a/locales/ja.yml +++ b/locales/ja.yml @@ -254,17 +254,19 @@ config_page: translated: "翻訳" for_multi_translation: "多言語翻訳用" send_message_format: - label: メッセージフォーマット(送信) - desc: VRChatで相手に実際に見えるフォーマットを変更できます。 + label: "メッセージフォーマット(送信)" + desc: "VRChatで相手に実際に見えるフォーマットを変更できます。" received_message_format: label: メッセージフォーマット(Speaker2Chatbox) desc: 今のところ、Speaker2Chatboxで送信した時の表示に使われます。 + common_convert_message_hiragana_romaji: + desc_1: "翻訳言語として日本語を選択した時のみサポート。" + desc_2: "日本語は特殊なため、精度には限界があります。" convert_message_to_romaji: - label: ローマ字を表示 - desc: 翻訳言語として日本語を選択した時のみサポート。「{{convert_message_to_hiragana}}」と同時に有効にした場合は、マウスホバーで表示されます。 + label: "ローマ字を表示" + desc: "「{{convert_message_to_hiragana}}」と同時に有効にした場合は、マウスホバーで表示されます。" convert_message_to_hiragana: - label: ひらがなを表示 - desc: 翻訳言語として日本語を選択した時のみサポート。 + label: "ひらがなを表示" hotkeys: toggle_vrct_visibility: @@ -279,14 +281,14 @@ config_page: plugins: downloaded_version: "ダウンロード済バージョン: {{downloaded_version}}" latest_version: "最新バージョン: {{latest_version}}" - available_after_updating: 最新版にアップデート後 利用可能 - unavailable_downloaded: 現在利用不可 使用中VRCTバージョンとの互換性なし - no_latest_info: 最新情報が取得できません - using_latest_version: 最新版を使用中 - available_latest_version: 最新版を利用可能 - unavailable_latest_version: 最新版は現在利用不可 - available_in_latest_vrct_version: VRCT最新版で利用可能 - unavailable_not_downloaded: 現在利用不可 + available_after_updating: "最新版にアップデート後 利用可能" + unavailable_downloaded: "現在利用不可 使用中VRCTバージョンとの互換性なし" + no_latest_info: "最新情報が取得できません" + using_latest_version: "最新版を使用中" + available_latest_version: "最新版を利用可能" + unavailable_latest_version: "最新版は現在利用不可" + available_in_latest_vrct_version: "VRCT最新版で利用可能" + unavailable_not_downloaded: "現在利用不可" advanced_settings: osc_ip_address: diff --git a/locales/ko.yml b/locales/ko.yml index e24cf6cb..6134f1ad 100644 --- a/locales/ko.yml +++ b/locales/ko.yml @@ -259,6 +259,14 @@ config_page: received_message_format: label: 
"메시지 형식 (Speaker2Chatbox)" desc: "현재로서는 Speaker2Chatbox로 전송할 때의 표시용으로 사용됩니다." + common_convert_message_hiragana_romaji: + desc_1: + desc_2: + convert_message_to_romaji: + label: + desc: + convert_message_to_hiragana: + label: hotkeys: toggle_vrct_visibility: diff --git a/locales/zh-Hans.yml b/locales/zh-Hans.yml index 1c76d6ad..9bdb088c 100644 --- a/locales/zh-Hans.yml +++ b/locales/zh-Hans.yml @@ -259,6 +259,14 @@ config_page: received_message_format: label: desc: + common_convert_message_hiragana_romaji: + desc_1: + desc_2: + convert_message_to_romaji: + label: + desc: + convert_message_to_hiragana: + label: hotkeys: toggle_vrct_visibility: diff --git a/locales/zh-Hant.yml b/locales/zh-Hant.yml index f1c59e5e..827876b4 100644 --- a/locales/zh-Hant.yml +++ b/locales/zh-Hant.yml @@ -259,6 +259,14 @@ config_page: received_message_format: label: desc: + common_convert_message_hiragana_romaji: + desc_1: + desc_2: + convert_message_to_romaji: + label: + desc: + convert_message_to_hiragana: + label: hotkeys: toggle_vrct_visibility: diff --git a/src-ui/app/config_page/setting_section/setting_box/others/Others.jsx b/src-ui/app/config_page/setting_section/setting_box/others/Others.jsx index 5ef39ce0..3328f7f9 100644 --- a/src-ui/app/config_page/setting_section/setting_box/others/Others.jsx +++ b/src-ui/app/config_page/setting_section/setting_box/others/Others.jsx @@ -211,13 +211,18 @@ const ConvertMessageToRomajiContainer = () => { const { t } = useI18n(); const { currentConvertMessageToRomaji, toggleConvertMessageToRomaji } = useOthers(); + const desc_1 = t("config_page.others.common_convert_message_hiragana_romaji.desc_1"); + const desc_2 = t("config_page.others.common_convert_message_hiragana_romaji.desc_2"); + const desc_romaji = t( + "config_page.others.convert_message_to_romaji.desc", + { convert_message_to_hiragana: t("config_page.others.convert_message_to_hiragana.label") } + ); + const desc = [desc_1, desc_2, desc_romaji].join("\n"); + return ( @@ -228,10 +233,14 
@@ const ConvertMessageToHiraganaContainer = () => { const { t } = useI18n(); const { currentConvertMessageToHiragana, toggleConvertMessageToHiragana } = useOthers(); + const desc_1 = t("config_page.others.common_convert_message_hiragana_romaji.desc_1"); + const desc_2 = t("config_page.others.common_convert_message_hiragana_romaji.desc_2"); + const desc = [desc_1, desc_2].join("\n"); + return ( From 7b1e9136ee61f6386d224bf1d3c3879ed126cd3c Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Sat, 4 Oct 2025 22:25:55 +0900 Subject: [PATCH 59/92] [Update] Transliterator: Enhance transliteration control and improve tokenizer initialization --- src-python/controller.py | 13 +++ src-python/model.py | 13 ++- .../transliteration_transliterator.py | 92 +++---------------- 3 files changed, 40 insertions(+), 78 deletions(-) diff --git a/src-python/controller.py b/src-python/controller.py index 16856ddc..5c360a91 100644 --- a/src-python/controller.py +++ b/src-python/controller.py @@ -915,12 +915,16 @@ class Controller: @staticmethod def setEnableConvertMessageToRomaji(*args, **kwargs) -> dict: if config.CONVERT_MESSAGE_TO_ROMAJI is False: + if config.CONVERT_MESSAGE_TO_HIRAGANA is False: + model.startTransliteration() config.CONVERT_MESSAGE_TO_ROMAJI = True return {"status":200, "result":config.CONVERT_MESSAGE_TO_ROMAJI} @staticmethod def setDisableConvertMessageToRomaji(*args, **kwargs) -> dict: if config.CONVERT_MESSAGE_TO_ROMAJI is True: + if config.CONVERT_MESSAGE_TO_HIRAGANA is False: + model.stopTransliteration() config.CONVERT_MESSAGE_TO_ROMAJI = False return {"status":200, "result":config.CONVERT_MESSAGE_TO_ROMAJI} @@ -931,12 +935,16 @@ class Controller: @staticmethod def setEnableConvertMessageToHiragana(*args, **kwargs) -> dict: if config.CONVERT_MESSAGE_TO_HIRAGANA is False: + if config.CONVERT_MESSAGE_TO_ROMAJI is False: + model.startTransliteration() config.CONVERT_MESSAGE_TO_HIRAGANA = True return {"status":200, 
"result":config.CONVERT_MESSAGE_TO_HIRAGANA} @staticmethod def setDisableConvertMessageToHiragana(*args, **kwargs) -> dict: if config.CONVERT_MESSAGE_TO_HIRAGANA is True: + if config.CONVERT_MESSAGE_TO_ROMAJI is False: + model.stopTransliteration() config.CONVERT_MESSAGE_TO_HIRAGANA = False return {"status":200, "result":config.CONVERT_MESSAGE_TO_HIRAGANA} @@ -2466,6 +2474,11 @@ class Controller: self.updateDownloadedWhisperModelWeight() self.updateTranscriptionEngine() + # set Transliteration status + printLog("Set Transliteration") + if config.CONVERT_MESSAGE_TO_ROMAJI is True or config.CONVERT_MESSAGE_TO_HIRAGANA is True: + model.startTransliteration() + self.initializationProgress(3) # set word filter diff --git a/src-python/model.py b/src-python/model.py index 9d29c2d0..6048c630 100644 --- a/src-python/model.py +++ b/src-python/model.py @@ -99,7 +99,7 @@ class Model: self.overlay_image = OverlayImage(config.PATH_LOCAL) self.mic_audio_queue = None self.mic_mute_status = None - self.transliterator = Transliterator() + self.transliterator = None self.watchdog = Watchdog(config.WATCHDOG_TIMEOUT, config.WATCHDOG_INTERVAL) self.osc_handler = OSCHandler(config.OSC_IP_ADDRESS, config.OSC_PORT) self.websocket_server = None @@ -277,6 +277,14 @@ class Model: self.previous_receive_message = message return repeat_flag + def startTransliteration(self): + if self.transliterator is None: + self.transliterator = Transliterator() + + def stopTransliteration(self): + if self.transliterator is not None: + self.transliterator = None + def convertMessageToTransliteration(self, message: str, hiragana: bool=True, romaji: bool=True) -> str: if hiragana is False and romaji is False: return message @@ -287,6 +295,9 @@ class Model: if romaji: keys_to_keep.add("hepburn") + if self.transliterator is None: + self.startTransliteration() + data_list = self.transliterator.analyze(message, use_macron=False) filtered_list = [ {key: value for key, value in item.items() if key in keys_to_keep} 
diff --git a/src-python/models/transliteration/transliteration_transliterator.py b/src-python/models/transliteration/transliteration_transliterator.py index 7c85ebee..b8e64f7d 100644 --- a/src-python/models/transliteration/transliteration_transliterator.py +++ b/src-python/models/transliteration/transliteration_transliterator.py @@ -7,7 +7,7 @@ except ImportError: class Transliterator: def __init__(self): - self.tokenizer_obj = dictionary.Dictionary().create() + self.tokenizer_obj = dictionary.Dictionary(dict_type="full").create() self.mode = tokenizer.Tokenizer.SplitMode.C @staticmethod @@ -22,7 +22,7 @@ class Transliterator: ) @staticmethod - def split_kanji_okurigana(surface: str, reading_kana: str): + def split_kanji_okurigana(surface: str, reading_kana: str, use_macron: bool = True): """ 1語の表層形(surface)と読み(reading_kana)を [ {"orig":..., "kana":..., "hira":..., "hepburn":...}, ... ] に分割 @@ -69,15 +69,13 @@ class Transliterator: # 空の読みを避ける if not kana_for_kan and kana_left: kana_for_kan = kana_left[:1] - - result.append( - { - "orig": part, - "kana": kana_for_kan, - "hira": Transliterator.kata_to_hira(kana_for_kan), - "hepburn": katakana_to_hepburn(kana_for_kan, use_macron=True) - } - ) + + result.append({ + "orig": part, + "kana": kana_for_kan, + "hira": Transliterator.kata_to_hira(kana_for_kan), + "hepburn": katakana_to_hepburn(kana_for_kan, use_macron=use_macron) + }) kana_left = kana_left[len(kana_for_kan):] else: # 非漢字部分(送り仮名など) @@ -87,14 +85,14 @@ class Transliterator: "orig": part, "kana": kana_for_okuri, "hira": Transliterator.kata_to_hira(kana_for_okuri), - "hepburn": katakana_to_hepburn(kana_for_okuri, use_macron=True) + "hepburn": katakana_to_hepburn(kana_for_okuri, use_macron=use_macron) } ) kana_left = kana_left[len(kana_for_okuri):] return result - def analyze(self, text: str, use_macron: bool = True): + def analyze(self, text: str, use_macron: bool = False): tokens = self.tokenizer_obj.tokenize(text, self.mode) results = [] @@ -103,7 +101,7 @@ 
class Transliterator: reading = t.reading_form() pos = t.part_of_speech() - if pos and pos[0] in ["記号", "補助記号"]: + if pos and pos[0] in ["記号", "補助記号", "空白"]: reading = surface if surface == reading: @@ -125,69 +123,9 @@ class Transliterator: "hepburn": katakana_to_hepburn(reading, use_macron=use_macron) }) else: - # 複数文字の場合は文字種別で分割 - i = 0 - reading_pos = 0 - - while i < len(surface): - char = surface[i] - - if self.is_kanji(char): - # 漢字の場合、連続する漢字をまとめて処理 - kanji_block = "" - while i < len(surface) and self.is_kanji(surface[i]): - kanji_block += surface[i] - i += 1 - - # 漢字ブロックの読みを推定 - if i < len(surface): - # 後に文字がある場合、送り仮名を考慮 - remaining_chars = len(surface) - i - kanji_reading = reading[reading_pos:-remaining_chars] if remaining_chars > 0 else reading[reading_pos:] - else: - # 最後の漢字ブロックの場合 - kanji_reading = reading[reading_pos:] - - # 空の読みを避ける - if not kanji_reading and reading_pos < len(reading): - kanji_reading = reading[reading_pos:] - if not kanji_reading and kanji_block: - # 読みが空だが漢字ブロックがある場合、残りの読みを全て割り当てる - kanji_reading = reading[reading_pos:] - - # reading_posの更新を正確に行うために、割り当てられた読みの長さをチェック - len_allocated_reading = len(kanji_reading) - if reading_pos + len_allocated_reading > len(reading): - len_allocated_reading = len(reading) - reading_pos - - results.append({ - "orig": kanji_block, - "kana": kanji_reading, - "hira": self.kata_to_hira(kanji_reading), - "hepburn": katakana_to_hepburn(kanji_reading, use_macron=use_macron) - }) - reading_pos += len_allocated_reading - else: - # 非漢字の場合 - non_kanji_block = "" - while i < len(surface) and not self.is_kanji(surface[i]): - non_kanji_block += surface[i] - i += 1 - - # 非漢字部分の読み(通常は文字数分、または残りの読みの分だけ) - len_block = len(non_kanji_block) - non_kanji_reading = reading[reading_pos:reading_pos + len_block] - - # 割り当てられた読みの長さ - len_allocated_reading = len(non_kanji_reading) - - results.append({ - "orig": non_kanji_block, - "kana": non_kanji_reading, - "hira": self.kata_to_hira(non_kanji_reading), - "hepburn": 
katakana_to_hepburn(non_kanji_reading, use_macron=use_macron) - }) - reading_pos += len_allocated_reading + # 複数文字の場合は既存のユーティリティで分割 + parts = self.split_kanji_okurigana(surface, reading, use_macron=use_macron) + results.extend(parts) return results From 3ee724622457d48b7ca45b6dd95770744dfd1447 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Sun, 5 Oct 2025 16:18:58 +0900 Subject: [PATCH 60/92] [Feature] Transliterator: Implement contextual transliteration rules and integrate with analysis method --- .../transliteration_context_rules.py | 134 +++++++++++++++ .../transliteration_transliterator.py | 156 ++++++++++++------ 2 files changed, 244 insertions(+), 46 deletions(-) create mode 100644 src-python/models/transliteration/transliteration_context_rules.py diff --git a/src-python/models/transliteration/transliteration_context_rules.py b/src-python/models/transliteration/transliteration_context_rules.py new file mode 100644 index 00000000..d0b5d339 --- /dev/null +++ b/src-python/models/transliteration/transliteration_context_rules.py @@ -0,0 +1,134 @@ +from typing import List, Dict +import re + +"""Contextual transliteration rules for tokenized results. + +This module provides a compact rule engine that can modify token +readings (kana) based on neighboring tokens. Rules are embedded in +``DEFAULT_RULES`` to simplify packaging (no external JSON required). + +Key points +- Rules are applied in descending ``priority`` order. +- Supported match modes: ``equals`` (exact match) and ``regex``. +- ``direction`` chooses whether to inspect the next or previous token. +- When a rule sets ``kana``, the engine overwrites ``kana`` and clears + ``hira``/``hepburn``; callers should recompute them after rules run. + +The engine mutates the provided ``results`` list in-place and also +returns it for convenience. 
+""" +DEFAULT_RULES = { + "rules": [ + { + "name": "nan_next_tdna", + "target": "何", + "match_mode": "equals", + "direction": "next", + "kana_set": list("タチツテトダヂヅデドナニヌネノ"), + "on_true": {"kana": "ナン"}, + "on_false": {"kana": "ナニ"} + } + ] +} + + + +def apply_context_rules(results: List[Dict], use_macron: bool = False) -> List[Dict]: + """Apply contextual rewrite rules to `results`. + + Parameters + - results: list of token dicts produced by Transliterator.split_kanji_okurigana + where each entry contains at least the keys: 'orig', 'kana', 'hira', 'hepburn'. + - use_macron: passed through for compatibility; rules themselves don't use it + + Returns + - The (possibly modified) `results` list. The list is also modified in-place. + + The engine supports 'equals' and 'regex' match modes, next/prev neighbor + inspection, and simple actions that overwrite `kana` (caller must recalc + `hira`/`hepburn` afterwards). + """ + + # prepare rules: sort by priority (desc) and precompile regex where provided + raw_rules = DEFAULT_RULES.get("rules", []) + rules = sorted(raw_rules, key=lambda r: r.get("priority", 0), reverse=True) + for r in rules: + if r.get("match_mode") == "regex" and r.get("pattern"): + try: + r["_re"] = re.compile(r["pattern"]) + except Exception: + r["_re"] = None + + i = 0 + n = len(results) + while i < n: + entry = results[i] + orig = entry.get("orig", "") + # skip tokens with empty orig (symbols, whitespace, etc.) 
+ if not orig: + i += 1 + continue + + for rule in rules: + target = rule.get("target") + mode = rule.get("match_mode", "equals") + direction = rule.get("direction", "next") + kana_set = set(rule.get("kana_set", [])) + on_true = rule.get("on_true", {}) + on_false = rule.get("on_false", {}) + + matched = False + if mode == "equals" and orig == target: + matched = True + elif mode == "regex": + cre = rule.get("_re") + if cre and cre.search(orig): + matched = True + # regex or other modes can be added later + + if not matched: + continue + + # decide neighbor token based on direction + neighbor_entry = None + if direction == "next": + j = i + 1 + while j < n: + if results[j].get("orig"): + neighbor_entry = results[j] + break + j += 1 + elif direction == "prev": + j = i - 1 + while j >= 0: + if results[j].get("orig"): + neighbor_entry = results[j] + break + j -= 1 + + condition = False + if neighbor_entry: + nk = neighbor_entry.get("kana", "") + if nk: + first = nk[0] + if first in kana_set: + condition = True + else: + # fallback to orig-first-char check + fo = neighbor_entry.get("orig", "")[:1] + if fo and 'ァ' <= fo <= 'ン' and fo in kana_set: + condition = True + + # Apply action: simple overwrite of kana/hira/hepburn for the matched token + action = on_true if condition else on_false + if "kana" in action: + entry["kana"] = action["kana"] + entry["hira"] = "" + entry["hepburn"] = "" + # once a rule applied, do not apply further rules to this token + break + + i += 1 + + # return the (possibly modified) results for convenience/pure-function style usage + return results diff --git a/src-python/models/transliteration/transliteration_transliterator.py b/src-python/models/transliteration/transliteration_transliterator.py index b8e64f7d..e25b3be4 100644 --- a/src-python/models/transliteration/transliteration_transliterator.py +++ b/src-python/models/transliteration/transliteration_transliterator.py @@ -4,6 +4,10 @@ try: from .transliteration_kana_to_hepburn import 
katakana_to_hepburn except ImportError: from transliteration_kana_to_hepburn import katakana_to_hepburn +try: + from .transliteration_context_rules import apply_context_rules +except ImportError: + from transliteration_context_rules import apply_context_rules class Transliterator: def __init__(self): @@ -23,10 +27,24 @@ class Transliterator: @staticmethod def split_kanji_okurigana(surface: str, reading_kana: str, use_macron: bool = True): + """Split a single surface word and its kana reading into parts. + + Inputs: + - surface: the surface form (may contain kanji + kana) + - reading_kana: the katakana reading for the whole surface + + Output: + - a list of dicts: [{"orig": str, "kana": str, "hira": str, "hepburn": str}, ...] + + Notes: + - The function allocates portions of ``reading_kana`` to each contiguous + kanji/non-kanji block in ``surface``. Allocation is heuristic: an + initial allocation based on block length is used and any remainder is + distributed left-to-right preferring kanji blocks. + - This function is pure (no external side effects) and returns the + constructed list. """ - 1語の表層形(surface)と読み(reading_kana)を - [ {"orig":..., "kana":..., "hira":..., "hepburn":...}, ... 
] に分割 - """ + result = [] # 表層を「漢字ブロック」と「非漢字ブロック」に分割 @@ -46,53 +64,73 @@ class Transliterator: # 読みを分配 kana_left = reading_kana - for i, (is_kan, part) in enumerate(blocks): - if is_kan: - # 漢字ブロックの処理 - if len(blocks) == 1: - # 単一ブロック(全て漢字)の場合 - kana_for_kan = kana_left - elif i == len(blocks) - 1: - # 最後のブロック(漢字)の場合 - kana_for_kan = kana_left - else: - # 中間の漢字ブロックの場合 - # 後続の非漢字ブロックの文字数を計算 - remaining_non_kanji = sum(len(p) for is_k, p in blocks[i+1:] if not is_k) - if remaining_non_kanji > 0 and len(kana_left) > remaining_non_kanji: - kana_for_kan = kana_left[:-remaining_non_kanji] - else: - # 漢字1文字あたり最低1文字の読みを割り当て - min_kana = len(part) - kana_for_kan = kana_left[:max(min_kana, len(kana_left) - remaining_non_kanji)] - - # 空の読みを避ける - if not kana_for_kan and kana_left: - kana_for_kan = kana_left[:1] + # We'll allocate kana to each block by initial guess = len(part) (characters) + # and distribute any remaining kana left-to-right preferring kanji blocks. + kana_len = len(kana_left) - result.append({ - "orig": part, - "kana": kana_for_kan, - "hira": Transliterator.kata_to_hira(kana_for_kan), - "hepburn": katakana_to_hepburn(kana_for_kan, use_macron=use_macron) - }) - kana_left = kana_left[len(kana_for_kan):] - else: - # 非漢字部分(送り仮名など) - kana_for_okuri = kana_left[:len(part)] - result.append( - { - "orig": part, - "kana": kana_for_okuri, - "hira": Transliterator.kata_to_hira(kana_for_okuri), - "hepburn": katakana_to_hepburn(kana_for_okuri, use_macron=use_macron) - } - ) - kana_left = kana_left[len(kana_for_okuri):] + # initial allocation per block + allocs = [len(part) for _, part in blocks] + allocated = sum(allocs) + remaining = kana_len - allocated + + # distribute extra kana to kanji blocks first (left-to-right) + if remaining > 0: + for idx, (is_kan, _) in enumerate(blocks): + if remaining <= 0: + break + if is_kan: + allocs[idx] += 1 + remaining -= 1 + # if still remaining, distribute to all blocks left-to-right + idx = 0 + while remaining > 0 and len(blocks) > 
0: + allocs[idx] += 1 + remaining -= 1 + idx = (idx + 1) % len(blocks) + + # if remaining < 0 (reading shorter than base), shrink allocations from right + if remaining < 0: + # remove from rightmost blocks as needed + need = -remaining + idx = len(blocks) - 1 + while need > 0 and idx >= 0: + take = min(allocs[idx] - 1, need) if allocs[idx] > 1 else 0 + allocs[idx] -= take + need -= take + idx -= 1 + + # now slice kana_left according to allocs + pos = 0 + for (is_kan, part), cnt in zip(blocks, allocs): + kana_for_part = kana_left[pos:pos+cnt] + pos += cnt + result.append({ + "orig": part, + "kana": kana_for_part, + "hira": Transliterator.kata_to_hira(kana_for_part), + "hepburn": katakana_to_hepburn(kana_for_part, use_macron=use_macron) + }) return result def analyze(self, text: str, use_macron: bool = False): + """Tokenize ``text`` and produce per-subunit reading information. + + Returns a list of dicts for each token/sub-part with keys: + - orig: original surface string (one or more characters) + - kana: katakana reading for this part (may be adapted by context rules) + - hira: hiragana reading (derived from kana) + - hepburn: Latin transcription (derived from kana) + + Side-effects / notes: + - The function calls ``apply_context_rules(results, use_macron=...)`` + which both mutates ``results`` in-place and returns it. This method + safely accepts the returned list and then recalculates ``hira`` and + ``hepburn`` for entries whose ``kana`` was changed. + - If rule application fails, analysis still returns the best-effort + results. 
+ """ + tokens = self.tokenizer_obj.tokenize(text, self.mode) results = [] @@ -127,11 +165,37 @@ class Transliterator: parts = self.split_kanji_okurigana(surface, reading, use_macron=use_macron) results.extend(parts) + # 文脈ルールを適用(別ファイル) + try: + results = apply_context_rules(results, use_macron=use_macron) or results + except Exception: + # ルール適用で失敗しても解析結果は返す + pass + + # apply_context_rules が kana を書き換えた場合、hira と hepburn を再計算 + for entry in results: + kana = entry.get("kana", "") + if kana: + entry["hira"] = self.kata_to_hira(kana) + entry["hepburn"] = katakana_to_hepburn(kana, use_macron=use_macron) + return results # --- テスト --- if __name__ == "__main__": + import pprint test_cases = [ + "向こうへ行く", + "行事を行う", + "上がる", + "上る", + "入り込む", + "何", + "何が好き?", + "何色が好き?", + "何色ありますか?", + "何語ですか?", + "テーブルに色鉛筆は何色ありますか?" "美しい花を見る", "東京に行く", "漢字とカタカナの混在", @@ -155,4 +219,4 @@ if __name__ == "__main__": transliterator = Transliterator() for case in test_cases: - print(transliterator.analyze(case)) \ No newline at end of file + pprint.pprint(transliterator.analyze(case), sort_dicts=False) \ No newline at end of file From ca07aef201ac23e15f17511b934dfab51026530f Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Sun, 5 Oct 2025 17:15:25 +0900 Subject: [PATCH 61/92] [Update] Translator: Add check to return original message if source and target languages are the same --- src-python/models/translation/translation_translator.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src-python/models/translation/translation_translator.py b/src-python/models/translation/translation_translator.py index 897fcd1b..a9a1a56a 100644 --- a/src-python/models/translation/translation_translator.py +++ b/src-python/models/translation/translation_translator.py @@ -100,6 +100,9 @@ class Translator(): def translate(self, translator_name, source_language, target_language, target_country, message): try: + if source_language == target_language: + return 
message + result = "" source_language, target_language = self.getLanguageCode(translator_name, target_country, source_language, target_language) match translator_name: From 4b0c7e9775bc9e821163408264d3492109cf89b7 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Sun, 5 Oct 2025 21:15:44 +0900 Subject: [PATCH 62/92] [Update] Controller and DeviceManager: Refactor device management methods for mic and speaker separation --- src-python/controller.py | 54 +++++++++++++++++++++--------------- src-python/device_manager.py | 54 +++++++++++++++++++++++++----------- 2 files changed, 70 insertions(+), 38 deletions(-) diff --git a/src-python/controller.py b/src-python/controller.py index 26276856..1f6c03ef 100644 --- a/src-python/controller.py +++ b/src-python/controller.py @@ -86,27 +86,31 @@ class Controller: settings, ) - def restartAccessDevices(self) -> None: + def restartAccessMicDevices(self) -> None: if config.ENABLE_TRANSCRIPTION_SEND is True: self.startThreadingTranscriptionSendMessage() - if config.ENABLE_TRANSCRIPTION_RECEIVE is True: - self.startThreadingTranscriptionReceiveMessage() if config.ENABLE_CHECK_ENERGY_SEND is True: model.startCheckMicEnergy( self.progressBarMicEnergy, ) + + def restartAccessSpeakerDevices(self) -> None: + if config.ENABLE_TRANSCRIPTION_RECEIVE is True: + self.startThreadingTranscriptionReceiveMessage() if config.ENABLE_CHECK_ENERGY_RECEIVE is True: model.startCheckSpeakerEnergy( self.progressBarSpeakerEnergy, ) - def stopAccessDevices(self) -> None: + def stopAccessMicDevices(self) -> None: if config.ENABLE_TRANSCRIPTION_SEND is True: self.stopThreadingTranscriptionSendMessage() - if config.ENABLE_TRANSCRIPTION_RECEIVE is True: - self.stopThreadingTranscriptionReceiveMessage() if config.ENABLE_CHECK_ENERGY_SEND is True: model.stopCheckMicEnergy() + + def stopAccessSpeakerDevices(self) -> None: + if config.ENABLE_TRANSCRIPTION_RECEIVE is True: + 
self.stopThreadingTranscriptionReceiveMessage() if config.ENABLE_CHECK_ENERGY_RECEIVE is True: model.stopCheckSpeakerEnergy() @@ -1048,22 +1052,25 @@ class Controller: def getAutoMicSelect(*args, **kwargs) -> dict: return {"status":200, "result":config.AUTO_MIC_SELECT} + def applyAutoMicSelect(self) -> None: + device_manager.setCallbackProcessBeforeUpdateMicDevices(self.stopAccessMicDevices) + device_manager.setCallbackDefaultMicDevice(self.updateSelectedMicDevice) + device_manager.setCallbackProcessAfterUpdateMicDevices(self.restartAccessMicDevices) + device_manager.forceUpdateAndSetMicDevices() + def setEnableAutoMicSelect(self, *args, **kwargs) -> dict: if config.AUTO_MIC_SELECT is False: + self.applyAutoMicSelect() config.AUTO_MIC_SELECT = True - device_manager.setCallbackProcessBeforeUpdateDevices(self.stopAccessDevices) - device_manager.setCallbackDefaultMicDevice(self.updateSelectedMicDevice) - device_manager.setCallbackProcessAfterUpdateDevices(self.restartAccessDevices) - device_manager.forceUpdateAndSetMicDevices() return {"status":200, "result":config.AUTO_MIC_SELECT} @staticmethod def setDisableAutoMicSelect(*args, **kwargs) -> dict: if config.AUTO_MIC_SELECT is True: - config.AUTO_MIC_SELECT = False - device_manager.clearCallbackProcessBeforeUpdateDevices() + device_manager.clearCallbackProcessBeforeUpdateMicDevices() device_manager.clearCallbackDefaultMicDevice() - device_manager.clearCallbackProcessAfterUpdateDevices() + device_manager.clearCallbackProcessAfterUpdateMicDevices() + config.AUTO_MIC_SELECT = False return {"status":200, "result":config.AUTO_MIC_SELECT} @staticmethod @@ -1245,22 +1252,25 @@ class Controller: def getAutoSpeakerSelect(*args, **kwargs) -> dict: return {"status":200, "result":config.AUTO_SPEAKER_SELECT} + def applyAutoSpeakerSelect(self) -> None: + device_manager.setCallbackProcessBeforeUpdateSpeakerDevices(self.stopAccessSpeakerDevices) + device_manager.setCallbackDefaultSpeakerDevice(self.updateSelectedSpeakerDevice) + 
device_manager.setCallbackProcessAfterUpdateSpeakerDevices(self.restartAccessSpeakerDevices) + device_manager.forceUpdateAndSetSpeakerDevices() + def setEnableAutoSpeakerSelect(self, *args, **kwargs) -> dict: if config.AUTO_SPEAKER_SELECT is False: + self.applyAutoSpeakerSelect() config.AUTO_SPEAKER_SELECT = True - device_manager.setCallbackProcessBeforeUpdateDevices(self.stopAccessDevices) - device_manager.setCallbackDefaultSpeakerDevice(self.updateSelectedSpeakerDevice) - device_manager.setCallbackProcessAfterUpdateDevices(self.restartAccessDevices) - device_manager.forceUpdateAndSetSpeakerDevices() return {"status":200, "result":config.AUTO_SPEAKER_SELECT} @staticmethod def setDisableAutoSpeakerSelect(*args, **kwargs) -> dict: if config.AUTO_SPEAKER_SELECT is True: - config.AUTO_SPEAKER_SELECT = False - device_manager.clearCallbackProcessBeforeUpdateDevices() + device_manager.clearCallbackProcessBeforeUpdateSpeakerDevices() device_manager.clearCallbackDefaultSpeakerDevice() - device_manager.clearCallbackProcessAfterUpdateDevices() + device_manager.clearCallbackProcessAfterUpdateSpeakerDevices() + config.AUTO_SPEAKER_SELECT = False return {"status":200, "result":config.AUTO_SPEAKER_SELECT} @staticmethod @@ -2507,9 +2517,9 @@ class Controller: printLog("Init Auto Device Selection") if config.AUTO_MIC_SELECT is True: - self.setEnableAutoMicSelect() + self.applyAutoMicSelect() if config.AUTO_SPEAKER_SELECT is True: - self.setEnableAutoSpeakerSelect() + self.applyAutoSpeakerSelect() printLog("Init Overlay") if (config.OVERLAY_SMALL_LOG is True or config.OVERLAY_LARGE_LOG is True): diff --git a/src-python/device_manager.py b/src-python/device_manager.py index e2a8571a..7f741d26 100644 --- a/src-python/device_manager.py +++ b/src-python/device_manager.py @@ -178,7 +178,8 @@ class DeviceManager: sleep(1) enumerator.UnregisterEndpointNotificationCallback(cb) comtypes.CoUninitialize() - self.runProcessBeforeUpdateDevices() + self.runProcessBeforeUpdateMicDevices() + 
self.runProcessBeforeUpdateSpeakerDevices() sleep(2) for _ in range(10): self.update() @@ -186,7 +187,8 @@ class DeviceManager: break sleep(2) self.noticeUpdateDevices() - self.runProcessAfterUpdateDevices() + self.runProcessAfterUpdateMicDevices() + self.runProcessAfterUpdateSpeakerDevices() except Exception: errorLogging() finally: @@ -234,25 +236,45 @@ class DeviceManager: def clearCallbackSpeakerDeviceList(self): self.callback_speaker_device_list = None - def setCallbackProcessBeforeUpdateDevices(self, callback): - self.callback_process_before_update_devices = callback + def setCallbackProcessBeforeUpdateMicDevices(self, callback): + self.callback_process_before_update_mic_devices = callback - def clearCallbackProcessBeforeUpdateDevices(self): - self.callback_process_before_update_devices = None + def clearCallbackProcessBeforeUpdateMicDevices(self): + self.callback_process_before_update_mic_devices = None - def runProcessBeforeUpdateDevices(self): - if isinstance(self.callback_process_before_update_devices, Callable): - self.callback_process_before_update_devices() + def runProcessBeforeUpdateMicDevices(self): + if isinstance(self.callback_process_before_update_mic_devices, Callable): + self.callback_process_before_update_mic_devices() - def setCallbackProcessAfterUpdateDevices(self, callback): - self.callback_process_after_update_devices = callback + def setCallbackProcessAfterUpdateMicDevices(self, callback): + self.callback_process_after_update_mic_devices = callback - def clearCallbackProcessAfterUpdateDevices(self): - self.callback_process_after_update_devices = None + def clearCallbackProcessAfterUpdateMicDevices(self): + self.callback_process_after_update_mic_devices = None - def runProcessAfterUpdateDevices(self): - if isinstance(self.callback_process_after_update_devices, Callable): - self.callback_process_after_update_devices() + def runProcessAfterUpdateMicDevices(self): + if isinstance(self.callback_process_after_update_mic_devices, Callable): + 
self.callback_process_after_update_mic_devices() + + def setCallbackProcessBeforeUpdateSpeakerDevices(self, callback): + self.callback_process_before_update_speaker_devices = callback + + def clearCallbackProcessBeforeUpdateSpeakerDevices(self): + self.callback_process_before_update_speaker_devices = None + + def runProcessBeforeUpdateSpeakerDevices(self): + if isinstance(self.callback_process_before_update_speaker_devices, Callable): + self.callback_process_before_update_speaker_devices() + + def setCallbackProcessAfterUpdateSpeakerDevices(self, callback): + self.callback_process_after_update_speaker_devices = callback + + def clearCallbackProcessAfterUpdateSpeakerDevices(self): + self.callback_process_after_update_speaker_devices = None + + def runProcessAfterUpdateSpeakerDevices(self): + if isinstance(self.callback_process_after_update_speaker_devices, Callable): + self.callback_process_after_update_speaker_devices() def noticeUpdateDevices(self): if self.update_flag_default_mic_device is True: From b632d662253d324fde1197016c15473c7417aa94 Mon Sep 17 00:00:00 2001 From: Sakamoto Shiina <68018796+ShiinaSakamoto@users.noreply.github.com> Date: Mon, 6 Oct 2025 14:30:24 +0900 Subject: [PATCH 63/92] [Update] UI: Add disable translation function when config page has opened. 
--- .../ConfigPageCloseTriggerController.jsx | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src-ui/app/_app_controllers/ConfigPageCloseTriggerController.jsx b/src-ui/app/_app_controllers/ConfigPageCloseTriggerController.jsx index c04e2a1a..3a39be58 100644 --- a/src-ui/app/_app_controllers/ConfigPageCloseTriggerController.jsx +++ b/src-ui/app/_app_controllers/ConfigPageCloseTriggerController.jsx @@ -15,8 +15,14 @@ import { useStore_MainFunctionsStateMemory } from "@store"; export const ConfigPageCloseTriggerController = () => { const { currentIsOpenedConfigPage } = useIsOpenedConfigPage(); - const { currentMainFunctionsStateMemory, updateMainFunctionsStateMemory} = useStore_MainFunctionsStateMemory(); const { + currentMainFunctionsStateMemory, + updateMainFunctionsStateMemory, + } = useStore_MainFunctionsStateMemory(); + + const { + currentTranslationStatus, + setTranslation, currentTranscriptionSendStatus, setTranscriptionSend, currentTranscriptionReceiveStatus, @@ -34,12 +40,14 @@ export const ConfigPageCloseTriggerController = () => { const memorizeLatestMainFunctionsState = () => { updateMainFunctionsStateMemory({ + translation: currentTranslationStatus.data, transcription_send: currentTranscriptionSendStatus.data, transcription_receive: currentTranscriptionReceiveStatus.data, }); }; const restoreMainFunctionState = () => { + if (currentMainFunctionsStateMemory.data.translation === true) setTranslation(true); if (currentMainFunctionsStateMemory.data.transcription_send === true) setTranscriptionSend(true); if (currentMainFunctionsStateMemory.data.transcription_receive === true) setTranscriptionReceive(true); }; @@ -48,6 +56,7 @@ export const ConfigPageCloseTriggerController = () => { if (currentIsOpenedConfigPage.data === true) { // When config page is opened. 
memorizeLatestMainFunctionsState(); unregisterAll(); + if (currentTranslationStatus.data === true) setTranslation(false); if (currentTranscriptionSendStatus.data === true) setTranscriptionSend(false); if (currentTranscriptionReceiveStatus.data === true) setTranscriptionReceive(false); } else if (currentIsOpenedConfigPage.data === false) { // When config page is closed. From e2d401b8488556e8a611ef1fbaa7c32c89d001d3 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Mon, 6 Oct 2025 14:30:35 +0900 Subject: [PATCH 64/92] =?UTF-8?q?=E5=AE=9F=E8=A3=85=E6=BC=8F=E3=82=8C?= =?UTF-8?q?=E3=82=92=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src-python/backend_test.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src-python/backend_test.py b/src-python/backend_test.py index 6f1de883..54196ea1 100644 --- a/src-python/backend_test.py +++ b/src-python/backend_test.py @@ -368,6 +368,7 @@ class TestMainloop(): if expected_status == [401]: print(f"-> {Color.YELLOW}[SKIP]{Color.RESET} No test available for this endpoint: {endpoint}.") self.record_test_result(endpoint, None, None, expected_status) # テスト結果を記録 + success=True return success elif expected_status == [404]: print(f"-> {Color.RED}[ERROR]{Color.RESET} Unknown endpoint: {endpoint}.") @@ -474,6 +475,7 @@ class TestMainloop(): if expected_status == [401]: print(f"-> {Color.YELLOW}[SKIP]{Color.RESET} No test available for this endpoint: {endpoint}.") self.record_test_result(endpoint, None, None, expected_status) # テスト結果を記録 + success=True return success elif expected_status == [404]: print(f"-> {Color.RED}[ERROR]{Color.RESET} Unknown endpoint: {endpoint}.") @@ -669,11 +671,11 @@ if __name__ == "__main__": import traceback try: test = TestMainloop() - test.test_endpoints_on_off_all() - test.test_set_data_endpoints_all() - test.test_run_endpoints_all() - 
test.test_delete_data_endpoints_all() - # test.test_endpoints_all_random() + # test.test_endpoints_on_off_all() + # test.test_set_data_endpoints_all() + # test.test_run_endpoints_all() + # test.test_delete_data_endpoints_all() + test.test_endpoints_all_random() # test.test_endpoints_on_off_continuous() # test.test_endpoints_on_off_random() # test.test_endpoints_specific_random() From 4572aee2b71c51160c20b78a703043e39fc841b6 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Mon, 6 Oct 2025 16:40:05 +0900 Subject: [PATCH 65/92] [Update] Controller and Model: Refactor translation device management and add parameter change tracking --- src-python/controller.py | 79 +++++++++---------- src-python/model.py | 6 ++ .../translation/translation_translator.py | 7 ++ 3 files changed, 50 insertions(+), 42 deletions(-) diff --git a/src-python/controller.py b/src-python/controller.py index 4015016f..f40a25ae 100644 --- a/src-python/controller.py +++ b/src-python/controller.py @@ -1,3 +1,4 @@ +import copy from typing import Callable, Any from time import sleep from subprocess import Popen @@ -753,25 +754,10 @@ class Controller: def setSelectedTranslationComputeDevice(self, device:str, *args, **kwargs) -> dict: printLog("setSelectedTranslationComputeDevice", device) - pre_device = config.SELECTED_TRANSLATION_COMPUTE_DEVICE - pre_compute_type = config.SELECTED_TRANSLATION_COMPUTE_TYPE config.SELECTED_TRANSLATION_COMPUTE_DEVICE = device config.SELECTED_TRANSLATION_COMPUTE_TYPE = "auto" - try: - model.changeTranslatorCTranslate2Model() - self.run(200, self.run_mapping["selected_translation_compute_type"], config.SELECTED_TRANSLATION_COMPUTE_TYPE) - except Exception as e: - # VRAM不足エラーの検出(デバイス切り替え時) - is_vram_error, error_message = model.detectVRAMError(e) - if is_vram_error: - # 前のデバイス設定に戻す - printLog("VRAM error detected, reverting device setting") - config.SELECTED_TRANSLATION_COMPUTE_DEVICE = pre_device - 
config.SELECTED_TRANSLATION_COMPUTE_TYPE = pre_compute_type - model.changeTranslatorCTranslate2Model() - else: - # その他のエラーは通常通り処理 - errorLogging() + self.run(200, self.run_mapping["selected_translation_compute_type"], config.SELECTED_TRANSLATION_COMPUTE_TYPE) + model.setChangedTranslatorParameters(True) return {"status":200,"result":config.SELECTED_TRANSLATION_COMPUTE_DEVICE} @staticmethod @@ -801,12 +787,39 @@ class Controller: # def getMaxSpeakerThreshold(*args, **kwargs) -> dict: # return {"status":200, "result":config.MAX_SPEAKER_THRESHOLD} - @staticmethod - def setEnableTranslation(*args, **kwargs) -> dict: + def setEnableTranslation(self, *args, **kwargs) -> dict: if config.ENABLE_TRANSLATION is False: - if model.isLoadedCTranslate2Model() is False: - model.changeTranslatorCTranslate2Model() - config.ENABLE_TRANSLATION = True + if model.isLoadedCTranslate2Model() is False or model.isChangedTranslatorParameters() is True: + try: + model.changeTranslatorCTranslate2Model() + model.setChangedTranslatorParameters(False) + config.ENABLE_TRANSLATION = True + except Exception as e: + # VRAM不足エラーの検出(デバイス切り替え時) + is_vram_error, error_message = model.detectVRAMError(e) + if is_vram_error: + # Defaultのデバイス設定に戻す + printLog("VRAM error detected, reverting device setting") + self.setDisableTranslation() + config.SELECTED_TRANSLATION_COMPUTE_DEVICE = copy.deepcopy(config.SELECTABLE_COMPUTE_DEVICE_LIST[0]) + config.SELECTED_TRANSLATION_COMPUTE_TYPE = "auto" + self.run(200, self.run_mapping["selected_translation_compute_device"], config.SELECTED_TRANSLATION_COMPUTE_DEVICE) + self.run(200, self.run_mapping["selected_translation_compute_type"], config.SELECTED_TRANSLATION_COMPUTE_TYPE) + self.run( + 400, + self.run_mapping["enable_translation"], + { + "message":"Translation disabled due to VRAM overflow", + "data": False + }, + ) + model.changeTranslatorCTranslate2Model() + model.setChangedTranslatorParameters(False) + else: + # その他のエラーは通常通り処理 + errorLogging() + else: + 
config.ENABLE_TRANSLATION = True return {"status":200, "result":config.ENABLE_TRANSLATION} @staticmethod @@ -1571,17 +1584,8 @@ class Controller: @staticmethod def setCtranslate2WeightType(data, *args, **kwargs) -> dict: - pre_weight_type = config.CTRANSLATE2_WEIGHT_TYPE config.CTRANSLATE2_WEIGHT_TYPE = str(data) - if model.checkTranslatorCTranslate2ModelWeight(config.CTRANSLATE2_WEIGHT_TYPE): - def callback(): - model.changeTranslatorCTranslate2Model() - th_callback = Thread(target=callback) - th_callback.daemon = True - th_callback.start() - th_callback.join() - else: - config.CTRANSLATE2_WEIGHT_TYPE = pre_weight_type + model.setChangedTranslatorParameters(True) return {"status":200, "result":config.CTRANSLATE2_WEIGHT_TYPE} @staticmethod @@ -1590,17 +1594,8 @@ class Controller: @staticmethod def setSelectedTranslationComputeType(data, *args, **kwargs) -> dict: - pre_compute_type = config.SELECTED_TRANSLATION_COMPUTE_TYPE config.SELECTED_TRANSLATION_COMPUTE_TYPE = str(data) - if model.checkTranslatorCTranslate2ModelWeight(config.CTRANSLATE2_WEIGHT_TYPE): - def callback(): - model.changeTranslatorCTranslate2Model() - th_callback = Thread(target=callback) - th_callback.daemon = True - th_callback.start() - th_callback.join() - else: - config.SELECTED_TRANSLATION_COMPUTE_TYPE = pre_compute_type + model.setChangedTranslatorParameters(True) return {"status":200, "result":config.SELECTED_TRANSLATION_COMPUTE_TYPE} @staticmethod diff --git a/src-python/model.py b/src-python/model.py index 6048c630..bbf43604 100644 --- a/src-python/model.py +++ b/src-python/model.py @@ -128,6 +128,12 @@ class Model: def isLoadedCTranslate2Model(self): return self.translator.isLoadedCTranslate2Model() + def isChangedTranslatorParameters(self): + return self.translator.isChangedTranslatorParameters() + + def setChangedTranslatorParameters(self, is_changed): + self.translator.setChangedTranslatorParameters(is_changed) + def checkTranscriptionWhisperModelWeight(self, weight_type:str): return 
checkWhisperWeight(config.PATH_LOCAL, weight_type) diff --git a/src-python/models/translation/translation_translator.py b/src-python/models/translation/translation_translator.py index a9a1a56a..a12b326e 100644 --- a/src-python/models/translation/translation_translator.py +++ b/src-python/models/translation/translation_translator.py @@ -23,6 +23,7 @@ class Translator(): self.ctranslate2_translator = None self.ctranslate2_tokenizer = None self.is_loaded_ctranslate2_model = False + self.is_changed_translator_parameters = False self.is_enable_translators = ENABLE_TRANSLATORS def authenticationDeepLAuthKey(self, authkey): @@ -64,6 +65,12 @@ class Translator(): def isLoadedCTranslate2Model(self): return self.is_loaded_ctranslate2_model + def isChangedTranslatorParameters(self): + return self.is_changed_translator_parameters + + def setChangedTranslatorParameters(self, is_changed): + self.is_changed_translator_parameters = is_changed + def translateCTranslate2(self, message, source_language, target_language): result = False if self.is_loaded_ctranslate2_model is True: From a167e57ff08f325383d56da94528b7e724bde470 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Tue, 7 Oct 2025 13:33:04 +0900 Subject: [PATCH 66/92] [Update] Test: Add translation tests for all language pairs and log results [Update] remove unsupported langs --- src-python/backend_test.py | 65 +++++++++++++++++++ .../translation/translation_languages.py | 6 +- 2 files changed, 68 insertions(+), 3 deletions(-) diff --git a/src-python/backend_test.py b/src-python/backend_test.py index 54196ea1..d56b1622 100644 --- a/src-python/backend_test.py +++ b/src-python/backend_test.py @@ -627,6 +627,70 @@ class TestMainloop(): self.test_delete_data_endpoints_single(endpoint) print("----データ削除系のエンドポイントのテスト終了----") + def test_translate_language(self, text): + """ + 指定された言語ペアで翻訳をテストする + :param text: 翻訳するテキスト + :return: 翻訳結果とステータスコード + """ + # エンドポイント + endpoint = 
"/run/send_message_box" + result, status = self.main.handleRequest(endpoint, text) + return result, status + + def test_translate_all_language_pairs(self): + results = {} + # 翻訳機能を有効にする + self.main.handleRequest("/set/enable/translation", None) + # 対応する言語コードのリストを取得 + self.config_dict["selectable_language_list"], _ = self.main.handleRequest("/get/data/selectable_language_list", None) + selectable_language_list = self.config_dict.get("selectable_language_list", None) + # すべての言語ペアで翻訳をテスト + for source_lang in selectable_language_list: + results[source_lang["language"]] = {} + for target_lang in selectable_language_list: + results[source_lang["language"]][target_lang["language"]] = {} + data = {} + for i in ["1", "2", "3"]: + data[i] = {} + data[i]["1"] = source_lang | {"enable": True} + self.main.handleRequest("/set/data/selected_your_languages", data) + data = {} + for i in ["1", "2", "3"]: + data[i] = {} + for j in ["1", "2", "3"]: + if j == "1": + data[i][j] = target_lang | {"enable": True} + else: + data[i][j] = target_lang | {"enable": False} + self.main.handleRequest("/set/data/selected_target_languages", data) + + # 翻訳エンジンを設定する(例: "CTranslate2") + self.config_dict["translation_engines"], _ = self.main.handleRequest("/get/data/translation_engines", None) + translation_engines = self.config_dict.get("translation_engines", None) + for engine in translation_engines: + results[source_lang["language"]][target_lang["language"]][engine] = None + data = {} + for i in ["1", "2", "3"]: + data[i] = engine + self.main.handleRequest("/set/data/selected_translation_engines", data) + + # テスト翻訳を実行 + print(f"Translating from {source_lang} to {target_lang} using {engine}") + result, status = self.test_translate_language({"id":"000001", "message":"こんにちわ 世界!"}) + if status == 200: + print(f"-> {Color.GREEN}[PASS]{Color.RESET} Translation from {source_lang} to {target_lang}: {result}") + results[source_lang["language"]][target_lang["language"]][engine] = True + else: + print(f"-> 
{Color.RED}[ERROR]{Color.RESET} Translation from {source_lang} to {target_lang} failed with status {status}") + results[source_lang["language"]][target_lang["language"]][engine] = False + # 翻訳機能を無効にする + self.main.handleRequest("/set/disable/translation", None) + print("----すべての言語ペアでの翻訳テスト終了----") + import json + with open("translation_test_results.json", "w", encoding="utf-8") as f: + json.dump(results, f, indent=4, ensure_ascii=False) + def generate_summary(self): """ テスト結果のサマリーを生成して表示する @@ -679,6 +743,7 @@ if __name__ == "__main__": # test.test_endpoints_on_off_continuous() # test.test_endpoints_on_off_random() # test.test_endpoints_specific_random() + # test.test_translate_all_language_pairs() test.generate_summary() except KeyboardInterrupt: print("Interrupted by user, shutting down...") diff --git a/src-python/models/translation/translation_languages.py b/src-python/models/translation/translation_languages.py index a697960b..2a660e17 100644 --- a/src-python/models/translation/translation_languages.py +++ b/src-python/models/translation/translation_languages.py @@ -176,7 +176,7 @@ dict_google_languages = { "Belarusian":"be", "Cebuano":"ceb", "Esperanto":"eo", - "Basque":"eu", + # "Basque":"eu", "Irish":"ga" } translation_lang["Google"] = { @@ -317,7 +317,7 @@ dict_ctranslate2_languages = { "Malayalam": "ml", "Welsh": "cy", "Slovak": "sk", - "Telugu": "te", + # "Telugu": "te", "Persian": "fa", "Latvian": "lv", "Bengali": "bn", @@ -328,7 +328,7 @@ dict_ctranslate2_languages = { "Estonian": "et", "Macedonian": "mk", "Breton": "br", - "Basque": "eu", + # "Basque": "eu", "Icelandic": "is", "Armenian": "hy", "Nepali": "ne", From 6c655b6043ae649edd8ab90fdc7ea1713fdc94c1 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Tue, 7 Oct 2025 19:01:55 +0900 Subject: [PATCH 67/92] [Update] OverlayImage: Improve font loading logic with error handling for font file paths --- src-python/controller.py | 8 ----- 
src-python/models/overlay/overlay_image.py | 37 ++++++++++++++++------ 2 files changed, 28 insertions(+), 17 deletions(-) diff --git a/src-python/controller.py b/src-python/controller.py index f40a25ae..d5322134 100644 --- a/src-python/controller.py +++ b/src-python/controller.py @@ -1136,7 +1136,6 @@ class Controller: else: raise ValueError() except Exception: - errorLogging() response = { "status":400, "result":{ @@ -1178,7 +1177,6 @@ class Controller: else: raise ValueError() except Exception: - errorLogging() response = { "status":400, "result":{ @@ -1203,7 +1201,6 @@ class Controller: else: raise ValueError() except Exception: - errorLogging() response = { "status":400, "result":{ @@ -1228,7 +1225,6 @@ class Controller: else: raise ValueError() except Exception: - errorLogging() response = { "status":400, "result":{ @@ -1319,7 +1315,6 @@ class Controller: else: raise ValueError() except Exception: - errorLogging() response = { "status":400, "result":{ @@ -1360,7 +1355,6 @@ class Controller: else: raise ValueError() except Exception: - errorLogging() response = { "status":400, "result":{ @@ -1385,7 +1379,6 @@ class Controller: else: raise ValueError() except Exception: - errorLogging() response = { "status":400, "result":{ @@ -1411,7 +1404,6 @@ class Controller: else: raise ValueError() except Exception: - errorLogging() response = { "status":400, "result":{ diff --git a/src-python/models/overlay/overlay_image.py b/src-python/models/overlay/overlay_image.py index fec4c741..708ad11c 100644 --- a/src-python/models/overlay/overlay_image.py +++ b/src-python/models/overlay/overlay_image.py @@ -67,9 +67,16 @@ class OverlayImage: font_path = os_path.join(self.root_path, font_family) font = ImageFont.truetype(font_path, font_size) except Exception: - errorLogging() - font_path = os_path.join(os_path.dirname(__file__), "..", "..", "..", "fonts", font_family) - font = ImageFont.truetype(font_path, font_size) + # overlayフォルダから操作している場合 + if 
os_path.exists(os_path.join(os_path.dirname(__file__), "..", "..", "..", "fonts", font_family)): + font_path = os_path.join(os_path.dirname(__file__), "..", "..", "..", "fonts", font_family) + font = ImageFont.truetype(font_path, font_size) + elif os_path.exists(os_path.join(os_path.dirname(__file__), "fonts", font_family)): + # src-pythonフォルダから操作している場合 + font_path = os_path.join(os_path.dirname(__file__), "fonts", font_family) + font = ImageFont.truetype(font_path, font_size) + else: + raise FileNotFoundError(f"Font file not found: {font_family}") text_width = draw.textlength(text, font) character_width = text_width // len(text) @@ -171,9 +178,14 @@ class OverlayImage: font_path = os_path.join(self.root_path, font_family) font = ImageFont.truetype(font_path, font_size) except Exception: - errorLogging() - font_path = os_path.join(os_path.dirname(__file__), "..", "..", "..", "fonts", font_family) - font = ImageFont.truetype(font_path, font_size) + if os_path.exists(os_path.join(os_path.dirname(__file__), "..", "..", "..", "fonts", font_family)): + font_path = os_path.join(os_path.dirname(__file__), "..", "..", "..", "fonts", font_family) + font = ImageFont.truetype(font_path, font_size) + elif os_path.exists(os_path.join(os_path.dirname(__file__), "fonts", font_family)): + font_path = os_path.join(os_path.dirname(__file__), "fonts", font_family) + font = ImageFont.truetype(font_path, font_size) + else: + raise FileNotFoundError(f"Font file not found: {font_family}") # 改行を含んだtextの最大の文字数を計算する text_width = max(draw.textlength(line, font) for line in text.split("\n")) @@ -207,9 +219,16 @@ class OverlayImage: font_path = os_path.join(self.root_path, self.LANGUAGES["Default"]) font = ImageFont.truetype(font_path, font_size) except Exception: - errorLogging() - font_path = os_path.join(os_path.dirname(__file__), "..", "..", "..", "fonts", self.LANGUAGES["Default"]) - font = ImageFont.truetype(font_path, font_size) + # overlayフォルダから操作している場合 + if 
os_path.exists(os_path.join(os_path.dirname(__file__), "..", "..", "..", "fonts", self.LANGUAGES["Default"])): + font_path = os_path.join(os_path.dirname(__file__), "..", "..", "..", "fonts", self.LANGUAGES["Default"]) + font = ImageFont.truetype(font_path, font_size) + elif os_path.exists(os_path.join(os_path.dirname(__file__), "fonts", self.LANGUAGES["Default"])): + # src-pythonフォルダから操作している場合 + font_path = os_path.join(os_path.dirname(__file__), "fonts", self.LANGUAGES["Default"]) + font = ImageFont.truetype(font_path, font_size) + else: + raise FileNotFoundError(f"Font file not found: {self.LANGUAGES['Default']}") text_height = font_size + ui_padding text_width = draw.textlength(date_time, font) From 78a580f52112d563dbac1cbe0bf55e3ea30ef404 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Wed, 8 Oct 2025 13:01:16 +0900 Subject: [PATCH 68/92] =?UTF-8?q?OSCQuery=E3=81=AE=E5=90=8D=E5=89=8D?= =?UTF-8?q?=E8=A7=A3=E6=B1=BA=E3=81=AE=E3=81=9F=E3=82=81service=E5=90=8D?= =?UTF-8?q?=E3=82=92=E3=83=A6=E3=83=8B=E3=83=BC=E3=82=AF=E3=81=AB=E5=A4=89?= =?UTF-8?q?=E6=9B=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit service_nameを "VRCT" -> f"VRCT:{UTC}" --- src-python/models/osc/osc.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src-python/models/osc/osc.py b/src-python/models/osc/osc.py index a47f1f8d..ce64623b 100644 --- a/src-python/models/osc/osc.py +++ b/src-python/models/osc/osc.py @@ -1,4 +1,4 @@ -import asyncio +import time from typing import Any from time import sleep from threading import Thread @@ -120,7 +120,9 @@ class OSCHandler: while True: try: - self.osc_query_service = OSCQueryService(self.osc_query_service_name, self.http_port, self.osc_server_port) + # osc_server_name + UTC timestampでユニークなサービス名を生成 + service_name = f"{self.osc_query_service_name}:{int(time.time())}" + self.osc_query_service = OSCQueryService(service_name, self.http_port, 
self.osc_server_port) for filter, target in self.dict_filter_and_target.items(): self.osc_query_service.advertise_endpoint(filter, access=OSCAccess.READWRITE_VALUE) break From faec6e91ba8b414f09016477257ebca58791af99 Mon Sep 17 00:00:00 2001 From: Sakamoto Shiina <68018796+ShiinaSakamoto@users.noreply.github.com> Date: Wed, 8 Oct 2025 16:25:15 +0900 Subject: [PATCH 69/92] [bugfix] UI: Fix the bug that the main functions ware not in loading status even while restoring the status 'enable'. --- .../ConfigPageCloseTriggerController.jsx | 9 +++++++++ src-ui/logics/main/useMainFunction.js | 3 +++ 2 files changed, 12 insertions(+) diff --git a/src-ui/app/_app_controllers/ConfigPageCloseTriggerController.jsx b/src-ui/app/_app_controllers/ConfigPageCloseTriggerController.jsx index 3a39be58..f74c0327 100644 --- a/src-ui/app/_app_controllers/ConfigPageCloseTriggerController.jsx +++ b/src-ui/app/_app_controllers/ConfigPageCloseTriggerController.jsx @@ -23,10 +23,13 @@ export const ConfigPageCloseTriggerController = () => { const { currentTranslationStatus, setTranslation, + pendingTranslationStatus, currentTranscriptionSendStatus, setTranscriptionSend, + pendingTranscriptionSendStatus, currentTranscriptionReceiveStatus, setTranscriptionReceive, + pendingTranscriptionReceiveStatus, } = useMainFunction(); const { currentMicThresholdCheckStatus, @@ -47,6 +50,12 @@ export const ConfigPageCloseTriggerController = () => { }; const restoreMainFunctionState = () => { + // First, set loading status all before waiting a backend process. + if (currentMainFunctionsStateMemory.data.translation === true) pendingTranslationStatus(); + if (currentMainFunctionsStateMemory.data.transcription_send === true) pendingTranscriptionSendStatus(); + if (currentMainFunctionsStateMemory.data.transcription_receive === true) pendingTranscriptionReceiveStatus(); + + // Then, restore them. 
if (currentMainFunctionsStateMemory.data.translation === true) setTranslation(true); if (currentMainFunctionsStateMemory.data.transcription_send === true) setTranscriptionSend(true); if (currentMainFunctionsStateMemory.data.transcription_receive === true) setTranscriptionReceive(true); diff --git a/src-ui/logics/main/useMainFunction.js b/src-ui/logics/main/useMainFunction.js index 334ef643..06afc075 100644 --- a/src-ui/logics/main/useMainFunction.js +++ b/src-ui/logics/main/useMainFunction.js @@ -87,16 +87,19 @@ export const useMainFunction = () => { toggleTranslation, updateTranslationStatus, setTranslation, + pendingTranslationStatus, // Exception.(It shouldn't be used in other function, normally.) currentTranscriptionSendStatus, toggleTranscriptionSend, updateTranscriptionSendStatus, setTranscriptionSend, + pendingTranscriptionSendStatus, // Exception.(It shouldn't be used in other function, normally.) currentTranscriptionReceiveStatus, toggleTranscriptionReceive, updateTranscriptionReceiveStatus, setTranscriptionReceive, + pendingTranscriptionReceiveStatus, // Exception.(It shouldn't be used in other function, normally.) 
currentForegroundStatus, toggleForeground, From 28e952ada3663b4428f9ec350d7690415ab67ca2 Mon Sep 17 00:00:00 2001 From: Sakamoto Shiina <68018796+ShiinaSakamoto@users.noreply.github.com> Date: Thu, 9 Oct 2025 09:23:05 +0900 Subject: [PATCH 70/92] =?UTF-8?q?=F0=9F=91=8D=EF=B8=8F[Update]=20Version?= =?UTF-8?q?=203.2.2=20->=203.3.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src-python/config.py | 2 +- src-tauri/tauri.conf.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src-python/config.py b/src-python/config.py index c350befa..9500b8fe 100644 --- a/src-python/config.py +++ b/src-python/config.py @@ -1048,7 +1048,7 @@ class Config: def init_config(self): # Read Only - self._VERSION = "3.2.2" + self._VERSION = "3.3.0" if getattr(sys, 'frozen', False): self._PATH_LOCAL = os_path.dirname(sys.executable) else: diff --git a/src-tauri/tauri.conf.json b/src-tauri/tauri.conf.json index 9e2402ef..8bc95f6b 100644 --- a/src-tauri/tauri.conf.json +++ b/src-tauri/tauri.conf.json @@ -1,7 +1,7 @@ { "$schema": "https://schema.tauri.app/config/2", "productName": "VRCT", - "version": "3.2.2", + "version": "3.3.0", "identifier": "com.vrct.app", "build": { "beforeDevCommand": "", From 5efa9c37d66352f9df15853d43b1766b06d25adb Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Thu, 9 Oct 2025 13:11:59 +0900 Subject: [PATCH 71/92] Add documentation for modules and runtime instructions - Created detailed documentation for the device_manager, model, model_extra, osc, overlay, overlay_image, transcription, translation, transliteration, utils, watchdog, and websocket modules. - Added a comprehensive run events payloads document outlining the payloads sent during various run events in the controller. - Included runtime instructions and dependencies for setting up the project in a Windows environment. 
- Introduced a mypy configuration file to manage type checking and ignore errors in specific modules temporarily. --- src-python/docs/CHANGELOG.md | 27 + src-python/docs/CODING_RULES.md | 171 +++++ src-python/docs/README.md | 18 + src-python/docs/api.md | 704 +++++++++++++++++++++ src-python/docs/architecture.md | 21 + src-python/docs/diagrams.md | 51 ++ src-python/docs/modules/config.md | 203 ++++++ src-python/docs/modules/controller.md | 158 +++++ src-python/docs/modules/device_manager.md | 73 +++ src-python/docs/modules/model.md | 105 +++ src-python/docs/modules/model_extra.md | 60 ++ src-python/docs/modules/osc.md | 17 + src-python/docs/modules/overlay.md | 31 + src-python/docs/modules/overlay_image.md | 115 ++++ src-python/docs/modules/transcription.md | 51 ++ src-python/docs/modules/translation.md | 21 + src-python/docs/modules/transliteration.md | 17 + src-python/docs/modules/utils.md | 74 +++ src-python/docs/modules/watchdog.md | 12 + src-python/docs/modules/websocket.md | 18 + src-python/docs/run_events_payloads.md | 125 ++++ src-python/docs/runtime.md | 43 ++ src-python/mypy.ini | 32 + 23 files changed, 2147 insertions(+) create mode 100644 src-python/docs/CHANGELOG.md create mode 100644 src-python/docs/CODING_RULES.md create mode 100644 src-python/docs/README.md create mode 100644 src-python/docs/api.md create mode 100644 src-python/docs/architecture.md create mode 100644 src-python/docs/diagrams.md create mode 100644 src-python/docs/modules/config.md create mode 100644 src-python/docs/modules/controller.md create mode 100644 src-python/docs/modules/device_manager.md create mode 100644 src-python/docs/modules/model.md create mode 100644 src-python/docs/modules/model_extra.md create mode 100644 src-python/docs/modules/osc.md create mode 100644 src-python/docs/modules/overlay.md create mode 100644 src-python/docs/modules/overlay_image.md create mode 100644 src-python/docs/modules/transcription.md create mode 100644 src-python/docs/modules/translation.md 
create mode 100644 src-python/docs/modules/transliteration.md create mode 100644 src-python/docs/modules/utils.md create mode 100644 src-python/docs/modules/watchdog.md create mode 100644 src-python/docs/modules/websocket.md create mode 100644 src-python/docs/run_events_payloads.md create mode 100644 src-python/docs/runtime.md create mode 100644 src-python/mypy.ini diff --git a/src-python/docs/CHANGELOG.md b/src-python/docs/CHANGELOG.md new file mode 100644 index 00000000..723f3942 --- /dev/null +++ b/src-python/docs/CHANGELOG.md @@ -0,0 +1,27 @@ +# CHANGELOG + +## 2025-10-09 — 型チェック整備と安全性向上 + +- 修正: `controller.py` + - `Controller.chatMessage` の戻り値注釈を `dict` に明示(関数は JSON 系の応答オブジェクトを返します)。 + - `Controller.checkSoftwareUpdated` が実際に応答を返すように `return` を追加。 + +- 修正: `model.py` + - `startCheckMicEnergy` / `startCheckSpeakerEnergy` のコールバック引数を Optional に変更し、呼び出し前に `callable` チェックを追加。これにより None を渡しても安全に扱えるようになりました。 + - `convertMessageToTransliteration` の返り値を常に list に統一。hiragana/romaji が False の場合は空リストを返します。 + - `createOverlayImageLargeLog` 等の Overlay 作成関数で `target_language` を dict で受けた場合に内部で言語リストへ正規化する挙動を明確化。 + +- 目的: mypy の型チェックの警告/エラーを削減し、ランタイムでの None 呼び出しによるクラッシュを防止するための低リスクな変更です。 + +- 注記: + - 追加で `types-requests` をプロジェクト仮想環境にインストールし、mypy の外部型スタブ不足を解消しました。 + - 本チェンジは内部の型注釈とガードを中心としており、動作ロジックの大きな変更は行っていません。動作確認は mypy(型チェック)と ruff(lint)を通過したことをもって行っています。 + +## 1.0.0 (initial) +- 初回ドキュメント作成: ソースコードに基づく仕様書 / 詳細設計書を docs 配下に追加。 +- 対象: utils, model, controller, device_manager, config, translation, transcription, overlay, websocket, osc, transliteration, watchdog + +今後の作業候補: +- requirements.txt の自動生成とテストスイート追加 +- ドキュメントの API サンプル(リクエスト/レスポンス)追加 +- UML 図/シーケンス図の画像化 diff --git a/src-python/docs/CODING_RULES.md b/src-python/docs/CODING_RULES.md new file mode 100644 index 00000000..19a626b0 --- /dev/null +++ b/src-python/docs/CODING_RULES.md @@ -0,0 +1,171 @@ +# VRCT backend — コーディングルール + +目的: +- 可読性と保守性を保ちながら既存スタイルを尊重する。 +- 漸進的に型注釈を導入し、mypy と ruff のチェックに合わせる。 +- 自動化(CI / 
pre-commit)へ導出しやすくする。 + +注意: 既存の命名・構造(関数名・クラス名・変数名・run mapping のキー等)はコード上の互換性のためそのまま維持します。以下は新規実装やリファクタ時に従うべきルールです。 + +## 目次 +- 命名規則 +- モジュール・パッケージ構成 +- インポート +- 型注釈と mypy 方針 +- ドキュメンテーション / docstrings +- エラーハンドリングとロギング +- 非同期 / スレッド / キューの扱い +- テストと CI +- リファクタ・互換性の観点 + +--- + +## 命名規則 +- モジュール名: 小文字、アンダースコアで区切る(例: `overlay_utils.py`)。既存ファイルに従う。 +- パッケージ名: 小文字(`models`, `websocket` など)。 +- クラス名: CapWords (PascalCase)。既存クラス(`Controller`, `Model`, `Overlay`)に従う。 +- 関数・メソッド名: snake_case。 +- 変数名: snake_case。短い一時変数は `i`, `j`, `buf` 等の伝統的な省略形を可とするが、意味ある名前を優先する。 +- 定数: UPPER_SNAKE_CASE(`config.py` の定数に合わせる)。 +- run_mapping のキー: 現在は短い key(例: `transcription_mic`)を内部で使い `run_mapping` に `/run/...` を置いている。この慣習は維持する。Controller 内で `self.run_mapping[...]` を直接参照する実装は許容される。 + +例: `selected_translation_compute_device` は内部 key、`/run/selected_translation_compute_device` が外部イベント名である点を区別して使う。 + +## モジュール・パッケージ構成 +- 各サブ領域(ocr, overlay, transcription, translation, websocket 等)は `models/` 下に整理済みのため、同様の粒度で新機能は追加する。 +- パッケージは必ず `__init__.py` を置く(static analysis / mypy のため)。空の `__init__.py` でも可。これにより相対インポートが安定する。 + +## インポート +- 標準ライブラリ、サードパーティ、ローカルの順でインポートをまとめる。 +- ローカルモジュールを参照する場合は相対 import を使ってもよいが、プロジェクト全体を PYTHONPATH に入れてテスト/静的解析できるようにすること。 +- 例: +``` +import os +import json + +import numpy as np + +from . 
import overlay_utils +``` + +## 型注釈と mypy 方針 +- 戦略: Relax + incremental annotations(漸進的型付け)。以下を守る。 + - 新規コードは可能な限り型注釈を追加する(関数シグネチャ・返り値)。 + - 既存の大きな関数は段階的に注釈する。まずモジュール境界(public API)のシグネチャに注釈を入れる。内部の細かい変数は後回し。 + - CI では初期段階で mypy を `--ignore-missing-imports --allow-untyped-defs --allow-redefinition` のように緩めて実行する。段階的に `--check-untyped-defs` を有効化していく。 + - 型の `Any` を多用しない。どうしても必要な場合は `# type: ignore[assignment]` を付けて理由をコメントに残す。 + +## docstrings / コメント +- 重要な public 関数・メソッドとクラスに短い docstring を追加する(目的・引数・返り値の要約)。Google/Numpy スタイルのどちらかに統一する必要はないが、プロジェクト内で混乱しないよう短く統一すること。 +- 実装トリッキーな箇所には `# NOTE:` や `# FIXME:` コメントを残し、必要なら issue を紐付ける(例: `# NOTE: keep in sync with mainloop.run_mapping`)。 + +## エラーハンドリングとロギング +- 例外をキャッチするときは有用なコンテキストをログに残す(`errorLogging()` のようなユーティリティを使う)。 +- broad except: を使う場合は最低限 `errorLogging()` を呼び、必要なら `raise` して上位へ伝播する。 + +## 非同期 / スレッド / キューの扱い +- スレッドは `threading.Thread` を使っている箇所があるため、スレッド間通信は `queue.Queue` ベースで実装すること。 +- スレッドを生成する関数は `start_` プレフィックス(例: `start_transcription_thread`)のように命名すると分かりやすい。 + +## テスト・CI +- まずは軽量な CI ワークフローを入れる: + - ruff check + - mypy (relaxed) + - 自動テスト(将来的に pytest を追加) +- pre-commit フックの導入を推奨: ruff auto-fix と isort (import 整理) を採用できる。 + +## リファクタ・互換性 +- 既存 public API(stdin/stdout の endpoint 仕様や `run_mapping` のキー、Controller のメソッド名)は後方互換を優先する。変更が必要な場合は CHANGELOG に明示する。 + +## 小さなコーディング規約チェックリスト(PR テンプレ) +- 新しい public メソッドには docstring を付けたか +- 既存命名規則に従っているか(snake_case / PascalCase / UPPER_SNAKE_CASE) +- 型アノテーションをシグネチャに追加したか(可能な限り) +- 直接 stdout に JSON を print する箇所は `printResponse` 等ユーティリティ経由か確認する + +--- + +このドキュメントは現状のスタイルを尊重して最小限の規則を与えることを目的としています。次のステップを希望する場合: +- CI に ruff/mypy を組み込む PR を作成 +- pre-commit 用の設定ファイル(`.pre-commit-config.yaml`)を追加して自動整形を導入 +- 型注釈・テストのためのタスク分割(優先順位をつけた TODO) + +要望があれば、これをベースに `.pre-commit-config.yaml`、`pyproject.toml` の ruff 設定、あるいは CI ワークフローの雛形(GitHub Actions)を作成します。 + +## Copilot と共同作業するための具体例とテンプレート +以下は Copilot に推奨プロンプトを投げやすく、また PR 作成時に便利なテンプレート類です。コピー&ペーストして使用してください。 + +### 関数テンプレート(型注釈 + 
docstring) +```python +from typing import Any, Dict, Optional + +def example_handler(endpoint: str, data: Any) -> Dict[str, Any]: + """Handle an example endpoint. + + Args: + endpoint: incoming endpoint string (e.g. '/get/data/version') + data: request payload (None for many GETs) + + Returns: + A dict suitable for printResponse(status, endpoint, result) + """ + # implementation... + result = {"status": 200, "endpoint": endpoint, "result": data} + return result +``` + +### Controller の run 発行パターン(推奨) +Controller 内で run を呼ぶときは `self.run(self.run_mapping["key"], payload)` の形を維持してください。Copilot に尋ねるときは「この run key に対応する payload の形は?」と聞くとペイロード例を生成しやすいです。 + +### Docstring 例(Google スタイル) +```python +def set_selected_tab_no(tab_no: int) -> Dict[str, Any]: + """Set the current tab. + + Args: + tab_no: index of tab to select + + Returns: + A response dict with status and new tab number + """ + ... +``` + +### PR チェックリスト(拡張版) +- コーディング規則に従っているか +- 新しい public API の docstring があるか +- 型注釈を最小限追加しているか(特に関数シグネチャ) +- ruff check が通るか +- mypy(relaxed)で重大な型エラーが出ていないか +- docs (必要箇所) を更新したか(API 変更があれば) + +### 推奨 `.pre-commit-config.yaml`(例) +```yaml +repos: + - repo: https://github.com/charliermarsh/ruff-pre-commit + rev: v0.14.0 + hooks: + - id: ruff + args: ["--fix"] + - repo: https://github.com/PyCQA/isort + rev: 5.12.0 + hooks: + - id: isort + - repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.18.2 + hooks: + - id: mypy + args: ["--ignore-missing-imports", "--allow-untyped-defs", "--allow-redefinition"] +``` + +### 推奨 ruff 設定(pyproject.toml への最小設定例) +```toml +[tool.ruff] +line-length = 88 +extend-ignore = ["E203"] +select = ["E", "F", "W", "C90"] +``` + +--- + +更新が必要なら私が `.pre-commit-config.yaml`、`pyproject.toml`、および CI ワークフロー (GitHub Actions) の雛形を作成してコミットまでできます。どれを優先しますか? 
diff --git a/src-python/docs/README.md b/src-python/docs/README.md new file mode 100644 index 00000000..be03a5d0 --- /dev/null +++ b/src-python/docs/README.md @@ -0,0 +1,18 @@ +# VRCT — ドキュメント + +このドキュメントセットは、VRCT プロジェクト(`src-python`)に含まれる実装の仕様書 / 設計書 / 詳細設計書です。 + +目的 +- ソースコード構造、モジュール間データフロー、API エンドポイント、設定、実行手順、トラブルシュートを網羅して開発・運用の参照を容易にする。 + +対象 +- `utils.py`, `model.py`, `controller.py`, `mainloop.py`, `device_manager.py`, `config.py` および `models/` 以下の全モジュール。 + +ドキュメント構成(主要ファイル) +- `architecture.md` — アーキテクチャ概観 +- `modules/` — 各モジュールごとの詳細設計(個別ファイル) +- `api.md` — 外部/内部向け API エンドポイント マッピング(`mainloop.py` の `mapping` / `run_mapping` に準拠) +- `runtime.md` — 実行/セットアップ手順、依存関係 +- `diagrams.md` — システム図(Mermaid とテキスト両方) +- `CODING_RULES.md` — プロジェクト固有のコーディング規約(命名・型方針・lint/mypy 方針 等) +- `CHANGELOG.md` — 変更履歴 \ No newline at end of file diff --git a/src-python/docs/api.md b/src-python/docs/api.md new file mode 100644 index 00000000..373e29d4 --- /dev/null +++ b/src-python/docs/api.md @@ -0,0 +1,704 @@ +--- + + + +## API エンドポイント仕様 + +概要 +- このドキュメントは `mainloop.py` の `mapping` と `run_mapping` に定義された全エンドポイントを列挙します。 +- すべてのリクエストは標準入力経由で JSON を一行送る形で受信され、標準出力へ JSON 応答を出力します。 + +共通リクエスト形式 +- JSON オブジェクトを 1 行で標準入力に流します。 +- フィールド: + - `endpoint`: エンドポイント文字列 (例: `/get/data/version`) + - `data`: 任意(多くの GET 系は null、SET 系は新しい値やオブジェクト) + +例 +```json +{"endpoint":"/get/data/version","data":null} +``` + +共通レスポンス形式 +- mainloop は各リクエストの処理結果を次の形式で標準出力に出します(内部 util の `printResponse` を経由): + +成功例: +```json +{"status":200,"endpoint":"/get/data/version","result":"3.2.2"} +``` + +エラー例: +```json +{"status":400,"endpoint":"/set/data/osc_ip_address","result":{"message":"Invalid IP address","data":"127.0.0.1"}} +``` + +ロック状態と再試行 +- `mapping` にある各ハンドラは `"status": True|False` を持ちます。 + - False の場合、`handleRequest` は 423 (Locked endpoint) を返し、メインのハンドラはその要求をキューに戻して待機します(遅延再実行のため)。 + +run イベント +- `controller` は UI 更新などの非同期通知を行うために `run(status, endpoint, payload)` を呼び出します。これらは `run_mapping` にマップされ、外部 UI には 
`/run/...` 形式のエンドポイントで配信されます。 + +以下は `controller.py` から抽出した run イベントと、実際に送られるペイロードの具体例です。UI 側はこれらの JSON 形状を期待することで正しく動作します。 + +`/run/connected_network` (200) + - payload: true | false + +`/run/enable_ai_models` (200) + - payload: true | false + +`/run/mic_host_list` (200) + - payload: ["Host 1", "Host 2"] + +`/run/mic_device_list` (200) + - payload: ["Microphone (Realtek)", "Headset Microphone"] + +`/run/speaker_device_list` (200) + - payload: ["Speakers (Realtek)", "Headset"] + +`/run/initialization_complete` (200) + - payload: dict mapping endpoint -> current value (constructed from init_mapping) + - 例: {"/get/data/version":"3.2.2","/get/data/selected_tab_no":0} + +`/run/selected_mic_device` (200) + - payload: `{"host": <host>, "device": <device>}` + +`/run/selected_speaker_device` (200) + - payload: string (device name) + +`/run/error_device` (400) + - payload: {"message":"No mic device detected","data": null} + +`/run/check_mic_volume` (200) + - payload: numeric energy value (float) + +`/run/check_speaker_volume` (200) + - payload: numeric energy value (float) + +`/run/download_progress_ctranslate2_weight` (200) + - payload: {"weight_type":"m2m100_418m","progress":0.42} + +`/run/downloaded_ctranslate2_weight` (200) + - payload: "m2m100_418m" + +`/run/error_ctranslate2_weight` (400) + - payload: {"message":"CTranslate2 weight download error","data": null} + +`/run/download_progress_whisper_weight` (200) + - payload: {"weight_type":"base","progress":0.78} + +`/run/downloaded_whisper_weight` (200) + - payload: "base" + +`/run/error_whisper_weight` (400) + - payload: {"message":"Whisper weight download error","data": null} + +`/run/word_filter` (200) + - payload: `{"message":"Detected by word filter: <word>"}` + +`/run/error_translation_engine` (400) + - payload: {"message":"Translation engine limit error","data": null} + +`/run/error_translation_mic_vram_overflow` (400) + - payload: {"message":"VRAM out of memory during translation of mic","data":""} +
+`/run/error_translation_speaker_vram_overflow` (400) + - payload: {"message":"VRAM out of memory during translation of speaker","data":""} + +`/run/error_translation_chat_vram_overflow` (400) + - payload: {"message":"VRAM out of memory during translation of chat","data":""} + +`/run/enable_translation` (200/400) + - payload: on OOM: {"message":"Translation disabled due to VRAM overflow","data": false} + +`/run/transcription_send_mic_message` (200) + - payload: + { + "original": {"message": "Hello", "transliteration": []}, + "translations": [ {"message":"こんにちは","transliteration":[]}, ... ] + } + +`/run/transcription_receive_speaker_message` (200) + - payload: same shape as `/run/transcription_send_mic_message` + +`/run/software_update_info` (200) + - payload: e.g. {"has_update": true, "latest_version": "3.3.0"} + +`/run/selected_translation_compute_type` (200) + - payload: string ("auto"|"cpu"|"cuda:0") + +`/run/selected_transcription_compute_type` (200) + - payload: string + +`/run/selected_translation_compute_device` (200) + - payload: device descriptor (e.g. 
{"name":"cuda:0","type":"gpu"}) + +`/run/selected_translation_engines` (200) + - payload: config.SELECTED_TRANSLATION_ENGINES (list/dict per tab) + +`/run/translation_engines` (200) + - payload: ["CTranslate2"] + +`/run/initialization_progress` (200) + - payload: integer (1..4) + +`/run/enable_osc_query` (200) + - payload: {"data": true|false, "disabled_functions": ["vrc_mic_mute_sync"]} + + +エンドポイント一覧(mapping にある全エンドポイント) + +注: 各行の説明では、`method` 的な概念はありません。すべてのエンドポイントは JSON リクエストで同様に呼び出します。`data` の期待値は説明に記載しています。 + +1) メイン操作 + +- /set/enable/translation — data: null — 翻訳を有効にします。 + - 成功応答例: + ```json + {"status":200, "endpoint":"/set/enable/translation", "result": true} + ``` + - 失敗例(VRAM OOM を検出して無効化されたケースは run イベントで通知されます): + ```json + {"status":400, "endpoint":"/set/enable/translation", "result":{"message":"Translation disabled due to VRAM overflow","data":false}} + ``` + +- /set/disable/translation — data: null — 翻訳を無効にします。 + - 成功応答例: + ```json + {"status":200, "endpoint":"/set/disable/translation", "result": false} + ``` + +- /set/enable/transcription_send — data: null — マイク転写(送信)を有効化します。 + - 実行はスレッドで開始される場合がある。成功例: + ```json + {"status":200, "endpoint":"/set/enable/transcription_send", "result": true} + ``` + +- /set/disable/transcription_send — data: null — 停止要求。成功例: + ```json + {"status":200, "endpoint":"/set/disable/transcription_send", "result": false} + ``` + +- /set/enable/transcription_receive — data: null — スピーカー側の転写を有効化します。 +- /set/disable/transcription_receive — data: null — 無効化します。 + +- /set/enable/foreground — data: null — フォアグラウンド表示を有効化します。 + - 成功例: {"status":200, "endpoint":"/set/enable/foreground", "result": true} + +- /get/data/selected_tab_no — data: null — 現在のタブ番号を返します。 + - 例: {"status":200, "endpoint":"/get/data/selected_tab_no", "result": 0} + +- /get/data/main_window_sidebar_compact_mode — data: null — サイドバーのコンパクト表示の現在値を返します。 + - 例: {"status":200, "endpoint":"/get/data/main_window_sidebar_compact_mode","result": false} + + +- 
/set/data/selected_tab_no — data: int — タブ番号を設定します。 + - リクエスト例: {"endpoint":"/set/data/selected_tab_no","data":1} + - 成功応答例: {"status":200, "endpoint":"/set/data/selected_tab_no","result":1} + +- /get/data/translation_engines — data: null — 利用可能な翻訳エンジン一覧を返します。 + - 例: {"status":200, "endpoint":"/get/data/translation_engines","result":["CTranslate2"]} + +- /get/data/selectable_language_list — data: null — 選択可能な言語一覧(言語コード, country 等を含むデータ構造) + - 例: {"status":200, "endpoint":"/get/data/selectable_language_list","result":[{"language":"English","country":"US"},{"language":"Japanese","country":"JP"}]} + +- /get/data/transcription_engines — data: null — 利用可能な転写エンジン一覧 + - 例: {"status":200, "endpoint":"/get/data/transcription_engines","result":["Google","Whisper"]} + + +- /run/send_message_box — data: {"id": <任意>, "message": "..."} + - 内部で `Controller.chatMessage` を呼び出します。戻りは変換済メッセージ構造体。 + - リクエスト例: + ```json + {"endpoint":"/run/send_message_box","data":{"id":123,"message":"Hello"}} + ``` + - 成功応答例: + ```json + {"status":200,"endpoint":"/run/send_message_box","result":{"id":123,"original":{"message":"Hello","transliteration":[]},"translations":[{"message":"","transliteration":[]}]}} + ``` + +- /run/typing_message_box — data: null — OSC でタイピング状態を伝える場合に使用。成功例: {"status":200,...} +- /run/stop_typing_message_box — data: null — 停止。 + +- /run/send_text_overlay — data: object — オーバーレイに表示するテキストを更新します。例: {"text":"Hello","lang":"English"} + - 成功応答は送信した data をそのまま返すことが多い。 + +- /run/swap_your_language_and_target_language — data: null — 選択中の入出力言語を入れ替えます。成功例: {"status":200, ...} + + +/run/update_software — data: null — 非同期でアップデート処理を開始します。成功応答: {"status":200, "result": true} +/run/update_cuda_software — data: null — CUDA アップデートを開始します。 + + +/set/enable/transcription_receive — data: null — スピーカー側の転写(受信)を有効化 +/set/disable/transcription_receive — data: null — 無効化 + + +/set/enable/foreground — data: null — フォアグラウンド表示を有効化 +/set/disable/foreground — data: null — 無効化 + +- /get/data/selected_tab_no 
— data: null — 現在のタブ番号を返す +- /set/data/selected_tab_no — data: int — タブ番号を設定 + +- /get/data/translation_engines — data: null — 使える翻訳エンジン一覧を返す + +- /get/data/selected_translation_engines — data: null — 各タブで選択されている翻訳エンジン(タブ別辞書) + - 例: {"status":200, "endpoint":"/get/data/selected_translation_engines","result":{"0":["CTranslate2"],"1":["CTranslate2"]}} + +- /get/data/selected_your_languages — data: null — 各タブの入力言語設定 + - 例: {"status":200, "endpoint":"/get/data/selected_your_languages","result":{"0":{"language":"English","enable":true}}} + +- /get/data/selected_target_languages — data: null — 各タブの出力言語設定 + - 例: {"status":200, "endpoint":"/get/data/selected_target_languages","result":{"0":{"1":{"language":"Japanese","enable":true}}}} + +- /get/data/selected_transcription_engine — data: null — 現在選択されている転写エンジン + - 例: {"status":200, "endpoint":"/get/data/selected_transcription_engine","result":"Whisper"} + +- /run/send_message_box — data: {"id":..., "message": "..."} — チャット送信を実行(chatMessage を内部呼び出し) +- /run/typing_message_box — data: null — タイピング開始通知(OSC 経由で送信される場合あり) +- /run/stop_typing_message_box — data: null — タイピング停止 + +- /run/send_text_overlay — data: {text settings...} — オーバーレイ用のテキスト表示を更新 + +- /run/swap_your_language_and_target_language — data: null — 入出力言語を入れ替え + +- /run/update_software — data: null — ソフト更新処理をスレッドで開始 +- /run/update_cuda_software — data: null — CUDA 関連更新を開始 + +2) 表示・外観設定 +- /get/data/version — data: null — アプリ版を返す +- /get/data/transparency — data: null — 透過率 +- /set/data/transparency — data: int — 透過率を設定 +- /get/data/ui_scaling — data: null — UI スケール +- /set/data/ui_scaling — data: int +- /get/data/textbox_ui_scaling, /set/data/textbox_ui_scaling +- /get/data/message_box_ratio, /set/data/message_box_ratio +- /get/data/send_message_button_type, /set/data/send_message_button_type +- /get/data/show_resend_button, /set/enable/show_resend_button, /set/disable/show_resend_button +- /get/data/font_family, /set/data/font_family +- /get/data/ui_language, 
/set/data/ui_language +- /get/data/main_window_geometry, /set/data/main_window_geometry + +3) 計算デバイス関連 +- /get/data/compute_mode — data: null — compute mode +- /get/data/translation_compute_device_list — data: null — 選択可能な翻訳デバイス一覧 +- /get/data/selected_translation_compute_device — data: null +- /set/data/selected_translation_compute_device — data: device descriptor — 選択 +- /get/data/transcription_compute_device_list — same as translation +- /get/data/selected_transcription_compute_device, /set/data/selected_transcription_compute_device + +4) 翻訳設定 +- /get/data/selectable_ctranslate2_weight_type_dict — data: null — 利用可能な ctranslate2 重みの辞書 +- /get/data/ctranslate2_weight_type, /set/data/ctranslate2_weight_type +- /get/data/selected_translation_compute_type, /set/data/selected_translation_compute_type +- /run/download_ctranslate2_weight — data: "weight_type" — 指定した重みをダウンロード(非同期可) +- /get/data/deepl_auth_key — data: null — DeepL API キー(存在すれば返却、セキュリティ上の注意あり) +- /set/data/deepl_auth_key — data: "" — DeepL キーを設定(キー検証あり) +- /delete/data/deepl_auth_key — data: null — DeepL キーを削除 + +- /set/data/selected_translation_engines — data: dict/list — 各タブの翻訳エンジン選択を設定します。 + - 例: {"endpoint":"/set/data/selected_translation_engines","data":{"0":["CTranslate2"]}} + +- /set/data/selected_transcription_engine — data: string — 現在の転写エンジンを設定します。 + - 例: {"endpoint":"/set/data/selected_transcription_engine","data":"Whisper"} + +- /set/enable/main_window_sidebar_compact_mode — data: null — サイドバーをコンパクト表示に設定 + - 例: {"status":200,"endpoint":"/set/enable/main_window_sidebar_compact_mode","result": true} + +- /set/disable/main_window_sidebar_compact_mode — data: null — サイドバーのコンパクト表示を解除 + - 例: {"status":200,"endpoint":"/set/disable/main_window_sidebar_compact_mode","result": false} +- /get/data/convert_message_to_romaji, /set/enable/convert_message_to_romaji, /set/disable/convert_message_to_romaji +- /get/data/convert_message_to_hiragana, /set/enable/convert_message_to_hiragana, 
/set/disable/convert_message_to_hiragana + +5) トランスクリプション / デバイス +- /get/data/mic_host_list, /get/data/mic_device_list, /get/data/speaker_device_list +- /get/data/auto_mic_select, /set/enable/auto_mic_select, /set/disable/auto_mic_select +- /get/data/selected_mic_host, /set/data/selected_mic_host +- /get/data/selected_mic_device, /set/data/selected_mic_device +- /get/data/mic_threshold, /set/data/mic_threshold +- /get/data/mic_automatic_threshold, /set/enable/mic_automatic_threshold, /set/disable/mic_automatic_threshold +- /get/data/mic_record_timeout, /set/data/mic_record_timeout +- /get/data/mic_phrase_timeout, /set/data/mic_phrase_timeout +- /get/data/mic_max_phrases, /set/data/mic_max_phrases +- /get/data/hotkeys, /set/data/hotkeys +- /get/data/plugins_status, /set/data/plugins_status +- /get/data/mic_avg_logprob, /set/data/mic_avg_logprob +- /get/data/mic_no_speech_prob, /set/data/mic_no_speech_prob +- /set/enable/check_mic_threshold, /set/disable/check_mic_threshold +- /get/data/mic_word_filter, /set/data/mic_word_filter + +6) スピーカー側設定 +- /get/data/auto_speaker_select, /set/enable/auto_speaker_select, /set/disable/auto_speaker_select +- /get/data/selected_speaker_device, /set/data/selected_speaker_device +- /get/data/speaker_threshold, /set/data/speaker_threshold +- /get/data/speaker_automatic_threshold, /set/enable/speaker_automatic_threshold, /set/disable/speaker_automatic_threshold +- /get/data/speaker_record_timeout, /set/data/speaker_record_timeout +- /get/data/speaker_phrase_timeout, /set/data/speaker_phrase_timeout +- /get/data/speaker_max_phrases, /set/data/speaker_max_phrases +- /get/data/speaker_avg_logprob, /set/data/speaker_avg_logprob +- /get/data/speaker_no_speech_prob, /set/data/speaker_no_speech_prob +- /set/enable/check_speaker_threshold, /set/disable/check_speaker_threshold + +7) Whisper / トランスクリプション重み +- /get/data/selectable_whisper_weight_type_dict +- /get/data/whisper_weight_type, /set/data/whisper_weight_type +- 
/get/data/selected_transcription_compute_type, /set/data/selected_transcription_compute_type +- /run/download_whisper_weight — data: "weight_type" + +8) VR / オーバーレイ +- /get/data/overlay_small_log, /set/enable/overlay_small_log, /set/disable/overlay_small_log +- /get/data/overlay_small_log_settings, /set/data/overlay_small_log_settings +- /get/data/overlay_large_log, /set/enable/overlay_large_log, /set/disable/overlay_large_log +- /get/data/overlay_large_log_settings, /set/data/overlay_large_log_settings +- /get/data/overlay_show_only_translated_messages, /set/enable/overlay_show_only_translated_messages, /set/disable/overlay_show_only_translated_messages + +9) その他設定 +- /get/data/send_message_format_parts, /set/data/send_message_format_parts +- /get/data/received_message_format_parts, /set/data/received_message_format_parts +- /get/data/auto_clear_message_box, /set/enable/auto_clear_message_box, /set/disable/auto_clear_message_box +- /get/data/send_only_translated_messages, /set/enable/send_only_translated_messages, /set/disable/send_only_translated_messages +- /get/data/logger_feature, /set/enable/logger_feature, /set/disable/logger_feature +- /run/open_filepath_logs +- /get/data/vrc_mic_mute_sync, /set/enable/vrc_mic_mute_sync, /set/disable/vrc_mic_mute_sync +- /get/data/send_message_to_vrc, /set/enable/send_message_to_vrc, /set/disable/send_message_to_vrc +- /get/data/send_received_message_to_vrc, /set/enable/send_received_message_to_vrc, /set/disable/send_received_message_to_vrc + +10) WebSocket +- /get/data/websocket_host, /set/data/websocket_host +- /get/data/websocket_port, /set/data/websocket_port +- /get/data/websocket_server, /set/enable/websocket_server, /set/disable/websocket_server + +11) OSC / 高度設定 +- /get/data/osc_ip_address, /set/data/osc_ip_address +- /get/data/osc_port, /set/data/osc_port +- /get/data/notification_vrc_sfx, /set/enable/notification_vrc_sfx, /set/disable/notification_vrc_sfx +- /run/open_filepath_config_file +- /run/feed_watchdog + 
+挙動メモ / 注意点 +- `data` は受信時に `encodeBase64` が適用される場合があります(バイナリや特殊文字対策)。 +- いくつかのエンドポイントは内部的にバックグラウンドスレッドを立ち上げます(ダウンロード・更新処理・transliteration 等)。 +- 翻訳・転写関連は VRAM OOM を検知すると自動的に関連機能を無効化し、UI に 400 系の run イベントを送信します。API 消費者はこれらの run イベントを監視する必要があります。 + +次の作業 +- `docs/modules/controller.md` に記載した Controller のメソッド詳細と紐付けて、各エンドポイントごとに具体的な request/response のサンプル(body の構造)を追加します。 +### API / メッセージマッピング(詳細) + +このアプリは stdin/stdout を通じた 1 行 JSON メッセージで制御します。内部では `mainloop.py` の `mapping` が受信 endpoint を Controller のメソッドに結び付け、`run_mapping` が非同期通知のエンドポイントを定義します。 + +受信メッセージ(stdin) +```json +{ "endpoint": "/set/data/selected_tab_no", "data": 0 } +``` + +送信メッセージ(stdout) +- 成功: printResponse が次を出力します。 +```json +{ "status": 200, "endpoint": "/get/data/version", "result": "3.2.2" } +``` +- エラー: +```json +{ "status": 400, "endpoint": "/set/data/osc_ip_address", "result": {"message":"Invalid IP address","data":"127.0.0.1"} } +``` + +動作原則 +- `/get/data/*` : Controller の getter を呼び、設定やリストを返す。 +- `/set/data/*` : Controller の setter を呼び、設定を変更して新値を返す。 +- `/run/*` : 非同期アクションや UI ボタンが実行する処理(ダウンロード、更新、送信など)。 +- `mapping` の `"status": False` はロック(423 を返し、要求はキューに戻され再試行される)。 + +表記ルール +- Controller メソッドは `Controller.<method>` の形式で明記。 +- `run events` は Controller が UI に通知する `run_mapping` の `/run/...` エンドポイント名を列挙します。 + +以下は `mainloop.py` の `mapping` に基づいた、主要エンドポイントの詳細(カテゴリ順)。 + +1) メイン操作(チャット/翻訳/転写) + +- Endpoint: `/set/enable/translation` + - Controller: `Controller.setEnableTranslation` + - data: null + - success: {status:200, result: true} + - error example: {status:400, result:{message:"Translation disabled due to VRAM overflow", data: false}} + - run events: `/run/enable_translation` を発行して UI に状態を通知する。 + +- Endpoint: `/set/disable/translation` + - Controller: `Controller.setDisableTranslation` + - data: null + - success: {status:200, result: false} + - run events: `/run/enable_translation` + +- Endpoint: `/set/enable/transcription_send` + - Controller: `Controller.setEnableTranscriptionSend` + - data: null
+ - success: {status:200, result: true} + - side-effect: `Controller.startThreadingTranscriptionSendMessage` を呼びバックグラウンドで音声転写を開始する。 + - run events: `/run/enable_transcription_send` + +- Endpoint: `/set/disable/transcription_send` + - Controller: `Controller.setDisableTranscriptionSend` + - data: null + - success: {status:200, result: false} + +- Endpoint: `/run/send_message_box` + - Controller: `Controller.sendMessageBox` -> 内部で `Controller.chatMessage` + - data: {"id": <任意>, "message": "..."} + - success example: {status:200, result: {"id":123, "original":{...}, "translations":[...]}} + - run events: 転送先言語や翻訳結果があれば `/run/transcription_send_mic_message` などが発行される。 + +- Endpoint: `/run/send_text_overlay` + - Controller: `Controller.sendTextOverlay` + - data: object (例: {"text":"Hello","lang":"English"}) + - success: echo back the data + - side-effect: オーバーレイ更新(small/large に応じた出力) + +2) 表示 / 外観設定 +- Endpoint: `/get/data/version` + - Controller: `Controller.getVersion` + - data: null + - success: {status:200, result: config.VERSION} + +- Endpoint: `/get/data/transparency` / `/set/data/transparency` + - Controller: `Controller.getTransparency` / `Controller.setTransparency` + - data for set: integer (0-255 等、設定側で検証) + - success example: {status:200, result: } + +(UI スケーリング、textbox スケーリング、font_family, ui_language 等の /get と /set は同様のパターン: Controller の getXXX / setXXX を呼ぶ) + +3) 計算デバイス関連 +- Endpoint: `/get/data/translation_compute_device_list` -> `Controller.getComputeDeviceList` + - data: null + - result: list of device descriptors (構造は `config.SELECTABLE_COMPUTE_DEVICE_LIST` に従う) + +- Endpoint: `/set/data/selected_translation_compute_device` + - Controller: `Controller.setSelectedTranslationComputeDevice` + - data: device descriptor (例: {"name":"cuda:0","type":"gpu"}) + - side-effects: `model.setChangedTranslatorParameters(True)` が呼ばれ、実行時にモデル再ロードが必要な場合がある。 + - success: {status:200, result: selected_device} + +4) 翻訳/重み管理 +- Endpoint: 
`/get/data/selectable_ctranslate2_weight_type_dict` + - Controller: `Controller.getSelectableCtranslate2WeightTypeDict` + - result: dict mapping weight_type -> bool + +- Endpoint: `/run/download_ctranslate2_weight` + - Controller: `Controller.downloadCtranslate2Weight` + - data: "weight_type" (例: "m2m100_418m") + - behavior: 非同期フラグでスレッド起動可能。進捗は run events `/run/download_progress_ctranslate2_weight` を発行。完了時に `/run/downloaded_ctranslate2_weight`。 + +- Endpoint: `/set/data/deepl_auth_key` + - Controller: `Controller.setDeeplAuthKey` + - data: string (API key) + - behavior: 内部で `model.authenticationTranslatorDeepLAuthKey` を実行して検証。失敗時は 400 を返す。 + +5) トランスクリプション / デバイス +- Endpoint: `/get/data/mic_host_list` -> `Controller.getMicHostList` + - data: null + - result: dict/list of hosts + +- Endpoint: `/set/data/selected_mic_host` -> `Controller.setSelectedMicHost` + - data: host identifier (string) + - side-effects: デフォルトデバイスを `model.getMicDefaultDevice()` で選択し、エネルギーチェックや転写スレッドの再起動が発生する場合がある。 + +- Endpoint: `/set/data/mic_threshold` -> `Controller.setMicThreshold` + - data: integer + - validation: 0 <= value <= config.MAX_MIC_THRESHOLD + - success: {status:200, result: new_value} error: 400 with message and old value + +6) スピーカー関連(受信) +- Endpoint: `/set/data/selected_speaker_device` -> `Controller.setSelectedSpeakerDevice` + - data: device descriptor + - side-effects: スピーカー転写スレッド(ENABLE_CHECK_ENERGY_RECEIVE)を再起動する可能性あり + +7) Whisper / トランスクリプション重み +- Endpoint: `/run/download_whisper_weight` + - Controller: `Controller.downloadWhisperWeight` + - data: "weight_type" + - run events: `/run/download_progress_whisper_weight`, `/run/downloaded_whisper_weight` + +8) オーバーレイ / VR +- Endpoint: `/set/enable/overlay_small_log` -> `Controller.setEnableOverlaySmallLog` + - side-effect: `model.startOverlay()` を呼び、`model.updateOverlaySmallLog` で描画が更新される + +9) WebSocket / OSC / Watchdog +- Endpoint: `/set/data/websocket_host` -> `Controller.setWebSocketHost` + - validation: IP 形式チェック 
(`isValidIpAddress`) + - if WebSocket server running: attempts to restart server on new host/port (checks availability via `isAvailableWebSocketServer`) + +- Endpoint: `/set/data/osc_ip_address` -> `Controller.setOscIpAddress` + - validation: IP 形式。失敗時は 400 を返す。 + +- Endpoint: `/run/feed_watchdog` -> `Controller.feedWatchdog` + - Controller: `Controller.feedWatchdog` ➜ `model.feedWatchdog()` + +共通的な失敗モード(クライアント実装者向けメモ) +- 無効なパラメータ: 400 と {message,data} を返す。 +- ロック: 423 (Locked endpoint) — UI 側はリトライまたはキュー内での再試行を待つ。 +- 内部エラー: 500 とエラーメッセージ(詳細はログ)を返す。 +- VRAM OOM / モデルエラー: Controller は `model.detectVRAMError` を使い、必要に応じて機能を無効化し、run イベントで通知する。 + +付録: すぐ使える呼び出し例 +- バージョン取得 +```json +{ "endpoint": "/get/data/version", "data": null } +``` + +- タブ切替 +```json +{ "endpoint": "/set/data/selected_tab_no", "data": 1 } +``` + +- メッセージ送信(チャット) +```json +{ "endpoint": "/run/send_message_box", "data": {"id": 555, "message": "Hello world"} } +``` + +次の作業 +- ① `docs/modules/controller.md` の各メソッドとこの `docs/api.md` を突き合わせ、未記載の `run_mapping` イベントのペイロード例を追加します。 +- ② 軽い品質ゲート(README と runtime 注意の草案作成)を実行します。 + +## エンドポイント別 JSON スキーマ(補完) + +このセクションでは `mainloop.py` の `mapping` に定義された全エンドポイントをパターンごとに整理し、クライアントが送信すべき `request` と期待される `response` の JSON スキーマを明示します。多数のエンドポイントは共通パターンに従うため、パターン定義と代表例でほとんどのケースをカバーしています。 + +共通ルール +- リクエストは必ず 1 行 JSON: {"endpoint": "<エンドポイント>", "data": <ペイロード>}。 +- レスポンスは {"status": <ステータスコード>, "endpoint": "<エンドポイント>", "result": <結果>} の形式(内部の `printResponse` により出力)。 + +1) /get/data/* パターン(読み取り) +- request.data: null +- response.result: 直ちに返せる JSON 値(数値/文字列/配列/辞書) +- schema(JSON Schema 風の簡易表記): + + request: + { + "endpoint": "/get/data/<名前>", + "data": null + } + + response: + { + "status": 200, + "endpoint": "/get/data/<名前>", + "result": <値> + } + + 代表例: + - `/get/data/version` → result: string + {"status":200,"endpoint":"/get/data/version","result":"3.2.2"} + - `/get/data/mic_device_list` → result: ["Device 1", "Device 2"] + +2) /set/data/* パターン(書き込み) +- request.data: セッタが期待する型(下に代表的な型を列挙) +- response.result: 
新しい値または検証済の値(成功時) +- error: バリデーション失敗時は status 400 と {message,data} + + 共通 request/response: + + request: + { + "endpoint": "/set/data/<名前>", + "data": <値> + } + + response (success): + { + "status":200, + "endpoint":"/set/data/<名前>", + "result": <新しい値> + } + + response (validation error): + { + "status":400, + "endpoint":"/set/data/<名前>", + "result": {"message": "<説明>", "data": <関連データ>} + } + + 代表的リクエスト型一覧(多くはこれらの型のいずれか): + - int: `/set/data/selected_tab_no`, `/set/data/transparency`, `/set/data/mic_threshold` など + - string: `/set/data/selected_mic_host`, `/set/data/selected_speaker_device`, `/set/data/deepl_auth_key` など + - dict/object: `/set/data/selected_your_languages`, `/set/data/selected_target_languages`, `/set/data/send_message_format_parts` など + - list: `/set/data/mic_word_filter` など + +3) フラグ切替(enable / disable) + +- 概要: 機能の有効化/無効化を行うエンドポイント群は、実装で定義された具体的なエンドポイント名(例: `/set/enable/translation`, `/set/disable/translation`, `/set/enable/foreground` など)で提供されています。本ドキュメントでは umbrella 的な汎用トークン(`/set/enable` や `/set/disable` 単体)は記載せず、実際に実装で定義されている concrete エンドポイントのみを列挙しています。 + +- 振る舞いの要点: + - リクエストの `data` は通常 `null` です。 + - 成功応答は多くの場合 boolean を返します(例: `{ "status":200, "endpoint":"/set/enable/foreground", "result": true }`)。 + - 条件により有効化/無効化ができない場合は 400 を返し、`{ "message": "...", "data": <関連データ> }` の形で詳細が返されます。 + +具体的なフラグ切替エンドポイントはドキュメント本文の各該当箇所で個別に列挙しています(例: `/set/enable/translation`, `/set/disable/translation`, `/set/enable/transcription_send`, `/set/disable/transcription_send`, `/set/enable/main_window_sidebar_compact_mode` など)。 + +4) /run/*(アクション・実行系) +- request.data: アクションに依存(例: `/run/send_message_box` は {id, message}) +- response.result: 多くは action の結果(True/False, object)を返す +- 非同期で UI 更新を行う場合は `Controller.run(...)` により `/run/...` 形式の通知が stdout に出力される + + 代表例: + - `/run/send_message_box` + request.data: {"id": <任意>, "message": "<メッセージ>"} + response.result: { + "id": <任意>, + "original": {"message": "<原文>", "transliteration": [] }, + "translations": [ {"message":"<翻訳文>", "transliteration":[...]}, ... 
] + } + + - `/run/download_ctranslate2_weight` + request.data: "<重みタイプ>" (string) + response.result: true + progress: `/run/download_progress_ctranslate2_weight` -> {"weight_type":"...","progress":0.0..1.0} + complete: `/run/downloaded_ctranslate2_weight` -> "" + +5) WebSocket / OSC / Watchdog 関連 +- `/set/data/websocket_host` : request.data:string(host) → response: {status:200, result: host} または 400 (not available) +- `/set/data/osc_ip_address` : request.data:string(ip) → validation via `isValidIpAddress` → 400 on invalid +- `/run/feed_watchdog`: request.data:null → response: {status:200,result:true} + +6) エラー応答の標準形 +- Validation / domain error : status 400, result: {"message": "<説明>", "data": <関連データ>} +- Locked endpoint: status 423, result: "Locked endpoint"(mainloop が再試行のためキューに戻す) +- Internal error: status 500, result: "<エラーメッセージ>" + +7) run events(UI 更新通知)- 参考(主要イベントのみ再掲) +- `/run/connected_network` : bool +- `/run/enable_ai_models` : bool +- `/run/initialization_progress` : int (1..4) + - `/run/transcription_send_mic_message` / `/run/transcription_receive_speaker_message` : オブジェクト(original/translations, see above) + +追加の run イベント(ランタイム検証で未記載と判定されたため追記): + +- `/run/enable_transcription_receive` : bool + - 説明: スピーカー側転写(transcription receive)の有効/無効を UI に通知します。 + +- `/run/transcription_send_mic_message` : object + - payload: `/run/send_message_box` の response.result と同じ構造(original + translations) + - 説明: マイク側で転写結果が生成されたときに、その内容を UI に送るための通知です。 + +- `/run/transcription_receive_speaker_message` : object + - payload: `/run/transcription_send_mic_message` と同じ構造(original + translations) + - 説明: スピーカー側で転写結果が生成されたときに発行されます。 + +- `/run/error_transcription_mic_vram_overflow` : object (400) + - payload: {"message": "VRAM out of memory during mic transcription", "data": ""} + - 説明: マイク転写中に VRAM OOM が発生した際に通知します。 + +- `/run/error_transcription_speaker_vram_overflow` : object (400) + - payload: {"message": "VRAM out of memory during speaker transcription", "data": ""} + - 説明: スピーカー転写中に VRAM OOM が発生した際に通知します。 + +補遺: 
全エンドポイント一覧と期待型の速見表 +- `/get/data/*` : data=null -> result: primitive|array|object +- `/set/data/*` : data: 型指定 (int|string|dict|list) -> result: new value or validation error +- `/set/enable/*` `/set/disable/*` : data=null -> result: bool +- `/run/*` : data: action-specific -> result: action result object / bool + +ファイルの更新履歴 +- このドキュメントは `mainloop.py` の `mapping` と `controller.py` の `run_mapping` を参照して作成しました。将来的にエンドポイントを追加した場合は同じ箇所を参照して本ドキュメントを更新してください。 + +---- + +完了: エンドポイント別スキーマの補完を行いました。次は軽い品質ゲート(lint/typecheck)の実行を提案します。 + diff --git a/src-python/docs/architecture.md b/src-python/docs/architecture.md new file mode 100644 index 00000000..8fcf20e9 --- /dev/null +++ b/src-python/docs/architecture.md @@ -0,0 +1,21 @@ +# アーキテクチャ概観 + +VRCT(src-python)は、ローカル音声キャプチャ・音声認識・翻訳・VR 表示・OSC/ WebSocket 連携を統合するアプリケーションです。主な責務は次の通り。 + +- device_manager: オーディオ入出力デバイスの発見、監視、コールバック通知。 +- transcription (models/transcription/*): マイク/スピーカーからの音声取得、認識(Google/Whisper)、議事録管理。 +- translation (models/translation/*): 翻訳エンジン(DeepL/API、CTranslate2、Google など)管理と実行。 +- overlay (models/overlay/*): VR オーバーレイの画像生成と OpenVR を使った描画管理。 +- osc (models/osc/osc.py): VRChat 等との OSC(および OSCQuery)でのやり取り。 +- websocket (models/websocket/*): 外部クライアント向け WebSocket ブロードキャスト。 +- model.py: 高レベルなファサード。各機能のインスタンス化とランタイム操作。 +- controller.py: UI/外部メッセージを受け、config を更新・機能を起動するコマンド実行層。 +- mainloop.py: stdin 経由のコマンド受付ループとマッピング定義。GUI からの操作を受ける想定。 +- utils.py: ロギング、ネットワークチェック、デバイス/計算デバイスタイプ判定などのユーティリティ。 +- config.py: シングルトン設定ストア。アプリ起動中に共有して使うすべての設定値。 + +設計上のポイント: +- シングルトン/ファサード: `model` と `config` はシングルトンでグローバルに参照される。これにより UI 層(Controller)と低レイヤ(models/*)の橋渡しを行う。 +- 非同期処理: デバイス監視、音声録音・認識、WebSocket サーバー、Overlay のループはそれぞれ別スレッド/非同期ループで実行される。 +- フォールバック: 翻訳はまず選択されたエンジンを使い、失敗時に CTranslate2 にフォールバックする仕組みがある。 +- VRAM エラー検出: Whisper / CTranslate2 等で VRAM 不足が起きた場合、特殊なエラー検出を行い翻訳/音声機能を無効化して回復を試みる。 diff --git a/src-python/docs/diagrams.md b/src-python/docs/diagrams.md new file mode 100644 index 00000000..71b42975 --- /dev/null +++ 
b/src-python/docs/diagrams.md @@ -0,0 +1,51 @@ +# システム図 + +以下はシステム構成の概要(Mermaid シーケンス図とテキスト版の両方)です。Mermaid がサポートされているビューアでは下のシーケンス図が描画されます。 + +```mermaid +sequenceDiagram + participant GUI as GUI (stdin/stdout) + participant Main as mainloop + participant Controller as Controller + participant Model as Model + participant Recorder as Recorder + participant Transcriber as Transcriber + participant Translator as Translator + participant Overlay as Overlay + participant OSC as OSC + participant WS as WebSocket + + GUI->>Main: send JSON endpoint + Main->>Controller: dispatch + Controller->>Model: startMicTranscript(callback) + Recorder->>Transcriber: audio data + Transcriber->>Controller: result (text, language) + Controller->>Translator: getInputTranslate(text) + Translator-->>Controller: translations + Controller->>Overlay: updateOverlay(translation) + Controller->>OSC: sendMessage(osc_message) + Controller->>WS: websocketSendMessage(event) + Controller-->>GUI: run(status, endpoint, result) +``` + +## テキスト版(簡易) + +Main process (`mainloop.py`) + - stdin -> JSON コマンド -> Main.receiver -> queue + - Main.handler -> Controller (コマンド実行) + - run(status, endpoint, result) -> stdout (GUI に通知) + +Controller + - config (読み書き) + - model (起動/停止/アクション) + +Model サブシステム + - device_manager (デバイス列挙/監視) + - transcription (recorder -> transcriber) + - translation (Translator) + - overlay (OverlayImage -> Overlay) + - osc (OSCHandler) + - websocket (WebSocketServer) + +データフロー(代表): 録音 -> audio_queue -> AudioTranscriber -> Controller.micMessage -> Translator -> (OSC / Overlay / WebSocket / ログ) + diff --git a/src-python/docs/modules/config.md b/src-python/docs/modules/config.md new file mode 100644 index 00000000..d33e7b42 --- /dev/null +++ b/src-python/docs/modules/config.md @@ -0,0 +1,203 @@ +# config.py クラス仕様書 + +目的: アプリケーションの全設定を集中管理するシングルトン `config`(クラス名: `Config`、インスタンス: `config`)。 + +特徴: +- JSON シリアライズ対象のプロパティには `@json_serializable('KEY_NAME')` デコレータが付いており、`load_config()` / 
`saveConfig()` によって `config.json` に永続化されます。 +- プロパティは「読み取り専用 (Read Only)」と「読み書き (Read/Write)」に分類されます。読み書き可能なプロパティはバリデーション処理とともに setter が用意されています。 +- 設定は内部的に `_config_data` に保持され、`saveConfig()` はデバウンス(2秒)でファイルへ書き込みます。即時書き込みオプションも可能です(saveConfig(..., immediate_save=True))。 + +## 生成とライフサイクル +- `Config()` はシングルトン(__new__ で単一インスタンスを生成)。 +- `init_config()` でデフォルト値を初期化し、その後 `load_config()` が `config.json` を読み込んで既存値を適用します。 + +## 主要プロパティ一覧(型・デフォルト・説明) + +注: 下は `config.py` の初期化ロジックに基づく抜粋です。`json_serializable` が付与されたキーは `config.json` に書き出されます。 + +- Read only + - `VERSION` (str) = "3.2.2" + - `PATH_LOCAL` (str) = フォロー実行ファイルのディレクトリか、ソースの __file__ のディレクトリ + - `PATH_CONFIG` (str) = PATH_LOCAL/config.json + - `PATH_LOGS` (str) = PATH_LOCAL/logs + - `GITHUB_URL`, `UPDATER_URL`, `BOOTH_URL`, `DOCUMENTS_URL`, `DEEPL_AUTH_KEY_PAGE_URL` (str) + - `MAX_MIC_THRESHOLD` (int) = 2000 + - `MAX_SPEAKER_THRESHOLD` (int) = 4000 + - `WATCHDOG_TIMEOUT` (int) = 60 + - `WATCHDOG_INTERVAL` (int) = 20 + - `SELECTABLE_*` 系: 各種選択肢のリスト/イテレータ(モデルの重みや言語、UI 言語等)。 + +- Read/Write(主な項目) + - `SEND_MESSAGE_FORMAT_PARTS` (dict) = デフォルトで message/translation/translation_first 等を含むフォーマット定義。json_serializable キー: 'SEND_MESSAGE_FORMAT_PARTS' + - `RECEIVED_MESSAGE_FORMAT_PARTS` (dict) + - `ENABLE_TRANSLATION` (bool) = False + - `ENABLE_TRANSCRIPTION_SEND` (bool) = False + - `ENABLE_TRANSCRIPTION_RECEIVE` (bool) = False + - `ENABLE_FOREGROUND` (bool) = False + - `ENABLE_CHECK_ENERGY_SEND` (bool) = False + - `ENABLE_CHECK_ENERGY_RECEIVE` (bool) = False + - `SELECTABLE_CTRANSLATE2_WEIGHT_TYPE_DICT` (dict) = {: False, ...} + - `SELECTABLE_WHISPER_WEIGHT_TYPE_DICT` (dict) + - `SELECTABLE_TRANSLATION_ENGINE_STATUS` (dict) + - `SELECTABLE_TRANSCRIPTION_ENGINE_STATUS` (dict) + - `SELECTED_TAB_NO` (str) = "1" (json_serializable: 'SELECTED_TAB_NO') + - `SELECTED_TRANSLATION_ENGINES` (dict) = tab毎に選択 ('CTranslate2' 等) + - `SELECTED_YOUR_LANGUAGES`, `SELECTED_TARGET_LANGUAGES` (dict) = 翻訳元/先の選択と有効フラグ + - 
`SELECTED_TRANSCRIPTION_ENGINE` (str) = 'Google' + - `CONVERT_MESSAGE_TO_ROMAJI` / `CONVERT_MESSAGE_TO_HIRAGANA` (bool) + - UI 設定: `TRANSPARENCY` (int), `UI_SCALING` (int), `TEXTBOX_UI_SCALING` (int), `MESSAGE_BOX_RATIO` (int) + - `SEND_MESSAGE_BUTTON_TYPE` (str) = 'show'(候補は SEND_MESSAGE_BUTTON_TYPE_LIST) + - `SHOW_RESEND_BUTTON` (bool) + - `FONT_FAMILY` (str) = 'Yu Gothic UI' + - `UI_LANGUAGE` (str) = 'en'(候補は SELECTABLE_UI_LANGUAGE_LIST) + - `MAIN_WINDOW_GEOMETRY` (dict) = {x_pos, y_pos, width, height} + - マイク/スピーカー関係: `AUTO_MIC_SELECT`, `SELECTED_MIC_HOST`, `SELECTED_MIC_DEVICE`, `MIC_THRESHOLD`, `MIC_AUTOMATIC_THRESHOLD`, `MIC_RECORD_TIMEOUT`, `MIC_PHRASE_TIMEOUT`, `MIC_MAX_PHRASES`, `MIC_WORD_FILTER`, `HOTKEYS` 等 + - `PLUGINS_STATUS` (list) + - マイク転写確度閾値: `MIC_AVG_LOGPROB`, `MIC_NO_SPEECH_PROB` + - スピーカー関連(同様の項目): `AUTO_SPEAKER_SELECT`, `SELECTED_SPEAKER_DEVICE`, `SPEAKER_THRESHOLD`, ... + - `OSC_IP_ADDRESS` (str) = '127.0.0.1' + - `OSC_PORT` (int) = 9000 + - `AUTH_KEYS` (dict) = {'DeepL_API': None} + - `USE_EXCLUDE_WORDS` (bool) = True + - 計算デバイス選択: `SELECTED_TRANSLATION_COMPUTE_DEVICE` / `SELECTED_TRANSCRIPTION_COMPUTE_DEVICE`(`getComputeDeviceList()` に基づくデバイス辞書) + - 重み/計算タイプ: `CTRANSLATE2_WEIGHT_TYPE`, `WHISPER_WEIGHT_TYPE`, `SELECTED_TRANSLATION_COMPUTE_TYPE`, `SELECTED_TRANSCRIPTION_COMPUTE_TYPE` + - オーバーレイ設定: `OVERLAY_SMALL_LOG`, `OVERLAY_SMALL_LOG_SETTINGS`, `OVERLAY_LARGE_LOG`, `OVERLAY_LARGE_LOG_SETTINGS`, `OVERLAY_SHOW_ONLY_TRANSLATED_MESSAGES` 等 + - VRC/ログ/WebSocket: `SEND_MESSAGE_TO_VRC`, `SEND_RECEIVED_MESSAGE_TO_VRC`, `LOGGER_FEATURE`, `VRC_MIC_MUTE_SYNC`, `NOTIFICATION_VRC_SFX`, `WEBSOCKET_SERVER`, `WEBSOCKET_HOST`, `WEBSOCKET_PORT` + +# config.py — 完全上書きドキュメント + +目的: アプリケーションの全設定を集中管理するシングルトン `config`(クラス名: `Config`、インスタンス: `config`)。 + +特徴: +- JSON シリアライズ対象のプロパティには `@json_serializable('KEY_NAME')` デコレータが付いており、`load_config()` / `saveConfig()` によって `config.json` に永続化されます。 +- プロパティは「読み取り専用 (Read Only)」と「読み書き 
(Read/Write)」に分類されます。読み書き可能なプロパティはバリデーション処理とともに setter が用意されています。 +- 設定は内部的に `_config_data` に保持され、`saveConfig()` はデバウンス(2秒)でファイルへ書き込みます。即時書き込みオプションも可能です(saveConfig(..., immediate_save=True))。 + +## 生成とライフサイクル +- `Config()` はシングルトン(__new__ で単一インスタンスを生成)。 +- `init_config()` でデフォルト値を初期化し、その後 `load_config()` が `config.json` を読み込んで既存値を適用します。 + +## 主要プロパティ一覧(型・デフォルト・説明) + +注: 下は `config.py` の初期化ロジックに基づく抜粋です。`json_serializable` が付与されたキーは `config.json` に書き出されます。 + +- Read only + - `VERSION` (str) = "3.2.2" + - `PATH_LOCAL` (str) = フォロー実行ファイルのディレクトリか、ソースの __file__ のディレクトリ + - `PATH_CONFIG` (str) = PATH_LOCAL/config.json + - `PATH_LOGS` (str) = PATH_LOCAL/logs + - `GITHUB_URL`, `UPDATER_URL`, `BOOTH_URL`, `DOCUMENTS_URL`, `DEEPL_AUTH_KEY_PAGE_URL` (str) + - `MAX_MIC_THRESHOLD` (int) = 2000 + - `MAX_SPEAKER_THRESHOLD` (int) = 4000 + - `WATCHDOG_TIMEOUT` (int) = 60 + - `WATCHDOG_INTERVAL` (int) = 20 + - `SELECTABLE_*` 系: 各種選択肢のリスト/イテレータ(モデルの重みや言語、UI 言語等)。 + +- Read/Write(主な項目) + - `SEND_MESSAGE_FORMAT_PARTS` (dict) = デフォルトで message/translation/translation_first 等を含むフォーマット定義。json_serializable キー: 'SEND_MESSAGE_FORMAT_PARTS' + - `RECEIVED_MESSAGE_FORMAT_PARTS` (dict) + - `ENABLE_TRANSLATION` (bool) = False + - `ENABLE_TRANSCRIPTION_SEND` (bool) = False + - `ENABLE_TRANSCRIPTION_RECEIVE` (bool) = False + - `ENABLE_FOREGROUND` (bool) = False + - `ENABLE_CHECK_ENERGY_SEND` (bool) = False + - `ENABLE_CHECK_ENERGY_RECEIVE` (bool) = False + - `SELECTABLE_CTRANSLATE2_WEIGHT_TYPE_DICT` (dict) = {: False, ...} + - `SELECTABLE_WHISPER_WEIGHT_TYPE_DICT` (dict) + - `SELECTABLE_TRANSLATION_ENGINE_STATUS` (dict) + - `SELECTABLE_TRANSCRIPTION_ENGINE_STATUS` (dict) + - `SELECTED_TAB_NO` (str) = "1" (json_serializable: 'SELECTED_TAB_NO') + - `SELECTED_TRANSLATION_ENGINES` (dict) = tab毎に選択 ('CTranslate2' 等) + - `SELECTED_YOUR_LANGUAGES`, `SELECTED_TARGET_LANGUAGES` (dict) = 翻訳元/先の選択と有効フラグ + - `SELECTED_TRANSCRIPTION_ENGINE` (str) = 'Google' + - `CONVERT_MESSAGE_TO_ROMAJI` / 
`CONVERT_MESSAGE_TO_HIRAGANA` (bool) + - UI 設定: `TRANSPARENCY` (int), `UI_SCALING` (int), `TEXTBOX_UI_SCALING` (int), `MESSAGE_BOX_RATIO` (int) + - `SEND_MESSAGE_BUTTON_TYPE` (str) = 'show'(候補は SEND_MESSAGE_BUTTON_TYPE_LIST) + - `SHOW_RESEND_BUTTON` (bool) + - `FONT_FAMILY` (str) = 'Yu Gothic UI' + - `UI_LANGUAGE` (str) = 'en'(候補は SELECTABLE_UI_LANGUAGE_LIST) + - `MAIN_WINDOW_GEOMETRY` (dict) = {x_pos, y_pos, width, height} + - マイク/スピーカー関係: `AUTO_MIC_SELECT`, `SELECTED_MIC_HOST`, `SELECTED_MIC_DEVICE`, `MIC_THRESHOLD`, `MIC_AUTOMATIC_THRESHOLD`, `MIC_RECORD_TIMEOUT`, `MIC_PHRASE_TIMEOUT`, `MIC_MAX_PHRASES`, `MIC_WORD_FILTER`, `HOTKEYS` 等 + - `PLUGINS_STATUS` (list) + - マイク転写確度閾値: `MIC_AVG_LOGPROB`, `MIC_NO_SPEECH_PROB` + - スピーカー関連(同様の項目): `AUTO_SPEAKER_SELECT`, `SELECTED_SPEAKER_DEVICE`, `SPEAKER_THRESHOLD`, ... + - `OSC_IP_ADDRESS` (str) = '127.0.0.1' + - `OSC_PORT` (int) = 9000 + - `AUTH_KEYS` (dict) = {'DeepL_API': None} + - `USE_EXCLUDE_WORDS` (bool) = True + - 計算デバイス選択: `SELECTED_TRANSLATION_COMPUTE_DEVICE` / `SELECTED_TRANSCRIPTION_COMPUTE_DEVICE`(`getComputeDeviceList()` に基づくデバイス辞書) + - 重み/計算タイプ: `CTRANSLATE2_WEIGHT_TYPE`, `WHISPER_WEIGHT_TYPE`, `SELECTED_TRANSLATION_COMPUTE_TYPE`, `SELECTED_TRANSCRIPTION_COMPUTE_TYPE` + - オーバーレイ設定: `OVERLAY_SMALL_LOG`, `OVERLAY_SMALL_LOG_SETTINGS`, `OVERLAY_LARGE_LOG`, `OVERLAY_LARGE_LOG_SETTINGS`, `OVERLAY_SHOW_ONLY_TRANSLATED_MESSAGES` 等 + - VRC/ログ/WebSocket: `SEND_MESSAGE_TO_VRC`, `SEND_RECEIVED_MESSAGE_TO_VRC`, `LOGGER_FEATURE`, `VRC_MIC_MUTE_SYNC`, `NOTIFICATION_VRC_SFX`, `WEBSOCKET_SERVER`, `WEBSOCKET_HOST`, `WEBSOCKET_PORT` + +## セッタのバリデーション +- 多くの setter は型チェックと候補値チェック(リストや辞書のキー整合性)を行います。例: + - `SELECTED_MIC_DEVICE` は `device_manager.getMicDevices()` の一覧に存在する名前であること。 + - `SELECTED_TRANSLATION_COMPUTE_TYPE` は `SELECTED_TRANSLATION_COMPUTE_DEVICE['compute_types']` に含まれる文字列であること。 + - UI 関連の集合は `SELECTABLE_UI_LANGUAGE_LIST` などの一覧に従う。 + +## 永続化の詳細 +- `load_config()` は `config.json` が存在し、かつ中身がある場合に読み込みを試み、ファイル中のキーを 
`setattr(self, key, value)` して既存の setter を利用して適用します。 +- 読み込み後、`json_serializable` 指定された全キーを `_config_data` に書き戻し、ファイルを上書き(常に書く)。 + +## 使い方の例 + +以下は `config` を使った典型的なコード例です。 + +```python +from config import config + +# 値の参照 +print('App version:', config.VERSION) +print('Current UI language:', config.UI_LANGUAGE) + +# 値の更新(setter を通す) +config.UI_LANGUAGE = 'ja' +config.SEND_MESSAGE_TO_VRC = False + +# 複雑な dict を設定する例(メッセージフォーマットを上書き) +config.SEND_MESSAGE_FORMAT_PARTS = { + 'message': {'prefix': '[YOU] ', 'suffix': ''}, + 'separator': '\n', + 'translation': {'prefix': '[TR] ', 'separator': '\n', 'suffix': ''}, + 'translation_first': True, +} + +# 即時保存したい場合(即座に config.json を上書き) +config.saveConfig('CUSTOM_SAVE', {'foo': 'bar'}, immediate_save=True) +``` + +## エッジケース / 注意点 +- `load_config()` はファイル値を setter 経由で当てはめるため、ファイルに古いキーや予期しない型があると setter によって無視されることがあります(例: 言語キーが不正の場合)。 +- `saveConfig()` はデバウンスされるため、高頻度の設定変更では複数の変更がまとめて書き込まれます。即時書き込みが必要な操作(重要な鍵の更新など)は `immediate_save=True` を使ってください。 +- `SELECTABLE_*` 系や `*_DICT` 系は初期化時に外部モジュール(翻訳リソース、whisper_models、device_manager 等)から生成されます。これらが利用できない環境ではデフォルトが空になる可能性があります。 + +## 推奨改善点(将来的なドキュメント/実装) +- 設定スキーマを JSON Schema で定義し、load 時の検証を明確化すると安全性が向上します。 +- 設定変更イベントを発火する仕組み(observer パターン)を導入すると、Controller/Model 側の再初期化処理をより明確に実装できます。 + +--- + +このファイルは `config.py` の実装に基づいて自動生成的に作成されたドキュメント(overwrite)です。実装の微細な差分は `config.py` を参照してください。 + +## 詳細設計 + +目的: アプリケーションの全設定を保持するシングルトン `config`。 + +ポイント: +- JSON シリアライズ可能な設定値には `@json_serializable` デコレータが付与され、save 操作でファイルへ書き出される。 +- 多数のプロパティが定義され、読み取り専用 (Read Only) と 読み書き (Read/Write) が混在する。 +- 設定項目の例: + - ENABLE_TRANSLATION, ENABLE_TRANSCRIPTION_SEND, ENABLE_TRANSCRIPTION_RECEIVE + - SELECTED_MIC_HOST, SELECTED_MIC_DEVICE, SELECTED_SPEAKER_DEVICE + - SELECTED_TRANSLATION_ENGINES, SELECTED_YOUR_LANGUAGES, SELECTED_TARGET_LANGUAGES + - PATH_LOCAL, PATH_LOGS, VERSION, GITHUB_URL, UPDATER_URL + - SELECTABLE_CTRANSLATE2_WEIGHT_TYPE_DICT / SELECTABLE_WHISPER_WEIGHT_TYPE_DICT + - COMPUTE 関連: 
SELECTABLE_COMPUTE_DEVICE_LIST, SELECTED_TRANSLATION_COMPUTE_DEVICE, SELECTED_TRANSCRIPTION_COMPUTE_DEVICE + +設計上の契約: +- 全ての get/set は辞書形で status/result を返す Controller の呼び出しに合わせて変換される。 +- 外部から設定を変更した際は必要に応じて Model/Controller による再初期化処理を呼ぶ。 + +検討事項: +- 現状は設定変更が即時反映されるが、一部操作は再初期化(モデルロード、デバイス再取得)を要求するため Controller 側で連携している。 diff --git a/src-python/docs/modules/controller.md b/src-python/docs/modules/controller.md new file mode 100644 index 00000000..f2ae57a6 --- /dev/null +++ b/src-python/docs/modules/controller.md @@ -0,0 +1,158 @@ +## Controller クラス仕様書 + +概要 +- `Controller` はアプリケーションのコントロール層(Facade)で、`model` と `device_manager`、および外部 UI / mainloop とを仲介します。 +- UI からのコマンドを受け取り、`model` の開始/停止、設定の変更、ダウンロードの開始、各種フラグの切り替え、進捗通知(`run` コールバック経由)を行います。 +- 多くのメソッドは JSON 系の応答オブジェクトを返します: {"status": int, "result": Any}。副作用で `self.run(status, run_mapping[key], payload)` を呼び出して UI に通知します。 + +初期化とランタイムフック +- __init__() -> None + - フィールド: `init_mapping: dict`, `run_mapping: dict`, `run: Callable`, `device_access_status: bool` + - `setInitMapping(init_mapping: dict)` / `setRunMapping(run_mapping: dict)` / `setRun(run: Callable)` で mainloop からマッピング・コールバックを注入されることを想定。 + +コールバック通知用メソッド(UI への通知) +- connectedNetwork() / disconnectedNetwork() -> None +- enableAiModels() / disableAiModels() -> None +- updateMicHostList() / updateMicDeviceList() / updateSpeakerDeviceList() -> None +- updateConfigSettings() -> None + - これらは `self.run(status, run_mapping[key], payload)` を使って UI にイベントを送ります。 + +ダウンロード用ヘルパークラス +- class DownloadCTranslate2(run_mapping: dict, weight_type: str, run: Callable) + - progressBar(progress: float) -> None + - downloaded() -> None +- class DownloadWhisper(run_mapping: dict, weight_type: str, run: Callable) + - progressBar(progress: float) -> None + - downloaded() -> None + +音声・翻訳イベントハンドラ +- micMessage(result: dict) -> None + - 引数: result: {"text": str|False, "language": str} + - 挙動: ワードフィルタ、繰り返し検出、翻訳(`model.getInputTranslate`)、音声送信(OSC)・オーバーレイ更新・WebSocket ブロードキャスト等を行う。 + - 
エラー: 翻訳中に VRAM OOM が起きた場合は model.detectVRAMError を使って検出し、翻訳機能を無効化して UI に 400 を通知。 + +- speakerMessage(result: dict) -> None + - 引数: result: {"text": str|False, "language": str} + - micMessage と同様だが、受信(speaker)側のロジックやオーバーレイの扱いが異なる。 + +- chatMessage(data: dict) -> dict + - 引数: {"id": Any, "message": str} + - 戻り値: {"status": int, "result": {"id":..., "original":..., "translations":[...]}} + - 挙動詳細: + - 翻訳処理は `model.getInputTranslate` を呼び出します。翻訳処理中に VRAM 関連の例外が発生した場合、`model.detectVRAMError` によって検出し、翻訳機能を自動で無効化します。 + - VRAM エラー検出時は Controller は UI に対して 400 系の run イベントを発行する(例: `error_translation_chat_vram_overflow`, `enable_translation` で無効化通知)。 + - エラー発生時の戻り値: 翻訳を行わずに基本情報を含む 200 応答を返すコードパスがあり、クライアント側でのハンドリングを想定しています。 + +設定取得/変更系メソッド(代表例) +- getVersion() -> {"status":200, "result": config.VERSION} +- getComputeMode() / getComputeDeviceList() / getSelectedTranslationComputeDevice() -> dict +- setSelectedTranslationComputeDevice(device: str) -> {"status":200, "result": device} +- getSelectableCtranslate2WeightTypeDict() -> dict +- setEnableTranslation() / setDisableTranslation() -> dict + - setEnableTranslation はモデルロード時に VRAM エラーを検知するロジックを内包している。 + - 多くの setXXX / getXXX メソッドは config を直接操作して即時反映する。 + +自動デバイス選択 +- applyAutoMicSelect() / applyAutoSpeakerSelect() + - `device_manager` にコールバックを登録して自動選択を有効化する。 + +トランスクリプション制御(スレッドで実行) +- startTranscriptionSendMessage() / stopTranscriptionSendMessage() / startThreadingTranscriptionSendMessage() / stopThreadingTranscriptionSendMessage() +- startTranscriptionReceiveMessage() / stopTranscriptionReceiveMessage() / startThreadingTranscriptionReceiveMessage() / stopThreadingTranscriptionReceiveMessage() + - 実際の処理は `model.startMicTranscript` / `model.startSpeakerTranscript` に委譲される。VRAM エラーは検出して UI に通知し、自動的に停止する処理あり。 + +閾値・チェック系 +- startCheckMicEnergy() / stopCheckMicEnergy() / startThreadingCheckMicEnergy() / stopThreadingCheckMicEnergy() +- startCheckSpeakerEnergy() / stopCheckSpeakerEnergy() / startThreadingCheckSpeakerEnergy() / 
stopThreadingCheckSpeakerEnergy() + +ダウンロード開始(非同期/同期) +- downloadCtranslate2Weight(data: str, asynchronous: bool=True) -> dict +- downloadWhisperWeight(data: str, asynchronous: bool=True) -> dict + - 非同期なら別スレッドでダウンロードを行い progressBar コールバックを経由して UI に進捗を返す。 + +Watchdog / WebSocket / OSC 周り +- startWatchdog() / feedWatchdog() / stopWatchdog() +- getWebSocketHost() / setWebSocketHost(data) -> dict +- setEnableWebSocketServer() / setDisableWebSocketServer() +- setOscIpAddress(data) / setOscPort(data) + - ネットワーク周りの設定は検証ロジック(IP アドレス検証、サーバー利用可否のチェック)を含む。 + +ユーティリティ関数 +- messageFormatter(format_type: str, translation: list, message: str) -> str + - OSC に送る文面のフォーマットを生成(設定に基づく)。 +- replaceExclamationsWithRandom(text) -> (str, dict) +- restoreText(escaped_text, escape_dict) -> str +- removeExclamations(text) -> str + +重要な戻り値規約 +- 成功: {"status": 200, "result": ...} +- 失敗: {"status": 400, "result": {"message": str, "data": Any}} +- 多くのメソッドは UI への通知として `self.run(status, run_mapping[key], payload)` を行う。 + +エッジケース / エラー処理 +- VRAM OOM 検出: モデル例外が上がると model.detectVRAMError(e) を呼び出し、VRAM エラーが検出された場合は関連機能を自動で無効化して UI に 400 を通知する。 +- デバイスアクセスの競合: `device_access_status` による簡易ロックで、デバイス操作中は待機する。 +- ネットワーク依存: DeepL 等の外部翻訳 API 利用可否は `model.authenticationTranslatorDeepLAuthKey` で検査し、無効時は選択肢を更新する。 + +呼び出し例(Python から直接) +```python +from controller import Controller +ctrl = Controller() +# run コールバックの例: (status:int, event_name:str, payload:any) +def ui_run(status, event, payload): + print(status, event, payload) + +ctrl.setRun(ui_run) +resp = ctrl.setEnableTranslation() +print(resp) # {'status':200, 'result': True} + +data = {"id": 123, "message": "Hello"} +resp = ctrl.chatMessage(data) +print(resp) +``` + +シーケンス図(簡易: マイク入力 -> 翻訳 -> UI 通知) +```mermaid +sequenceDiagram + participant UI + participant Mainloop + participant Controller + participant Model + + UI->>Mainloop: ユーザ操作 (send message) + Mainloop->>Controller: chatMessage(data) + Controller->>Model: getInputTranslate(message) + 
Model-->>Controller: translation + Controller->>Model: oscSendMessage(...) + Controller->>UI: run(200, run_mapping['transcription_send_mic_message'], payload) +``` + +次の作業 +- `docs/api.md` を `mainloop.py` のマッピングに基づいて拡張し、各エンドポイントの request/response 例を追加してください。 + +参考: 実装詳細は `src-python/controller.py` を参照してください(メソッドごとに細かな条件分岐や run_mapping キーが存在します)。 +# controller.py — 詳細設計 + +目的: UI(または外部プロセス)からの操作を受け、`config` と `model` を操作して副作用を生じさせるコマンド層。 + +主要クラス/関数: +- class Controller + - 属性: + - init_mapping: アプリ起動時の読み出し用マッピング(/get/data/*) + - run_mapping: イベント通知先のエンドポイントマップ(run 関数で使用) + - run: run(status, endpoint, result) を格納 + + - 主要メソッド: + - setEnableTranslation / setDisableTranslation: 翻訳機能の切替(モデル切替や VRAM エラー回復処理を含む) + - start/stop transcription/energy checks: Model の startMicTranscript 等を呼ぶ + - downloadCtranslate2Weight / downloadWhisperWeight: ダウンロードを非同期で開始し進捗を run 経由で通知 + - micMessage / speakerMessage / chatMessage: 認識結果を受け、翻訳/OSC/Overlay/WebSocket/ログ記録を行う主要ハンドラ + - messageFormatter: OSC 用メッセージ整形 + - 多数の get/set 系関数: config の各種設定を読み書きし status/result を返す + +エラー/例外: +- VRAM 関連は特に注意し、検出時は該当機能を無効化してユーザーへ通知する。 + +API マッピング: +- `mainloop.py` の `mapping` と連携しており、多くの `/get/data/*` `/set/data/*` `/run/*` が Controller のメソッドにマッピングされる(詳細は docs/api.md を参照)。 + diff --git a/src-python/docs/modules/device_manager.md b/src-python/docs/modules/device_manager.md new file mode 100644 index 00000000..f681b2d9 --- /dev/null +++ b/src-python/docs/modules/device_manager.md @@ -0,0 +1,73 @@ +# device_manager.py — デバイス検出と監視(overwrite) + +目的: システムのマイク/スピーカー(主に Windows の WASAPI)を列挙し、変更を監視してコールバックで通知する `DeviceManager` シングルトンを提供します。 + +主要コンポーネント: +- class Client(MMNotificationClient) + - オーディオデバイスのシステムイベント(追加/削除/デフォルト変更)を受け取り、監視ループの再起動をトリガーします。 + +- class DeviceManager + - シングルトンインスタンス: `device_manager` + - 主要プロパティ: + - `mic_devices` (dict): {host_name: [device_info, ...]} + - `default_mic_device` (dict): {'host': {...}, 'device': {...}} + - `speaker_devices` (list): [device_info, ...] 
+ - `default_speaker_device` (dict) + - 各種 prev_/update_flag_: 差分検出用 + - callback 関連プロパティ: `callback_default_mic_device`, `callback_mic_device_list`, など多数 + + - 主要メソッド (抜粋): + - `update()` -> None: PyAudio を利用してホスト毎の入力デバイスとループバック(スピーカー)を列挙し内部状態を更新します。 + - `checkUpdate()` -> bool: 前回値との差分を計算して変更フラグを返します。 + - `monitoring()` -> None: pycaw/MMNotificationClient を使った長時間監視ループ。変化を検出すると各コールバックを呼び出す。 + - `startMonitoring()` / `stopMonitoring()` + - `getMicDevices()` / `getDefaultMicDevice()` / `getSpeakerDevices()` / `getDefaultSpeakerDevice()` + - `forceUpdateAndSetMicDevices()` / `forceUpdateAndSetSpeakerDevices()` + +コールバックAPI(例): +- `setCallbackMicDeviceList(callback)` — マイクデバイスリスト変更時に呼ばれる +- `setCallbackDefaultMicDevice(callback)` — デフォルトマイク変更時に呼ばれる +- `setCallbackProcessBeforeUpdateMicDevices(callback)` / `setCallbackProcessAfterUpdateMicDevices(callback)` — 更新前後のフック + +例: + +```python +from device_manager import device_manager + +def on_default_mic(host_name, device_name): + print('Default mic changed:', host_name, device_name) + +device_manager.setCallbackDefaultMicDevice(on_default_mic) +device_manager.forceUpdateAndSetMicDevices() +``` + +注意点: +- Windows 固有のモジュール(PyAudio paWASAPI, pycaw)に依存します。クロスプラットフォーム対応が必要な場合は別実装が必要です。 +- 監視スレッドは永続的に動作するため、アプリケーション終了時は `stopMonitoring()` を呼んで安全に停止してください。 + +## 詳細設計 + +目的: ローカルの入力(マイク)と出力(ループバックから抽出されたスピーカー)デバイスを列挙し、変更を監視してコールバックで通知する。Windows の WASAPI 等に依存。 + +主要クラス/関数: +- class Client(MMNotificationClient) + - Audio デバイスの変更イベントを受けると `loop = False` にして監視ループを再起動させる設計。 + +- class DeviceManager + - シングルトン: `device_manager = DeviceManager()` + - 主要属性: + - mic_devices: {host: [device_info...]} + - default_mic_device: {host, device} + - speaker_devices: [device_info...] 
+ - default_speaker_device: {device} + - 各種 prev_*, update_flag_*: 差分検出のために保持 + - コールバック属性: callback_default_mic_device, callback_host_list など + - 主要メソッド: + - update(): PyAudio を使ってホストごとにデバイス列挙。Loopback デバイスを speaker_devices に集める。 + - monitoring(): MMNotificationClient と組み合わせてデバイスの変化を検出し、コールバックを発行 + - set/clear Callback 系: UI や Controller が登録して自動選択や再起動をトリガーできる + - forceUpdateAndSetMicDevices / forceUpdateAndSetSpeakerDevices: 即時更新とコールバック通知 + +注意点: +- Windows 固有の処理(paWASAPI, pycaw)に依存する。 +- デバイス取得はリソースに依存するので try/except で例外を吸収し errorLogging() を呼ぶ。 diff --git a/src-python/docs/modules/model.md b/src-python/docs/modules/model.md new file mode 100644 index 00000000..3cb331ad --- /dev/null +++ b/src-python/docs/modules/model.md @@ -0,0 +1,105 @@ +# model.py — クラスと主要メソッド +目的: アプリケーションの中核オーケストレータ。翻訳器 (Translator)、オーバーレイ、トランスクリプタ、OSC、WebSocket、Watchdog などのインスタンスを保持し、これらの起動/停止/操作を担います。`model` は `Model` のシングルトンインスタンスです。 + +主要クラスとシグネチャ: +- class threadFnc(Thread) + - __init__(self, fnc, end_fnc=None, daemon=True, *args, **kwargs) + - stop(self) -> None + - pause(self) -> None + - resume(self) -> None + +- class Model + - __new__(cls) -> Model + - init(self) -> None + - checkTranslatorCTranslate2ModelWeight(self, weight_type: str) -> bool + - changeTranslatorCTranslate2Model(self) -> None + - downloadCTranslate2ModelWeight(self, weight_type, callback=None, end_callback=None) -> Any + - isLoadedCTranslate2Model(self) -> bool + - getListLanguageAndCountry(self) -> list + - getTranslate(self, translator_name, source_language, target_language, target_country, message) -> tuple + - getInputTranslate(self, message, source_language=None) -> (list, list) + - getOutputTranslate(self, message, source_language=None) -> (list, list) + - startMicTranscript(self, fnc) -> None + - stopMicTranscript(self) -> None + - startSpeakerTranscript(self, fnc: Optional[Callable[[dict], None]] = None) -> None + - stopSpeakerTranscript(self) -> None + - startWebSocketServer(self, host, port) -> None + - 
stopWebSocketServer(self) -> None + - websocketSendMessage(self, message_dict: dict) -> bool + + 変更点(2025-10-09): + + - startCheckMicEnergy(self, fnc: Optional[Callable[[float], None]] = None) -> None + - 説明: 進捗/エネルギー表示用のコールバックを受け取ります。fnc が None の場合は内部で no-op を使い、呼び出し前に callable チェックを行います。これにより呼び出し側が None を渡しても安全になりました。 + + - startCheckSpeakerEnergy(self, fnc: Optional[Callable[[float], None]] = None) -> None + - 説明: 同上(fnc を Optional として受け取り、呼び出し時に callable を確認します)。内部では Queue を作成して録音データを受け取り、定期的にコールバックを呼びます。 + + - convertMessageToTransliteration(self, message: str, hiragana: bool = True, romaji: bool = True) -> list + - 説明: 以前は単一の文字列や別形を返す箇所がありましたが、現在は常にリスト(トークン単位の dict を要素とする list)を返します。hiragana/romaji の両方が False の場合は空リストを返します。 + + - createOverlayImageLargeLog(self, message_type: str, message: Optional[str], your_language: Optional[str], translation: list, target_language: Optional[dict] = None) -> object + - 説明: `target_language` は辞書形式で渡される場合があり、内部で言語リストに正規化されます(enabled な言語のみ抽出)。`message` / `your_language` は Optional となり、`None` を渡して翻訳のみのログを作ることが可能です。 + +使用例(簡易): + +```python +from model import model + +# 翻訳を呼び出す +translation, success = model.getTranslate('CTranslate2', 'Japanese', 'English', 'United States', 'こんにちは') +print(translation, success) + +# マイク文字起こしの開始(コールバックで結果を受け取る) +def on_mic_transcript(result): + print('mic transcript:', result) + +model.startMicTranscript(on_mic_transcript) + +# WebSocket サーバー起動 +model.startWebSocketServer('127.0.0.1', 2231) + +``` + +注意点: +- `Model` は多くの外部リソース(GPU、ファイル、ネットワーク)に依存するため、各操作は例外処理で保護されています。 +- 大きなモデルのロードで VRAM OOM を検出する `detectVRAMError` を備え、Controller 側でのフォールバック処理に使われます。 + +## 詳細設計 + +目的: 各モデル(翻訳/転写/Overlay/Watchdog/OSC/WebSocket 等)のインスタンスを保持し、高レベルの操作を提供するファサード。 + +主要クラス/変数: +- class threadFnc(Thread) + - 説明: ループする関数をバックグラウンドで呼ぶヘルパ。pause/stop/end callback をサポート。 + +- class Model + - シングルトン: ファイル末で `model = Model()` として公開。 + - 主な属性: + - translator (Translator) + - overlay (Overlay) + - overlay_image (OverlayImage) + 
- mic_audio_queue, mic_audio_recorder, mic_transcriber + - speaker_audio_queue, speaker_audio_recorder, speaker_transcriber + - watchdog (Watchdog) + - osc_handler (OSCHandler) + - websocket_server (WebSocketServer) + - 主なメソッド: + - start/stop logger, overlay, watchdog + - startMicTranscript / stopMicTranscript: 録音、transcriber の起動とキュー処理 + - startSpeakerTranscript / stopSpeakerTranscript + - startCheckMicEnergy / stopCheckMicEnergy + - startCheckSpeakerEnergy / stopCheckSpeakerEnergy + - getTranslate / getInputTranslate / getOutputTranslate: Translator を利用する高レベル関数 + - createOverlayImage* / updateOverlay* : OverlayImage と Overlay を結合して VR 表示を作成 + - startWebSocketServer / stopWebSocketServer / websocketSendMessage + +エラー処理: +- 音声認識や翻訳で VRAM エラーが発生した場合、detectVRAMError() で特殊な例外内容を検査し、Controller 経由で翻訳機能を OFF にする処理がある。 + +非同期/リソース: +- Recorder/Transcriber/Overlay/Watchdog/WebSocket はそれぞれ別スレッドで動作する。Model はそれらの開始/停止を管理する。 + +依存: +- models/translation, models/transcription, models/overlay, models/osc, models/websocket + diff --git a/src-python/docs/modules/model_extra.md b/src-python/docs/modules/model_extra.md new file mode 100644 index 00000000..5d3af186 --- /dev/null +++ b/src-python/docs/modules/model_extra.md @@ -0,0 +1,60 @@ +# model.py — クラス一覧と使用例 + +以下は `model.py` で提供される主要クラスのシグネチャ概要と、簡単な呼び出し例です。 + +## クラス / 主要シグネチャ + +- class threadFnc(Thread) + - __init__(self, fnc: Callable, interval: float = 0.1, end_callback: Callable | None = None) + - start(self) -> None + - pause(self) -> None + - resume(self) -> None + - stop(self) -> None + +- class Model + - startLogger(self) -> None + - stopLogger(self) -> None + - startOverlay(self) -> None + - shutdownOverlay(self) -> None + - startMicTranscript(self, callback: Callable[[dict], None]) -> None + - stopMicTranscript(self) -> None + - startSpeakerTranscript(self, callback: Callable[[dict], None]) -> None + - stopSpeakerTranscript(self) -> None + - startCheckMicEnergy(self, progress_callback: Callable[[int], None]) -> None + 
- stopCheckMicEnergy(self) -> None + - startCheckSpeakerEnergy(self, progress_callback: Callable[[int], None]) -> None + - stopCheckSpeakerEnergy(self) -> None + - startWebSocketServer(self, host: str, port: int) -> None + - stopWebSocketServer(self) -> None + - websocketSendMessage(self, message: dict) -> None + - getListMicHost(self) -> dict + - getListMicDevice(self) -> list + - getListSpeakerDevice(self) -> list + - getInputTranslate(self, text: str, source_language: str = None) -> tuple[list[str], list[bool]] + - getOutputTranslate(self, text: str, source_language: str = None) -> tuple[list[str], list[bool]] + - detectVRAMError(self, exception: Exception) -> tuple[bool, str] + +## サンプル(呼び出し例) + +以下は Model の簡単な呼び出し例です。 + +```python +from model import model + +# マイク転写のコールバック例 +def on_mic_result(result: dict): + # result の想定形: {"text": str|False, "language": str} + text = result.get("text") + language = result.get("language") + print('mic:', text, language) + +# マイク転写を開始(別スレッドで動く) +model.startMicTranscript(on_mic_result) + +# 一度だけ翻訳を呼ぶ +translation, success = model.getInputTranslate('Hello', source_language='English') +print('translation:', translation, 'success:', success) + +# WebSocket 経由で外部クライアントへイベント送信 +model.websocketSendMessage({'type': 'INFO', 'message': 'VRCT ready'}) +``` diff --git a/src-python/docs/modules/osc.md b/src-python/docs/modules/osc.md new file mode 100644 index 00000000..7ad5454d --- /dev/null +++ b/src-python/docs/modules/osc.md @@ -0,0 +1,17 @@ +# models/osc — 詳細設計 + +目的: VRChat 等と OSC / OSCQuery 経由で値の取得やチャット送信を行う。 + +主要クラス/関数: +- class OSCHandler + - sendMessage(message: str, notification: bool=True): OSC で chatbox/input を送信 + - sendTyping(flag: bool): chatbox/typing を送信 + - receiveOscParameters(): OSCQuery を立て、指定したフィルタに対してローカルでサーバを実装してイベントを受ける + - getOSCParameterValue(address: str): OSCQuery を通じて現在値を問い合わせる(use tinyoscquery) + +注意点: +- `is_osc_query_enabled` が True のときに OSCQuery を使う(127.0.0.1 や localhost の場合に True) +- 受信ハンドラは dispatcher 
にマップしてコールバックを呼ぶ。 +- ネットワーク環境や OSCQuery の可否により動作が変わるため例外処理が多く入っている。 + +依存: python-osc, tinyoscquery diff --git a/src-python/docs/modules/overlay.md b/src-python/docs/modules/overlay.md new file mode 100644 index 00000000..18b65321 --- /dev/null +++ b/src-python/docs/modules/overlay.md @@ -0,0 +1,31 @@ +# overlay.py — OpenVR オーバーレイ管理 + +目的: OpenVR を使ったオーバーレイ表示(複数サイズ: small/large)を管理する `Overlay` クラスを提供します。 + +主要メソッド: +- __init__(self, settings_dict) +- init(self) -> None +- startOverlay(self) -> None +- shutdownOverlay(self) -> None +- reStartOverlay(self) -> None +- updateImage(self, img: PIL.Image.Image, size: str) -> None +- updateOpacity(self, opacity: float, size: str, with_fade: bool = False) -> None +- updateUiScaling(self, ui_scaling: float, size: str) -> None +- updatePosition(self, x_pos, y_pos, z_pos, x_rotation, y_rotation, z_rotation, tracker, size) -> None +- mainloop(self) -> None # アニメーション / フェード評価ループ + +使用上の注意: +- OpenVR (SteamVR) が稼働していることが前提です。`checkSteamvrRunning()` で `vrmonitor.exe` の存在チェックを行います。 +- 例外が発生した場合は `errorLogging()` を呼んでスタックトレースを残します。 + +## モジュール構成(補足) + +- overlay.py — OpenVR を使ったオーバーレイ管理。Overlay クラスは複数サイズ(small/large)を扱い、位置/回転/透明度/フェードを制御する。 +- overlay_image.py — PIL を使ってオーバーレイに表示する画像を生成(テキストボックス、ログレイアウト、フォント管理)。 +- overlay_utils.py — 行列演算や座標変換ユーティリティ。 + +注意点: +- OpenVR(SteamVR)に依存。SteamVR が動作していることが前提。 +- フォントファイルは repo の fonts フォルダか、ランタイム内パスを探索して読み込む。 +- 生成画像は RGBA バイト列に変換され `overlay.setOverlayRaw` で渡される。 + diff --git a/src-python/docs/modules/overlay_image.md b/src-python/docs/modules/overlay_image.md new file mode 100644 index 00000000..04735623 --- /dev/null +++ b/src-python/docs/modules/overlay_image.md @@ -0,0 +1,115 @@ +# overlay_image.py — 画像生成ユーティリティ +目的: `models.overlay.overlay_image.OverlayImage` の実装に基づき、オーバーレイ用のテキストボックス/ログ画像を PIL (Pillow) で生成するための仕様書です。 + +このドキュメントは実装に合わせて書かれており、主要な公開メソッドの振る舞い、引数、返り値、例外、使用例、注意点を含みます。 + +概要 +------ +- 提供クラス: `OverlayImage` +- 役割: 文字列(元文/翻訳)やメッセージタイプ(send/receive) を受け取り、Small/Large 向けの RGBA 
PIL.Image を生成する。 +- 依存: Pillow (PIL)、フォントファイル群(`fonts/` ディレクトリまたは環境配下) + +主要機能 +-------- +- テキストをラップして画像化する(行折り返しを含む) +- 複数テキストブロック(原文+複数の翻訳)を縦に連結して一つの画像にする +- 背景(角丸矩形)を合成して最終的な RGBA 画像を返す +- Small と Large で UI 設定(幅、高さ、フォントサイズ等)を切り替え +- フォント探索: 実行環境の `fonts/` 配下または相対パスからフォントを探し、見つからない場合は FileNotFoundError を投げる + +公開 API(要約) +----------------- +- class OverlayImage(root_path: str | None = None) + - コンストラクタ引数 + - root_path: フォント等のリソースのベースディレクトリ。None の場合は実装に合わせて repo の `fonts/` を候補パスとして探索する。 + +- OverlayImage.createOverlayImageSmallLog(message: str, your_language: str, translation: list | None = None, target_language: list | None = None) -> PIL.Image.Image + - 説明: Small ログ向け(横長・1行〜複数行)にテキストブロックを作成して結合し、角丸背景と合成して RGBA 画像を返す。 + - 引数 + - message: 表示する原文テキスト(None を許容しない想定) + - your_language: 原文の言語キー(フォントマッピングに使用) + - translation: 翻訳テキストのリスト(省略可) + - target_language: 翻訳それぞれに対応する言語キーのリスト(省略可) + - 戻り値: PIL.Image.Image (RGBA) + - 例外: フォントが見つからない場合は FileNotFoundError を投げる可能性あり + +- OverlayImage.createOverlayImageLargeLog(message_type: str, message: str | None = None, your_language: str | None = None, translation: list | None = None, target_language: list | None = None) -> PIL.Image.Image + - 説明: Large ログ(複数行 + ヘッダ(Send/Receive)や時刻)向けに、複数ブロックを作成して縦結合し、背景を合成して返す。 + - 引数 + - message_type: 'send' または 'receive'(UI 向けアンカー/色指定に使用) + - message: 表示する原文テキスト(None 可。この場合翻訳のみを表示することもある) + - your_language: 原文の言語キー(フォント選定に使用) + - translation: 翻訳テキストのリスト(省略可) + - target_language: 翻訳それぞれに対応する言語キーのリスト(省略可) + - 戻り値: PIL.Image.Image (RGBA) + - 例外: フォント未発見などで FileNotFoundError を投げる可能性あり + +内部で使われる補助メソッド(要旨) +--------------------------------- +- concatenateImagesVertically(img1, img2, margin=0) -> Image +- addImageMargin(image, top, right, bottom, left, color) -> Image +- createTextboxSmallLog(...) -> Image +- createTextImageLargeLog(...) -> Image +- createTextboxLargeLog(...) 
-> Image +- getUiSizeSmallLog(), getUiColorSmallLog(), getUiSizeLargeLog(), getUiColorLargeLog() + +フォントとローカライズ +----------------------- +- 実装は `LANGUAGES` マッピングを持ち、言語キーからフォントファイル名を決定します(例: "Japanese" -> "NotoSansJP-Regular.ttf")。 +- フォントは `root_path` を基準に探索します。実行環境によりフォントファイルの場所が異なるため、実装は複数パスを順に試します。フォントが見つからない場合は FileNotFoundError を発生させる設計です。 + +描画と折り返しロジック(実装に基づく注意点) +-------------------------------------------- +- テキスト幅を計算し、基準幅に収まるように文字数ベースで分割して折り返す単純なロジックを採用しています。厳密な単語単位折り返しではなく、文字数ベースの分割になります。 +- Small/Large でフォントサイズや余白、角丸半径などを分けており、複数行のテキストブロックを縦結合することで最終画像を作ります。 + +使用例 +------ +Small ログ画像を作る例: + +```python +from models.overlay.overlay_image import OverlayImage + +overlay = OverlayImage() +img = overlay.createOverlayImageSmallLog( + message='こんにちは、世界!', + your_language='Japanese', + translation=['Hello, world!'], + target_language=['English'] +) +img.save('overlay_small.png') +``` + +Large ログ(複数メッセージ履歴)を作る例: + +```python +from models.overlay.overlay_image import OverlayImage +from datetime import datetime + +overlay = OverlayImage() +img = overlay.createOverlayImageLargeLog( + message_type='send', + message='Hello from VRCT', + your_language='English', + translation=['こんにちは'], + target_language=['Japanese'] +) +img.save('overlay_large.png') +``` + +実装上の注意と推奨事項 +----------------------- +- 実行環境にフォントが存在することを確認してください(`fonts/` に主要フォントを置くのが簡単です)。 +- Pillow (PIL) のバージョンに依存する描画 API を使っています。Pillow は v8〜最新程度で問題ありません。 +- 長いテキストの折り返しは単純な文字幅分割ロジックです。より自然な折り返し(単語単位・ルビ考慮等)が必要なら実装拡張を推奨します。 +- 生成画像は RGBA(透過)です。Overlay 側の API(`overlay.setOverlayRaw` 相当)へ渡して使う前提です。 + +復元メモ +-------- +このファイルは実装ファイル `models/overlay/overlay_image.py` を参照して復元しました。実装を変更した場合は本ドキュメントも同期して更新してください。 + +関連ファイル +------------- +- 実装: `models/overlay/overlay_image.py` +- ヘルパ: `models/overlay/overlay_utils.py` +- フォント: `fonts/` ディレクトリ diff --git a/src-python/docs/modules/transcription.md b/src-python/docs/modules/transcription.md new file mode 100644 index 00000000..1efa5ef3 --- /dev/null +++ 
b/src-python/docs/modules/transcription.md @@ -0,0 +1,51 @@ +# transcription — 文字起こしモジュール +概要: マイク/スピーカー音声の録音と Whisper/Google などのエンジンを使った文字起こしを提供するモジュール群です。主なクラスは録音用の Recorder と `AudioTranscriber` です。 + +主要クラス/シグネチャ: +- SelectedMicEnergyAndAudioRecorder(device, energy_threshold, dynamic_energy_threshold, phrase_time_limit) +- SelectedSpeakerEnergyAndAudioRecorder(...) +- SelectedMicEnergyRecorder(device) +- SelectedSpeakerEnergyRecorder(device) +- AudioTranscriber(speaker: bool, source, phrase_timeout: int, max_phrases: int, transcription_engine: str, root: str, whisper_weight_type: str, device: str, device_index: int, compute_type: str) + - transcribeAudioQueue(queue, languages:list, countries:list, avg_logprob: float, no_speech_prob: float) -> bool + - getTranscript() -> dict + +使用例: + +```python +from models.transcription.transcription_recorder import SelectedMicEnergyAndAudioRecorder +from models.transcription.transcription_transcriber import AudioTranscriber + +# 録音 +rec = SelectedMicEnergyAndAudioRecorder(device, energy_threshold=300, dynamic_energy_threshold=False, phrase_time_limit=3) +queue = Queue() +rec.recordIntoQueue(queue, None) + +# 文字起こし +transcriber = AudioTranscriber(speaker=False, source=rec.source, phrase_timeout=3, max_phrases=10, transcription_engine='Google', root='.', whisper_weight_type='base', device='cpu', device_index=0, compute_type='auto') +transcriber.transcribeAudioQueue(queue, ['Japanese'], ['Japan'], -0.8, 0.6) +print(transcriber.getTranscript()) +``` + +注意点: +- Whisper のモデルロードは VRAM を消費します。`Model.detectVRAMError` のような検知と回復策が必要です。 +- 録音は OS のデバイス依存のため `device_manager` でのデバイス取得と組み合わせて利用してください。 + +# models/transcription — 詳細設計 + +構成ファイル: +- transcription_recorder.py — 各デバイス向け Recorder クラス群(Base, SelectedMic*, SelectedSpeaker*)。speech_recognition をラップし、Audio / Energy をキューへ出す。 +- transcription_transcriber.py — AudioTranscriber: Google Speech API または faster-whisper を使った音声→テキスト変換の実行ロジック。複数言語に対する最良候補選択と confidence に基づく選出。 +- 
transcription_whisper.py — Whisper(faster-whisper)重みのダウンロードとモデル生成のユーティリティ。 + +主要契約: +- Recorder は recordIntoQueue(audio_queue, energy_queue) を提供し、バックグラウンドで音声データをキューに流す。 +- AudioTranscriber.transcribeAudioQueue(audio_queue, languages, countries, avg_logprob, no_speech_prob) -> bool + - audio_queue から音声を取り出し認識を試みる。結果は getTranscript() で取得する。常に True/False を返して呼び出し側がループ継続を制御。 + +VRAM エラー対策: +- Whisper のモデルロードで GPU メモリ不足が発生すると、ValueError("VRAM_OUT_OF_MEMORY", message) を投げる実装。Controller で捕捉して機能停止/通知する。 + +外部依存: +- speech_recognition, faster_whisper, pydub, numpy, torch + diff --git a/src-python/docs/modules/translation.md b/src-python/docs/modules/translation.md new file mode 100644 index 00000000..e247a394 --- /dev/null +++ b/src-python/docs/modules/translation.md @@ -0,0 +1,21 @@ +# models/translation — 詳細設計 + +構成ファイル: +- translation_translator.py — Translator クラス。DeepL/API、Google、Bing、Papago、CTranslate2 を統一インターフェースで扱う。 +- translation_utils.py — 重みファイルのダウンロード・検証ロジック(CTranslate2 用)。 +- translation_languages.py — 各エンジンの対応言語マップ。 + +Translator の契約: +- translate(translator_name, source_language, target_language, target_country, message) -> str|False + - 成功時は文字列、失敗または一時的エラーは False を返す。 +- changeCTranslate2Model(path, model_type, device, device_index, compute_type) + - CTranslate2 の Translator オブジェクトと Tokenizer を初期化する。 + +フォールバック: +- Controller/Model 層で翻訳が失敗した場合に CTranslate2 にフォールバックする実装がある。 + +外部依存: +- ctranslate2, transformers, deepl(オプション)、translators(任意) + +安全性: +- 翻訳 API キー(DeepL)は Translator.authenticationDeepLAuthKey で検証して保持。 diff --git a/src-python/docs/modules/transliteration.md b/src-python/docs/modules/transliteration.md new file mode 100644 index 00000000..577d055d --- /dev/null +++ b/src-python/docs/modules/transliteration.md @@ -0,0 +1,17 @@ +# models/transliteration — 詳細設計 + +目的: 日本語テキストの仮名読みを解析し、ひらがな/ローマ字(Hepburn)に変換する。 + +主要クラス/関数: +- class Transliterator + - analyze(text: str, use_macron: bool=False) -> List[dict] + - 入力: テキスト + - 出力: トークンのリスト。各要素は { orig, 
kana, hira, hepburn } + - split_kanji_okurigana(surface, reading_kana): 漢字+送り仮名を分割して kana を割り当てるロジックを持つ(詳細設計あり) + +実装上のポイント: +- SudachiPy を使い形態素解析して読みを得る。 +- Katakana を Hiragana に変換し、katakana_to_hepburn モジュールでローマ字化を行う。 +- 文脈ルールを `transliteration_context_rules.apply_context_rules` で適用できる設計(ルールエンジン)。 + +依存: sudachipy diff --git a/src-python/docs/modules/utils.md b/src-python/docs/modules/utils.md new file mode 100644 index 00000000..7a7b2289 --- /dev/null +++ b/src-python/docs/modules/utils.md @@ -0,0 +1,74 @@ +# utils.py — 関数一覧と使用例 +目的: 共通ユーティリティ(ログ、JSON 出力、ネットワーク/ポート検査、デバイス/計算タイプ列挙、バリデーション等)を提供します。 + +主要関数とシグネチャ: +- validateDictStructure(data: dict, structure: dict) -> bool +- isConnectedNetwork(url: str = "http://www.google.com", timeout: int = 3) -> bool +- isAvailableWebSocketServer(host: str, port: int) -> bool +- isValidIpAddress(ip_address: str) -> bool +- getComputeDeviceList() -> dict +- getBestComputeType(device: str, device_index: int) -> str +- encodeBase64(data: str) -> dict +- removeLog() -> None +- setupLogger(name, log_file, level=logging.INFO) -> logging.Logger +- printLog(log: str, data: Any = None) -> None +- printResponse(status: int, endpoint: str, result: Any = None) -> None +- errorLogging() -> None + +使用例: + +```python +from utils import printResponse, getComputeDeviceList, validateDictStructure + +# JSON 形式で mainloop に応答を返す +printResponse(200, '/get/data/version', {'version': '3.2.2'}) + +# 利用可能な計算デバイス一覧を取得 +devices = getComputeDeviceList() +print(devices) + +# 辞書構造のバリデーション +data = {'a': 1, 'b': {'c': 'x'}} +structure = {'a': int, 'b': {'c': str}} +ok = validateDictStructure(data, structure) +print('valid:', ok) +``` + +注意点: +- `printResponse` は stdout に JSON を出力しつつログファイルにも書き込みます。大きなオブジェクトは json.dumps で失敗する可能性があるため、例外処理が含まれています。 + +# utils.py — 詳細設計 + +目的: 小さなユーティリティ関数群。ロギング、ネットワーク検査、型検証、計算デバイス列挙など。 + +主要関数/変数: +- validateDictStructure(data: dict, structure: dict) -> bool + - 説明: 辞書が期待される構造(キーセットと値の型/入れ子)に完全一致するか検証する。 + - 入力: data(検証対象), 
structure(期待構造: 値が型または入れ子 dict) + - 出力: bool + - 例外: 型不一致や欠落時は False を返す(例外は投げない)。 + +- isConnectedNetwork(url="http://www.google.com", timeout=3) -> bool + - 説明: 指定 URL に HTTP GET して接続可否を判定。requests を使用。 + +- isAvailableWebSocketServer(host: str, port: int) -> bool + - 説明: 指定ポートへ bind できるかを試し、使用中かを判別する(True=利用可能)。 + +- isValidIpAddress(ip_address: str) -> bool + - 説明: ipaddress.ip_address で検証。 + +- getComputeDeviceList() -> dict + - 説明: CPU と CUDA(利用可能なら)を列挙し、各デバイスでサポートされる compute types を取得する。 + - 依存: torch, ctranslate2.get_supported_compute_types + +- getBestComputeType(device: str, device_index: int) -> str + - 説明: デバイス名に基づき優先 compute_type を選び、利用可能なものを返す。デフォルトは "float32"。 + +- setupLogger(name, log_file, level=logging.INFO) -> Logger + - 説明: RotatingFileHandler を使って UTF-8 ログを作る。10MB ローテーション。 + +- printLog / printResponse / errorLogging + - 説明: mainloop と通信するために標準出力へ JSON を flush するユーティリティ。内部で file ログへも書く。 + +注意点: +- ネットワーク検査やファイル生成で例外が発生した場合、errorLogging() を呼んでトレースを error.log に保存する。 diff --git a/src-python/docs/modules/watchdog.md b/src-python/docs/modules/watchdog.md new file mode 100644 index 00000000..26a822bd --- /dev/null +++ b/src-python/docs/modules/watchdog.md @@ -0,0 +1,12 @@ +# models/watchdog — 詳細設計 + +目的: 外部(Process 管理側)へ定期的に "生存" を知らせるために使う軽量ウォッチドッグ。 + +設計: +- class Watchdog(timeout:int=60, interval:int=20) + - feed(): 最終フィード時刻を更新 + - setCallback(callback): タイムアウト時に呼ぶコールバックを登録 + - start(): 現状は単純で、呼び出し側がループ中に start() を呼ぶかたち。実装は簡易(将来的にスレッド化推奨) + +注意: +- 現行実装は非常にシンプルで、長時間のブロッキングやスレッド運用の見直しが必要になり得る。 diff --git a/src-python/docs/modules/websocket.md b/src-python/docs/modules/websocket.md new file mode 100644 index 00000000..936d61de --- /dev/null +++ b/src-python/docs/modules/websocket.md @@ -0,0 +1,18 @@ +# models/websocket — 詳細設計 + +目的: 外部クライアント(例えば第三者のアプリ)へ翻訳済みテキストやイベントをブロードキャストする軽量 WebSocket サーバー。 + +API: +- class WebSocketServer(host='127.0.0.1', port=8765) + - start(): 別スレッドで asyncio ループを生成しサーバを起動。 + - stop(): サーバ停止、全クライアント切断。 + - 
set_message_handler(handler): クライアントからのメッセージ受信時のコールバックを登録。handler(server, websocket, message) + - send(message): 非同期キューに積んで全クライアントへ送信(スレッドセーフ)。 + - broadcast(message): asyncio を経由して即時ブロードキャスト。 + +実装上の工夫: +- サーバ本体は別スレッドで asyncio イベントループを run_forever している。 +- 送信用に内部キュー `_send_queue` を持ち、_send_loop で順次送信する。これにより GUI 等から安全に send() を呼べる。 + +依存: websockets(asyncio) + diff --git a/src-python/docs/run_events_payloads.md b/src-python/docs/run_events_payloads.md new file mode 100644 index 00000000..a30f7b8d --- /dev/null +++ b/src-python/docs/run_events_payloads.md @@ -0,0 +1,125 @@ +# Run events payloads + +このファイルは `controller.py` 内で `self.run(status, run_mapping["key"], payload)` として発行される全ての run イベントの鍵と、実際に渡されるペイロードの具体例を列挙します。 + +--- + +## 抽出済み run イベント一覧(正規化済み) + +以下は controller.py の self.run 呼び出しを解析して抽出した run イベントです。名称は `mainloop.py` の `run_mapping` に合わせて正規化しています。 + +- connected_network (200) + - payload: true | false + +- enable_ai_models (200) + - payload: true | false + +- mic_host_list (200) + - payload: list[str] + +- mic_device_list (200) + - payload: list[str] + +- speaker_device_list (200) + - payload: list[str] + +- initialization_complete (200) + - payload: dict mapping endpoint -> current value (constructed from init_mapping) + +- selected_mic_device (200) + - payload: {"host": , "device": } + +- selected_speaker_device (200) + - payload: string (device name) + +- error_device (400) + - payload: {"message": , "data": null} + +- check_mic_volume (200) + - payload: numeric energy value (float) + +- check_speaker_volume (200) + - payload: numeric energy value (float) + +- download_progress_ctranslate2_weight (200) + - payload: {"weight_type": , "progress": } + +- downloaded_ctranslate2_weight (200) + - payload: + +- error_ctranslate2_weight (400) + - payload: {"message":"CTranslate2 weight download error","data": null} + +- download_progress_whisper_weight (200) + - payload: {"weight_type": , "progress": } + +- downloaded_whisper_weight (200) + - payload: + +- 
error_whisper_weight (400) + - payload: {"message":"Whisper weight download error","data": null} + +- word_filter (200) + - payload: {"message": "Detected by word filter: "} + +- error_translation_engine (400) + - payload: {"message":"Translation engine limit error","data": null} + +- error_translation_mic_vram_overflow (400) + - payload: {"message":"VRAM out of memory during translation of mic","data": } + +- error_translation_speaker_vram_overflow (400) + - payload: {"message":"VRAM out of memory during translation of speaker","data": } + +- error_translation_chat_vram_overflow (400) + - payload: {"message":"VRAM out of memory during translation of chat","data": } + +- enable_translation (400 or 200) + - payload example on OOM: {"message":"Translation disabled due to VRAM overflow","data": false} + +- transcription_send_mic_message (200) + - payload: { + "original": {"message": , "transliteration": }, + "translations": [ {"message": , "transliteration": }, ... ] + } + +- transcription_receive_speaker_message (200) + - payload: same shape as transcription_send_mic_message + +- software_update_info (200) + - payload: dict (e.g. {"has_update": true, "latest_version": "3.3.0"}) + +- selected_translation_compute_type (200) + - payload: string e.g. "auto" | "cpu" | "cuda:0" + +- selected_transcription_compute_type (200) + - payload: string + +- selected_translation_compute_device (200) + - payload: device descriptor (object) — `config.SELECTED_TRANSLATION_COMPUTE_DEVICE` の現在値。 + +- selected_translation_engines (200) + - payload: config.SELECTED_TRANSLATION_ENGINES (list/dict per tab) + +- translation_engines (200) + - payload: list of selectable engines (e.g. 
["CTranslate2"]) + +- initialization_progress (200) + - payload: integer stage (used values in code: 1..4) + +- enable_osc_query (200) + - payload: {"data": true|false, "disabled_functions": [...]} + +- enable_transcription_receive (200) + - payload: boolean (true when transcription receive enabled) + +- error_transcription_mic_vram_overflow (400) + - payload: {"message":"VRAM out of memory during mic transcription","data": } + +- error_transcription_speaker_vram_overflow (400) + - payload: {"message":"VRAM out of memory during speaker transcription","data": } + +--- + +注: 上記は controller.py の self.run 呼び出しを解析して作成した "実際に送られる" ペイロード例です。UI 側はこれらの形を期待してコーディングしてください。状況によっては model 層からの戻り値の具象型が変化するため、実装では型チェック/存在チェックを行ってください。 + diff --git a/src-python/docs/runtime.md b/src-python/docs/runtime.md new file mode 100644 index 00000000..f633e9f4 --- /dev/null +++ b/src-python/docs/runtime.md @@ -0,0 +1,43 @@ +# 実行手順と依存関係 + +対象 OS: Windows を想定(device_manager は WASAPI / pycaw を使う)。 + +必須依存(概略): +- Python 3.10+ 推奨 +- pip パッケージ: + - torch + - ctranslate2 + - transformers + - requests + - pyaudiowpatch + - pycaw + - speech_recognition + - pydub + - websockets + - python-osc + - tinyoscquery + - sudachipy + - pillow + - flashtext + - faster_whisper (オプション: Whisper をローカルで使う場合) + - deepl / translators(外部翻訳を使う場合) + +実行手順 (開発環境): +1. 仮想環境を作成し有効化 +2. 必要パッケージをインストール + - requirements.txt を用意する場合はそこからインストール +3. 
`src-python` をワークディレクトリにして `python mainloop.py` を実行 + +注意点: +- Whisper / CTranslate2 の重みは初回にダウンロードする必要がある。Controller の downloadCtranslate2Weight / downloadWhisperWeight エンドポイントからトリガできる。 +- OpenVR (SteamVR) を使う Overlay は SteamVR が動作している環境でのみ動作。 +- Windows 固有: device_manager が pyaudiowpatch と pycaw に依存。Linux/Mac での互換性は保証されない。 + +ログ: +- process.log (標準動作ログ) +- error.log (トレースバック) +- models 用のロガーは `model.startLogger()` により PATH_LOGS 配下に日付付きファイルを作成する。 + +デバッグ: +- `utils.printLog` と `utils.printResponse` が stdout に JSON を出すため、GUI 側はそれをパースして UI 更新を行う。 +- WebSocket を有効にすると外部クライアントに JSON をブロードキャストできる。 diff --git a/src-python/mypy.ini b/src-python/mypy.ini new file mode 100644 index 00000000..d9f53b5e --- /dev/null +++ b/src-python/mypy.ini @@ -0,0 +1,32 @@ +[mypy] +# Temporarily ignore missing type stubs for third-party libraries to focus on +# type errors inside the project. We'll tighten this later. +ignore_missing_imports = True +python_version = 3.11 +show_error_codes = True + +# Per-module ignores can be added later for specific noisy modules. + +[mypy-tests.*] +ignore_errors = True + +# Temporarily ignore entire implementation areas that produce many non-actionable +# mypy errors (third-party untyped libs or large unannotated modules). We'll +# progressively remove these ignores as we annotate the codebase. 
+[mypy-models.transliteration.*] +ignore_errors = True + +[mypy-models.overlay.*] +ignore_errors = True + +[mypy-models.osc.*] +ignore_errors = True + +[mypy-models.transcription.*] +ignore_errors = True + +[mypy-models.translation.*] +ignore_errors = True + +[mypy-device_manager] +ignore_errors = True From e67242a0c4aef079350f4fc649cb27ec5b3fd773 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Thu, 9 Oct 2025 13:15:01 +0900 Subject: [PATCH 72/92] =?UTF-8?q?[=E6=96=B0=E8=A6=8F=E8=BF=BD=E5=8A=A0]=20?= =?UTF-8?q?=E3=83=89=E3=82=AD=E3=83=A5=E3=83=A1=E3=83=B3=E3=83=88=E3=81=AE?= =?UTF-8?q?=E3=82=A8=E3=83=B3=E3=83=89=E3=83=9D=E3=82=A4=E3=83=B3=E3=83=88?= =?UTF-8?q?=E3=81=A8=E3=83=9E=E3=83=83=E3=83=94=E3=83=B3=E3=82=B0=E3=81=AE?= =?UTF-8?q?=E6=A4=9C=E8=A8=BC=E3=82=B9=E3=82=AF=E3=83=AA=E3=83=97=E3=83=88?= =?UTF-8?q?=E3=82=92=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../scripts/cleanup_docs_placeholders.py | 16 ++ src-python/scripts/find_doc_tokens.py | 22 +++ src-python/scripts/print_mapping.py | 28 +++ src-python/scripts/verify_docs_vs_code.py | 161 ++++++++++++++++++ .../scripts/verify_docs_vs_code_runtime.py | 126 ++++++++++++++ 5 files changed, 353 insertions(+) create mode 100644 src-python/scripts/cleanup_docs_placeholders.py create mode 100644 src-python/scripts/find_doc_tokens.py create mode 100644 src-python/scripts/print_mapping.py create mode 100644 src-python/scripts/verify_docs_vs_code.py create mode 100644 src-python/scripts/verify_docs_vs_code_runtime.py diff --git a/src-python/scripts/cleanup_docs_placeholders.py b/src-python/scripts/cleanup_docs_placeholders.py new file mode 100644 index 00000000..9da108fd --- /dev/null +++ b/src-python/scripts/cleanup_docs_placeholders.py @@ -0,0 +1,16 @@ +from pathlib import Path +p=Path(__file__).resolve().parents[1]/'docs'/'api.md' +text=p.read_text(encoding='utf-8') +lines=[] +for line in 
text.splitlines(): + stripped=line.strip() + # Remove exact umbrella placeholder tokens or standalone list entries + if stripped in ('- /set/enable', '- /set/disable', '- /get/data/', '/set/enable', '/set/disable', '/get/data/'): + continue + # Remove lines that are just '/get/data' or '/set/data' or '/run/' etc + if stripped in ('/get/data', '/set/data', '/run/', '/get', '/set', '/run'): + continue + lines.append(line) +new='\n'.join(lines) +p.write_text(new,encoding='utf-8') +print('cleaned') diff --git a/src-python/scripts/find_doc_tokens.py b/src-python/scripts/find_doc_tokens.py new file mode 100644 index 00000000..b9e1c95f --- /dev/null +++ b/src-python/scripts/find_doc_tokens.py @@ -0,0 +1,22 @@ +from pathlib import Path + +ROOT = Path(__file__).resolve().parents[1] +DOC_DIR = ROOT / 'docs' + +tokens = [ + 'transcription_mic', + 'transcription_speaker', + 'selected_translation_compute_device', + '/run/selected_translation_compute_device', + '/run/transcription_mic', + '/run/transcription_speaker', +] + +for p in DOC_DIR.rglob('*.md'): + text = p.read_text(encoding='utf-8') + for i, line in enumerate(text.splitlines(), start=1): + for t in tokens: + if t in line: + print(f"{p}:{i}:{line.strip()}") + +print('done') diff --git a/src-python/scripts/print_mapping.py b/src-python/scripts/print_mapping.py new file mode 100644 index 00000000..8b66e177 --- /dev/null +++ b/src-python/scripts/print_mapping.py @@ -0,0 +1,28 @@ +from pathlib import Path +import re +ROOT = Path(__file__).resolve().parents[1] +MAINLOOP = ROOT / 'mainloop.py' +text = MAINLOOP.read_text(encoding='utf-8') +run_mapping = {} +mapping = {} +for mm in re.finditer(r"[\'\"]([^\'\"]+)[\'\"]\s*:\s*[\'\"](/run/[a-zA-Z0-9_\-\\/]+)[\'\"]", text): + run_mapping[mm.group(1)] = mm.group(2) +for mm in re.finditer(r"[\'\"](/(?:get|set)/[a-zA-Z0-9_\-\\/]+)[\'\"]", text): + mapping[mm.group(1)] = True +print('run_mapping entries:', len(run_mapping)) +print('sample run_mapping keys:', 
sorted(run_mapping.items())[:10]) +print('\nmapping endpoints count:', len(mapping)) +# show any endpoints that are exactly '/get/data/' +print('\ncontains /get/data/?', '/get/data/' in mapping) +if '/get/data/' in mapping: + print('Found /get/data/ literal in mainloop.py text') +# show ones containing '/get/data' +has_get_data = [k for k in mapping.keys() if '/get/data' in k] +print('\nendpoints containing /get/data:', len(has_get_data)) +if has_get_data: + for k in sorted(has_get_data)[:30]: + print(' -', k) +# print first 20 mapping endpoints +print('\nFirst 40 endpoints:') +for k in sorted(mapping.keys())[:40]: + print(' -', k) diff --git a/src-python/scripts/verify_docs_vs_code.py b/src-python/scripts/verify_docs_vs_code.py new file mode 100644 index 00000000..7aa47c40 --- /dev/null +++ b/src-python/scripts/verify_docs_vs_code.py @@ -0,0 +1,161 @@ +import importlib.util +import re +from pathlib import Path + +ROOT = Path(__file__).resolve().parents[1] +MAINLOOP = ROOT / 'mainloop.py' +CONTROLLER = ROOT / 'controller.py' +DOC_API = ROOT / 'docs' / 'api.md' +DOC_RUN = ROOT / 'docs' / 'run_events_payloads.md' + + +def extract_mapping_from_mainloop(): + """ + Import mainloop.py and read 'mapping' and 'run_mapping' objects directly. + This executes the module in an isolated module object; mainloop has some + initialization but exposing these dicts is acceptable for verification. 
+ """ + run_mapping = {} + mapping = {} + try: + spec = importlib.util.spec_from_file_location('project_mainloop', str(MAINLOOP)) + module = importlib.util.module_from_spec(spec) + loader = spec.loader + if loader is None: + raise RuntimeError('Could not load mainloop module') + loader.exec_module(module) + mapping = getattr(module, 'mapping', {}) or {} + run_mapping = getattr(module, 'run_mapping', {}) or {} + return mapping, run_mapping + except Exception as e: + print('Error importing mainloop.py', e) + + # Fallback: simple regex-based extraction from mainloop.py text + try: + text = MAINLOOP.read_text(encoding='utf-8') + # run_mapping entries like: "transcription_mic": "/run/transcription_send_mic_message", + for mm in re.finditer(r"[\'\"]([^\'\"]+)[\'\"]\s*:\s*[\'\"](/run/[a-zA-Z0-9_\-\/]+)[\'\"]", text): + run_mapping[mm.group(1)] = mm.group(2) + # mapping endpoints: any '/get/...' or '/set/...' literal in file + for mm in re.finditer(r"[\'\"](/(?:get|set)/[a-zA-Z0-9_\-\/]+)[\'\"]", text): + mapping[mm.group(1)] = True + except Exception as e: + print('Error parsing mainloop.py via fallback', e) + + return mapping, run_mapping + + +def extract_run_events_from_controller(): + code = CONTROLLER.read_text(encoding='utf-8') + # find self.run( ... , self.run_mapping["key"], ... ) and direct self.run(..., + run_keys = set() + # pattern for self.run(..., self.run_mapping["xxx"], ...) + pattern = re.compile(r"self\.run\([^\)]*self\.run_mapping\[\s*[\'\"]([^\'\"]+)[\'\"]\s*\]", re.M) + for m in pattern.finditer(code): + run_keys.add(m.group(1)) + # also find self.run(..., "/run/xxx", ...) 
+ pattern2 = re.compile(r"self\.run\([^\)]*\"(/run/[^\'\"]+)\"", re.M) + for m in pattern2.finditer(code): + run_keys.add(m.group(1)) + return run_keys + + +def extract_endpoints_from_docs(): + api = DOC_API.read_text(encoding='utf-8') + run = DOC_RUN.read_text(encoding='utf-8') if DOC_RUN.exists() else '' + endpoints = set() + run_events = set() + # conservative extraction: match endpoints that start with /get/ /set/ /run/ + pattern = re.compile(r"(/(?:get|set|run)(?:/[a-zA-Z0-9_\-]+)+)") + for m in pattern.finditer(api): + token = m.group(1) + # drop umbrella placeholders and tokens that end with '/' + if token in ('/get', '/set', '/run', '/get/data', '/set/data'): + continue + if token.endswith('/'): + continue + if token.startswith('/run/'): + run_events.add(token) + else: + endpoints.add(token) + for m in pattern.finditer(run): + token = m.group(1) + if token in ('/get', '/set', '/run', '/get/data', '/set/data'): + continue + if token.endswith('/'): + continue + if token.startswith('/run/'): + run_events.add(token) + else: + endpoints.add(token) + return endpoints, run_events + + +def main(): + mapping, run_mapping = extract_mapping_from_mainloop() + code_endpoints = set(mapping.keys()) + code_run_events = set(run_mapping.values()) + # normalize run events: run_mapping values likely like '/run/…' + controller_run_keys = extract_run_events_from_controller() + + doc_endpoints, doc_run_events = extract_endpoints_from_docs() + + report = [] + report.append('=== Summary ===') + report.append(f'Code endpoints (/get,/set,/run): {len(code_endpoints)}') + report.append(f'Code run_mapping entries: {len(code_run_events)}') + report.append(f'Controller-run keys found by scan: {len(controller_run_keys)}') + report.append(f'Documented endpoints found in docs/api.md: {len(doc_endpoints)}') + report.append(f'Documented run events found in docs: {len(doc_run_events)}') + + # endpoints present in code but not in docs + missing_in_docs = code_endpoints - doc_endpoints + 
extra_in_docs = doc_endpoints - code_endpoints + + report.append('\n=== Endpoints present in code but NOT documented ===') + if missing_in_docs: + for e in sorted(missing_in_docs): + report.append(' - ' + e) + else: + report.append(' - None') + + report.append('\n=== Endpoints documented but NOT in code ===') + if extra_in_docs: + for e in sorted(extra_in_docs): + report.append(' - ' + e) + else: + report.append(' - None') + + report.append('\n=== Run events present in code (run_mapping) but NOT documented ===') + missing_run_in_docs = code_run_events - doc_run_events + if missing_run_in_docs: + for e in sorted(missing_run_in_docs): + report.append(' - ' + e) + else: + report.append(' - None') + + report.append('\n=== Run keys emitted in controller (self.run mapping keys) but NOT in run_mapping values ===') + # controller_run_keys are keys like 'connected_network' or '/run/connected_network' + # normalize controller keys to values: if key starts with '/run/' keep, else map via run_mapping if possible + normalized = set() + for k in controller_run_keys: + if k.startswith('/run/'): + normalized.add(k) + else: + if k in run_mapping: + normalized.add(run_mapping[k]) + else: + normalized.add(k) + # compare normalized with code_run_events + extra_controller_keys = normalized - code_run_events + if extra_controller_keys: + for e in sorted(extra_controller_keys): + report.append(' - ' + e) + else: + report.append(' - None') + + out = '\n'.join(report) + print(out) + +if __name__ == '__main__': + main() diff --git a/src-python/scripts/verify_docs_vs_code_runtime.py b/src-python/scripts/verify_docs_vs_code_runtime.py new file mode 100644 index 00000000..187575f3 --- /dev/null +++ b/src-python/scripts/verify_docs_vs_code_runtime.py @@ -0,0 +1,126 @@ +import re +import json +import sys +from pathlib import Path + +ROOT = Path(__file__).resolve().parents[1] +DOC_API = ROOT / 'docs' / 'api.md' +DOC_RUN = ROOT / 'docs' / 'run_events_payloads.md' + +# Ensure project root is 
importable so `import mainloop` works when this script is +# executed from the scripts/ folder. +sys.path.insert(0, str(ROOT)) + + +def main(): + # Delayed imports to avoid module-level import ordering issues (E402 in linters) + import mainloop + import controller as controller_module + + mapping_keys = set(mainloop.mapping.keys()) + run_mapping_values = set(mainloop.run_mapping.values()) + + # extract controller emitted run keys by source scan + controller_src = Path(controller_module.__file__).read_text(encoding='utf-8') + controller_run_keys = set() + for m in re.finditer(r"self\.run\([^\)]*self\.run_mapping\[\s*[\'\"]([^\'\"]+)[\'\"]\s*\]", controller_src): + controller_run_keys.add(m.group(1)) + for m in re.finditer(r"self\.run\([^\)]*\"(/run/[a-zA-Z0-9_\-/]+)\"", controller_src): + controller_run_keys.add(m.group(1)) + # read docs and extract endpoints conservatively (only full endpoints starting with /get/ /set/ /run/) + api_text = DOC_API.read_text(encoding='utf-8') + run_text = DOC_RUN.read_text(encoding='utf-8') if DOC_RUN.exists() else '' + + # include delete endpoints as well (e.g. /delete/data/deepl_auth_key) + endpoint_pattern = re.compile(r"(/(?:get|set|run|delete)[A-Za-z0-9_\-/]*)") + + doc_endpoints = set(m.group(1) for m in endpoint_pattern.finditer(api_text + '\n' + run_text)) + + # Remove umbrella placeholder artifacts that sometimes appear due to + # comma-separated lists or pattern fragments in the markdown. These are + # not concrete endpoints and should not be treated as documented endpoints + # for parity checking. + umbrella_tokens = { + '/get', '/set', '/run', '/get/data', '/set/data', '/set/enable', '/set/disable' + } + # Remove exact umbrella tokens and any accidental entries that end with a + # trailing slash (these are artifacts of pattern matching in markdown). 
+ doc_endpoints = {e for e in doc_endpoints if e not in umbrella_tokens and not e.endswith('/')} + + # Compare + missing_in_docs = mapping_keys - doc_endpoints + # A documented endpoint is valid if it corresponds to either an incoming mapping (mapping_keys) + # or an outgoing run event (run_mapping_values). Treat extra_in_docs as anything documented + # that is neither in mapping_keys nor in run_mapping_values. + extra_in_docs = doc_endpoints - (mapping_keys | run_mapping_values) + + missing_run_in_docs = run_mapping_values - doc_endpoints + + # Normalize controller keys to run_mapping values + normalized = set() + for k in controller_run_keys: + if k.startswith('/run/'): + normalized.add(k) + else: + if k in mainloop.run_mapping: + normalized.add(mainloop.run_mapping[k]) + else: + normalized.add(k) + + extra_controller_keys = normalized - run_mapping_values + + report = [] + report.append('=== Runtime verification report ===') + report.append(f'Code mapping endpoints: {len(mapping_keys)}') + report.append(f'Code run_mapping entries: {len(run_mapping_values)}') + report.append(f'Controller emitted run keys: {len(controller_run_keys)}') + report.append(f'Documented endpoints (docs): {len(doc_endpoints)}') + + report.append('\n--- Endpoints present in code but NOT documented ---') + if missing_in_docs: + for e in sorted(missing_in_docs): + report.append(' - ' + e) + else: + report.append(' - None') + + report.append('\n--- Endpoints documented but NOT in code ---') + if extra_in_docs: + for e in sorted(extra_in_docs): + report.append(' - ' + e) + else: + report.append(' - None') + + report.append('\n--- Run events present in code (run_mapping) but NOT documented ---') + if missing_run_in_docs: + for e in sorted(missing_run_in_docs): + report.append(' - ' + e) + else: + report.append(' - None') + + report.append('\n--- Run keys emitted in controller (normalized) but NOT in run_mapping values ---') + if extra_controller_keys: + for e in sorted(extra_controller_keys): + 
report.append(' - ' + e) + else: + report.append(' - None') + + print('\n'.join(report)) + + # Also output JSON for downstream processing + out = { + 'mapping_keys': sorted(mapping_keys), + 'run_mapping_values': sorted(run_mapping_values), + 'controller_run_keys': sorted(controller_run_keys), + 'doc_endpoints': sorted(doc_endpoints), + 'missing_in_docs': sorted(missing_in_docs), + 'extra_in_docs': sorted(extra_in_docs), + 'missing_run_in_docs': sorted(missing_run_in_docs), + 'extra_controller_keys': sorted(extra_controller_keys), + } + print('\nJSON_OUTPUT_START') + print(json.dumps(out)) + print('JSON_OUTPUT_END') + + +if __name__ == '__main__': + main() From c1cf78cda48384c35db4522a3fad69ce24b82d4d Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Thu, 9 Oct 2025 13:34:05 +0900 Subject: [PATCH 73/92] =?UTF-8?q?[=E6=94=B9=E5=96=84]=20=E5=9E=8B=E6=B3=A8?= =?UTF-8?q?=E9=87=88=E3=81=AE=E8=BF=BD=E5=8A=A0=E3=81=A8=E3=83=89=E3=82=AD?= =?UTF-8?q?=E3=83=A5=E3=83=A1=E3=83=B3=E3=83=88=E3=81=AE=E6=9B=B4=E6=96=B0?= =?UTF-8?q?=20-=20config.py,=20controller.py,=20model.py,=20mainloop.py,?= =?UTF-8?q?=20utils.py=20=E3=81=A7=E3=81=AE=E5=9E=8B=E6=B3=A8=E9=87=88?= =?UTF-8?q?=E3=81=AE=E8=BF=BD=E5=8A=A0=20-=20CODING=5FRULES.md=20=E3=81=A8?= =?UTF-8?q?=20api.md=20=E3=81=AE=E3=83=89=E3=82=AD=E3=83=A5=E3=83=A1?= =?UTF-8?q?=E3=83=B3=E3=83=88=E3=82=92=E6=9B=B4=E6=96=B0=20-=20=E4=B8=8D?= =?UTF-8?q?=E8=A6=81=E3=81=AA=E3=82=B3=E3=83=BC=E3=83=89=E3=81=AE=E5=89=8A?= =?UTF-8?q?=E9=99=A4=E3=81=A8=E3=83=AA=E3=83=95=E3=82=A1=E3=82=AF=E3=82=BF?= =?UTF-8?q?=E3=83=AA=E3=83=B3=E3=82=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src-python/config.py | 5 +- src-python/controller.py | 40 ++++++++----- src-python/docs/CODING_RULES.md | 2 - src-python/docs/api.md | 3 - src-python/mainloop.py | 5 +- src-python/model.py | 56 +++++++++++-------- src-python/models/__init__.py | 5 ++ 
src-python/models/overlay/__init__.py | 5 ++ .../transcription/transcription_recorder.py | 1 - src-python/scripts/find_doc_tokens.py | 1 - src-python/utils.py | 8 +-- 11 files changed, 79 insertions(+), 52 deletions(-) create mode 100644 src-python/models/__init__.py create mode 100644 src-python/models/overlay/__init__.py diff --git a/src-python/config.py b/src-python/config.py index 9500b8fe..8f32f4af 100644 --- a/src-python/config.py +++ b/src-python/config.py @@ -5,6 +5,7 @@ from os import path as os_path, makedirs as os_makedirs from json import load as json_load from json import dump as json_dump import threading +from typing import Optional, Dict, Any import torch from device_manager import device_manager from models.translation.translation_languages import translation_lang @@ -22,8 +23,8 @@ def json_serializable(var_name): class Config: _instance = None - _config_data = {} - _timer = None + _config_data: Dict[str, Any] = {} + _timer: Optional[threading.Timer] = None _debounce_time = 2 def __new__(cls): diff --git a/src-python/controller.py b/src-python/controller.py index d5322134..c7d62402 100644 --- a/src-python/controller.py +++ b/src-python/controller.py @@ -1,5 +1,5 @@ import copy -from typing import Callable, Any +from typing import Callable, Any, List, Optional from time import sleep from subprocess import Popen from threading import Thread @@ -11,10 +11,14 @@ from utils import removeLog, printLog, errorLogging, isConnectedNetwork, isValid class Controller: def __init__(self) -> None: - self.init_mapping = {} - self.run_mapping = {} - self.run = None - self.device_access_status = True + # typed attributes to satisfy static type checkers + self.init_mapping: dict = {} + self.run_mapping: dict = {} + # initialize with a no-op callable so callers can safely call self.run + def _noop_run(status: int, endpoint: str, payload: Any = None) -> None: + return None + self.run: Callable[[int, str, Any], None] = _noop_run + self.device_access_status: bool = True 
def setInitMapping(self, init_mapping:dict) -> None: self.init_mapping = init_mapping @@ -251,7 +255,7 @@ class Controller: elif isinstance(message, str) and len(message) > 0: translation = [] - transliteration_message = [] + transliteration_message: List[Any] = [] transliteration_translation = [] if model.checkKeywords(message): self.run( @@ -407,7 +411,7 @@ class Controller: ) elif isinstance(message, str) and len(message) > 0: translation = [] - transliteration_message = [] + transliteration_message: List[Any] = [] transliteration_translation = [] if model.checkKeywords(message): self.run( @@ -566,12 +570,12 @@ class Controller: translation_text = f" ({'/'.join(translation)})" if translation else "" model.logger.info(f"[RECEIVED] {message}{translation_text}") - def chatMessage(self, data) -> None: + def chatMessage(self, data) -> dict: id = data["id"] message = data["message"] if len(message) > 0: translation = [] - transliteration_message = [] + transliteration_message: List[Any] = [] transliteration_translation = [] if config.ENABLE_TRANSLATION is False: pass @@ -739,6 +743,7 @@ class Controller: self.run_mapping["software_update_info"], software_update_info, ) + return {"status":200, "result": software_update_info} @staticmethod def getComputeMode(*args, **kwargs) -> dict: @@ -800,11 +805,15 @@ class Controller: if is_vram_error: # Defaultのデバイス設定に戻す printLog("VRAM error detected, reverting device setting") + self.run( + 400, + self.run_mapping["error_translation_enable_vram_overflow"], + { + "message":"VRAM out of memory enabling translation", + "data": error_message + }, + ) self.setDisableTranslation() - config.SELECTED_TRANSLATION_COMPUTE_DEVICE = copy.deepcopy(config.SELECTABLE_COMPUTE_DEVICE_LIST[0]) - config.SELECTED_TRANSLATION_COMPUTE_TYPE = "auto" - self.run(200, self.run_mapping["selected_translation_compute_device"], config.SELECTED_TRANSLATION_COMPUTE_DEVICE) - self.run(200, self.run_mapping["selected_translation_compute_type"], 
config.SELECTED_TRANSLATION_COMPUTE_TYPE) self.run( 400, self.run_mapping["enable_translation"], @@ -2234,13 +2243,13 @@ class Controller: th_stopCheckSpeakerEnergy.join() @staticmethod - def startThreadingDownloadCtranslate2Weight(weight_type:str, callback:Callable[[float], None], end_callback:Callable[[float], None]) -> None: + def startThreadingDownloadCtranslate2Weight(weight_type:str, callback:Callable[[float], None], end_callback:Optional[Callable[..., None]] = None) -> None: th_download = Thread(target=model.downloadCTranslate2ModelWeight, args=(weight_type, callback, end_callback)) th_download.daemon = True th_download.start() @staticmethod - def startThreadingDownloadWhisperWeight(weight_type:str, callback:Callable[[float], None], end_callback:Callable[[float], None]) -> None: + def startThreadingDownloadWhisperWeight(weight_type:str, callback:Callable[[float], None], end_callback:Optional[Callable[..., None]] = None) -> None: th_download = Thread(target=model.downloadWhisperModelWeight, args=(weight_type, callback, end_callback)) th_download.daemon = True th_download.start() @@ -2258,6 +2267,7 @@ class Controller: @staticmethod def setWatchdogCallback(callback) -> dict: model.setWatchdogCallback(callback) + return {"status":200, "result":True} @staticmethod def stopWatchdog(*args, **kwargs) -> dict: diff --git a/src-python/docs/CODING_RULES.md b/src-python/docs/CODING_RULES.md index 19a626b0..4d6d2dd2 100644 --- a/src-python/docs/CODING_RULES.md +++ b/src-python/docs/CODING_RULES.md @@ -29,8 +29,6 @@ - 定数: UPPER_SNAKE_CASE(`config.py` の定数に合わせる)。 - run_mapping のキー: 現在は短い key(例: `transcription_mic`)を内部で使い `run_mapping` に `/run/...` を置いている。この慣習は維持する。Controller 内で `self.run_mapping[...]` を直接参照する実装は許容される。 -例: `selected_translation_compute_device` は内部 key、`/run/selected_translation_compute_device` が外部イベント名である点を区別して使う。 - ## モジュール・パッケージ構成 - 各サブ領域(ocr, overlay, transcription, translation, websocket 等)は `models/` 下に整理済みのため、同様の粒度で新機能は追加する。 - パッケージは必ず `__init__.py` 
を置く(static analysis / mypy のため)。空の `__init__.py` でも可。これにより相対インポートが安定する。 diff --git a/src-python/docs/api.md b/src-python/docs/api.md index 373e29d4..f512a191 100644 --- a/src-python/docs/api.md +++ b/src-python/docs/api.md @@ -130,9 +130,6 @@ run イベント `/run/selected_transcription_compute_type` (200) - payload: string -`/run/selected_translation_compute_device` (200) - - payload: device descriptor (e.g. {"name":"cuda:0","type":"gpu"}) - `/run/selected_translation_engines` (200) - payload: config.SELECTED_TRANSLATION_ENGINES (list/dict per tab) diff --git a/src-python/mainloop.py b/src-python/mainloop.py index 7fd9fcc2..9315ab56 100644 --- a/src-python/mainloop.py +++ b/src-python/mainloop.py @@ -1,7 +1,7 @@ import sys import json import time -from typing import Any +from typing import Any, Tuple from threading import Thread from queue import Queue import logging @@ -359,7 +359,8 @@ controller.setInitMapping(init_mapping) class Main: def __init__(self, controller_instance, mapping_data) -> None: - self.queue = Queue() + # queue holds tuples of (endpoint, data) + self.queue: Queue[Tuple[str, Any]] = Queue() self.main_loop = True self.controller = controller_instance self.mapping = mapping_data diff --git a/src-python/model.py b/src-python/model.py index bbf43604..e78ed857 100644 --- a/src-python/model.py +++ b/src-python/model.py @@ -10,7 +10,7 @@ from time import sleep from queue import Queue from threading import Thread from requests import get as requests_get -from typing import Callable +from typing import Callable, Optional, cast from packaging.version import parse from flashtext import KeywordProcessor @@ -106,6 +106,9 @@ class Model: self.websocket_server_loop = False self.websocket_server_alive = False self.th_websocket_server = None + # default no-op callbacks for energy check functions + self.check_mic_energy_fnc: Callable[[float], None] = lambda v: None + self.check_speaker_energy_fnc: Callable[[float], None] = lambda v: None def 
checkTranslatorCTranslate2ModelWeight(self, weight_type:str): return checkCTranslate2Weight(config.PATH_LOCAL, weight_type) @@ -291,9 +294,9 @@ class Model: if self.transliterator is not None: self.transliterator = None - def convertMessageToTransliteration(self, message: str, hiragana: bool=True, romaji: bool=True) -> str: + def convertMessageToTransliteration(self, message: str, hiragana: bool=True, romaji: bool=True) -> list: if hiragana is False and romaji is False: - return message + return [] keys_to_keep = {"orig"} if hiragana: @@ -574,9 +577,10 @@ class Model: # self.mic_get_energy.stop() # self.mic_get_energy = None - def startCheckMicEnergy(self, fnc:Callable[[float], None]=None) -> None: - if isinstance(fnc, Callable): - self.check_mic_energy_fnc = fnc + def startCheckMicEnergy(self, fnc:Optional[Callable[[float], None]]=None) -> None: + # fnc may be None or a callable. Use cast after checking for None to satisfy type checker. + if fnc is not None: + self.check_mic_energy_fnc = cast(Callable[[float], None], fnc) mic_host_name = config.SELECTED_MIC_HOST mic_device_name = config.SELECTED_MIC_DEVICE @@ -596,7 +600,7 @@ class Model: errorLogging() sleep(0.01) - mic_energy_queue = Queue() + mic_energy_queue: Queue = Queue() mic_device = selected_mic_device[0] self.mic_energy_recorder = SelectedMicEnergyRecorder(mic_device) self.mic_energy_recorder.recordIntoQueue(mic_energy_queue) @@ -614,17 +618,18 @@ class Model: self.mic_energy_recorder.stop() self.mic_energy_recorder = None - def startSpeakerTranscript(self, fnc): + def startSpeakerTranscript(self, fnc:Optional[Callable[[dict], None]]=None) -> None: speaker_device_name = config.SELECTED_SPEAKER_DEVICE speaker_device_list = device_manager.getSpeakerDevices() selected_speaker_device = [device for device in speaker_device_list if device["name"] == speaker_device_name] if len(selected_speaker_device) == 0 or speaker_device_name == "NoDevice": - fnc({"text": False, "language": None}) + # fnc may be None; only 
call if callable + if callable(fnc): + fnc({"text": False, "language": None}) else: - speaker_audio_queue = Queue() - # speaker_energy_queue = Queue() + speaker_audio_queue: Queue = Queue() speaker_device = selected_speaker_device[0] record_timeout = config.SPEAKER_RECORD_TIMEOUT phrase_timeout = config.SPEAKER_PHRASE_TIMEOUT @@ -708,9 +713,10 @@ class Model: # self.speaker_get_energy.stop() # self.speaker_get_energy = None - def startCheckSpeakerEnergy(self, fnc:Callable[[float], None]=None) -> None: - if isinstance(fnc, Callable): - self.check_speaker_energy_fnc = fnc + def startCheckSpeakerEnergy(self, fnc:Optional[Callable[[float], None]]=None) -> None: + # Accept None as default and assign safely with cast after None-check + if fnc is not None: + self.check_speaker_energy_fnc = cast(Callable[[float], None], fnc) speaker_device_name = config.SELECTED_SPEAKER_DEVICE speaker_device_list = device_manager.getSpeakerDevices() @@ -720,7 +726,7 @@ class Model: self.check_speaker_energy_fnc(False) else: def sendSpeakerEnergy(): - if speaker_energy_queue.empty() is False: + if not speaker_energy_queue.empty(): energy = speaker_energy_queue.get() try: self.check_speaker_energy_fnc(energy) @@ -728,7 +734,7 @@ class Model: errorLogging() sleep(0.01) - speaker_energy_queue = Queue() + speaker_energy_queue: Queue = Queue() speaker_device = selected_speaker_device[0] self.speaker_energy_recorder = SelectedSpeakerEnergyRecorder(speaker_device) self.speaker_energy_recorder.recordIntoQueue(speaker_energy_queue) @@ -746,9 +752,12 @@ class Model: self.speaker_energy_recorder.stop() self.speaker_energy_recorder = None - def createOverlayImageSmallLog(self, message:str, your_language:str, translation:list, target_language:dict): - target_language = [data["language"] for data in target_language.values() if data["enable"] is True] - return self.overlay_image.createOverlayImageSmallLog(message, your_language, translation, target_language) + def createOverlayImageSmallLog(self, 
message:Optional[str], your_language:Optional[str], translation:list, target_language:Optional[dict]) -> object: + # target_language may be provided as dict or None + target_language_list = [] + if isinstance(target_language, dict): + target_language_list = [data["language"] for data in target_language.values() if data.get("enable") is True] + return self.overlay_image.createOverlayImageSmallLog(message, your_language, translation, target_language_list) def createOverlayImageSmallMessage(self, message): ui_language = config.UI_LANGUAGE @@ -797,9 +806,12 @@ class Model: if (self.overlay.settings[size]["ui_scaling"] != config.OVERLAY_SMALL_LOG_SETTINGS["ui_scaling"]): self.overlay.updateUiScaling(config.OVERLAY_SMALL_LOG_SETTINGS["ui_scaling"], size) - def createOverlayImageLargeLog(self, message_type:str, message:str, your_language:str, translation:list, target_language:dict): - target_language = [data["language"] for data in target_language.values() if data["enable"] is True] - return self.overlay_image.createOverlayImageLargeLog(message_type, message, your_language, translation, target_language) + def createOverlayImageLargeLog(self, message_type:str, message:Optional[str], your_language:Optional[str], translation:list, target_language:Optional[dict]=None): + # normalize target_language dict -> list of language strings + target_language_list = [] + if isinstance(target_language, dict): + target_language_list = [data["language"] for data in target_language.values() if data.get("enable") is True] + return self.overlay_image.createOverlayImageLargeLog(message_type, message, your_language, translation, target_language_list) def createOverlayImageLargeMessage(self, message): ui_language = config.UI_LANGUAGE diff --git a/src-python/models/__init__.py b/src-python/models/__init__.py new file mode 100644 index 00000000..4d9d80d5 --- /dev/null +++ b/src-python/models/__init__.py @@ -0,0 +1,5 @@ +"""models package init for static analysis and packaging.""" + +__all__ = [ + 
# subpackages are discovered implicitly +] diff --git a/src-python/models/overlay/__init__.py b/src-python/models/overlay/__init__.py new file mode 100644 index 00000000..33f43c7b --- /dev/null +++ b/src-python/models/overlay/__init__.py @@ -0,0 +1,5 @@ +"""models.overlay package init for static analysis.""" + +from . import overlay_utils # re-export helper for ease-of-use in tooling + +__all__ = ["overlay_utils"] diff --git a/src-python/models/transcription/transcription_recorder.py b/src-python/models/transcription/transcription_recorder.py index f30c071f..b574013c 100644 --- a/src-python/models/transcription/transcription_recorder.py +++ b/src-python/models/transcription/transcription_recorder.py @@ -1,7 +1,6 @@ from speech_recognition import Recognizer, Microphone from pyaudiowpatch import get_sample_size, paInt16 from datetime import datetime -from queue import Queue class BaseRecorder: def __init__(self, source, energy_threshold, dynamic_energy_threshold, record_timeout): diff --git a/src-python/scripts/find_doc_tokens.py b/src-python/scripts/find_doc_tokens.py index b9e1c95f..7b9fae1a 100644 --- a/src-python/scripts/find_doc_tokens.py +++ b/src-python/scripts/find_doc_tokens.py @@ -7,7 +7,6 @@ tokens = [ 'transcription_mic', 'transcription_speaker', 'selected_translation_compute_device', - '/run/selected_translation_compute_device', '/run/transcription_mic', '/run/transcription_speaker', ] diff --git a/src-python/utils.py b/src-python/utils.py index fab62d51..fe4faa90 100644 --- a/src-python/utils.py +++ b/src-python/utils.py @@ -1,5 +1,5 @@ import base64 -from typing import Any +from typing import Any, List, Dict import json import traceback import logging @@ -79,7 +79,7 @@ def isValidIpAddress(ip_address: str) -> bool: except ValueError: return False -def getComputeDeviceList() -> dict: +def getComputeDeviceList() -> List[Dict[str, Any]]: compute_types = [ { "device": "cpu", @@ -191,8 +191,8 @@ def printLog(log:str, data:Any=None) -> None: "data": 
str(data), } process_logger.info(response) - response = json.dumps(response) - print(response, flush=True) + serialized = json.dumps(response) + print(serialized, flush=True) def printResponse(status:int, endpoint:str, result:Any=None) -> None: global process_logger From 569d8e3f765542490d928c1381fb2b737a2a962d Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Thu, 9 Oct 2025 16:43:41 +0900 Subject: [PATCH 74/92] =?UTF-8?q?[ref]=20overlay=E3=81=AE=E3=83=AA?= =?UTF-8?q?=E3=83=95=E3=82=A1=E3=82=AF=E3=82=BF=E3=83=AA=E3=83=B3=E3=82=B0?= =?UTF-8?q?=E3=81=A8=E3=83=86=E3=82=B9=E3=83=88=E3=82=92=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src-python/docs/modules/overlay.md | 30 +++++++- src-python/models/overlay/overlay.py | 81 +++++++++++++--------- src-python/models/overlay/overlay_image.py | 24 ++++--- src-python/models/overlay/overlay_utils.py | 67 ++++++++++++++---- src-python/tests/test_overlay_imports.py | 30 ++++++++ 5 files changed, 175 insertions(+), 57 deletions(-) create mode 100644 src-python/tests/test_overlay_imports.py diff --git a/src-python/docs/modules/overlay.md b/src-python/docs/modules/overlay.md index 18b65321..648cbdcf 100644 --- a/src-python/docs/modules/overlay.md +++ b/src-python/docs/modules/overlay.md @@ -3,7 +3,7 @@ 目的: OpenVR を使ったオーバーレイ表示(複数サイズ: small/large)を管理する `Overlay` クラスを提供します。 主要メソッド: -- __init__(self, settings_dict) +- __init__(self, settings_dict: dict) - init(self) -> None - startOverlay(self) -> None - shutdownOverlay(self) -> None @@ -18,6 +18,34 @@ - OpenVR (SteamVR) が稼働していることが前提です。`checkSteamvrRunning()` で `vrmonitor.exe` の存在チェックを行います。 - 例外が発生した場合は `errorLogging()` を呼んでスタックトレースを残します。 +短い使用例: + +```py +from models.overlay.overlay_image import OverlayImage +from models.overlay.overlay import Overlay +from PIL import Image + +settings = { + "small": { + "x_pos": 0.0, "y_pos": 0.0, "z_pos": 0.0, + "x_rotation": 0.0, 
"y_rotation": 0.0, "z_rotation": 0.0, + "display_duration": 5, "fadeout_duration": 2, + "opacity": 1.0, "ui_scaling": 1.0, "tracker": "HMD" + } +} + +overlay_img = OverlayImage() +overlay = Overlay(settings) +overlay.startOverlay() + +# wait until initialized +while not overlay.initialized: + time.sleep(0.5) + +# push a simple blank image +overlay.updateImage(Image.new("RGBA", (256, 64), (255,255,255,255)), "small") +``` + ## モジュール構成(補足) - overlay.py — OpenVR を使ったオーバーレイ管理。Overlay クラスは複数サイズ(small/large)を扱い、位置/回転/透明度/フェードを制御する。 diff --git a/src-python/models/overlay/overlay.py b/src-python/models/overlay/overlay.py index 92c24ab9..4305e77d 100644 --- a/src-python/models/overlay/overlay.py +++ b/src-python/models/overlay/overlay.py @@ -3,6 +3,8 @@ import ctypes import time from psutil import process_iter from threading import Thread +from typing import Any, Dict, Optional, Sequence + import openvr import numpy as np from PIL import Image @@ -18,14 +20,26 @@ try: except ImportError: import overlay_utils as utils -def mat34Id(array): +def mat34Id(array: Sequence[Sequence[float]]) -> Any: + """Convert a 3x4 nested sequence into an openvr.HmdMatrix34_t instance. + + Args: + array: 3x4 numeric sequence + + Returns: + openvr HmdMatrix34_t compatible object + """ arr = openvr.HmdMatrix34_t() for i in range(3): for j in range(4): arr[i][j] = array[i][j] return arr -def getBaseMatrix(x_pos, y_pos, z_pos, x_rotation, y_rotation, z_rotation): +def getBaseMatrix(x_pos: float, y_pos: float, z_pos: float, x_rotation: float, y_rotation: float, z_rotation: float) -> np.ndarray: + """Create a 3x4 base matrix for an overlay given position and Euler rotations. + + Returns a numpy array of shape (3,4). 
+ """ arr = np.zeros((3, 4)) rot = utils.euler_to_rotation_matrix((x_rotation, y_rotation, z_rotation)) @@ -38,7 +52,7 @@ def getBaseMatrix(x_pos, y_pos, z_pos, x_rotation, y_rotation, z_rotation): arr[2][3] = - z_pos return arr -def getHMDBaseMatrix(): +def getHMDBaseMatrix() -> np.ndarray: x_pos = 0.0 y_pos = -0.4 z_pos = 1.0 @@ -48,7 +62,7 @@ def getHMDBaseMatrix(): arr = getBaseMatrix(x_pos, y_pos, z_pos, x_rotation, y_rotation, z_rotation) return arr -def getLeftHandBaseMatrix(): +def getLeftHandBaseMatrix() -> np.ndarray: x_pos = 0.3 y_pos = 0.1 z_pos = -0.31 @@ -58,7 +72,7 @@ def getLeftHandBaseMatrix(): arr = getBaseMatrix(x_pos, y_pos, z_pos, x_rotation, y_rotation, z_rotation) return arr -def getRightHandBaseMatrix(): +def getRightHandBaseMatrix() -> np.ndarray: x_pos = -0.3 y_pos = 0.1 z_pos = -0.31 @@ -69,24 +83,25 @@ def getRightHandBaseMatrix(): return arr class Overlay: - def __init__(self, settings_dict): - self.system = None - self.overlay = None - self.handle = None - self.init_process = False - self.initialized = False - self.loop = False - self.thread_overlay = None + """Manage OpenVR overlays for multiple sizes (e.g. 
'small'/'large').""" + def __init__(self, settings_dict: Dict[str, Dict[str, Any]]) -> None: + self.system: Optional[Any] = None + self.overlay: Optional[Any] = None + self.handle: Dict[str, Any] = {} + self.init_process: bool = False + self.initialized: bool = False + self.loop: bool = False + self.thread_overlay: Optional[Thread] = None - self.settings = {} - self.lastUpdate = {} - self.fadeRatio = {} + self.settings: Dict[str, Dict[str, Any]] = {} + self.lastUpdate: Dict[str, float] = {} + self.fadeRatio: Dict[str, float] = {} for key, value in settings_dict.items(): self.settings[key] = value self.lastUpdate[key] = time.monotonic() - self.fadeRatio[key] = 1 + self.fadeRatio[key] = 1.0 - def init(self): + def init(self) -> None: try: self.system = openvr.init(openvr.VRApplication_Background) self.overlay = openvr.IVROverlay() @@ -119,7 +134,7 @@ class Overlay: except Exception: errorLogging() - def updateImage(self, img, size): + def updateImage(self, img: Image.Image, size: str) -> None: if self.initialized is True: width, height = img.size img = img.tobytes() @@ -139,7 +154,7 @@ class Overlay: self.updateOpacity(self.settings[size]["opacity"], size) self.lastUpdate[size] = time.monotonic() - def clearImage(self, size): + def clearImage(self, size: str) -> None: if self.initialized is True: self.updateImage(Image.new("RGBA", (1, 1), (0, 0, 0, 0)), size) @@ -151,7 +166,7 @@ class Overlay: r, g, b = col self.overlay.setOverlayColor(self.handle[size], r, g, b) - def updateOpacity(self, opacity, size, with_fade=False): + def updateOpacity(self, opacity: float, size: str, with_fade: bool = False) -> None: self.settings[size]["opacity"] = opacity if self.initialized is True: @@ -161,12 +176,12 @@ class Overlay: else: self.overlay.setOverlayAlpha(self.handle[size], self.settings[size]["opacity"]) - def updateUiScaling(self, ui_scaling, size): + def updateUiScaling(self, ui_scaling: float, size: str) -> None: self.settings[size]["ui_scaling"] = ui_scaling if 
self.initialized is True: self.overlay.setOverlayWidthInMeters(self.handle[size], self.settings[size]["ui_scaling"]) - def updatePosition(self, x_pos, y_pos, z_pos, x_rotation, y_rotation, z_rotation, tracker, size): + def updatePosition(self, x_pos: float, y_pos: float, z_pos: float, x_rotation: float, y_rotation: float, z_rotation: float, tracker: str, size: str) -> None: """ x_pos, y_pos, z_pos are floats representing the position of overlay x_rotation, y_rotation, z_rotation are floats representing the rotation of overlay @@ -208,13 +223,13 @@ class Overlay: transform ) - def updateDisplayDuration(self, display_duration, size): + def updateDisplayDuration(self, display_duration: float, size: str) -> None: self.settings[size]["display_duration"] = display_duration - def updateFadeoutDuration(self, fadeout_duration, size): + def updateFadeoutDuration(self, fadeout_duration: float, size: str) -> None: self.settings[size]["fadeout_duration"] = fadeout_duration - def checkActive(self): + def checkActive(self) -> bool: try: if self.system is not None and self.initialized is True: new_event = openvr.VREvent_t() @@ -226,7 +241,7 @@ class Overlay: errorLogging() return False - def evaluateOpacityFade(self, size): + def evaluateOpacityFade(self, size: str) -> None: currentTime = time.monotonic() if (currentTime - self.lastUpdate[size]) > self.settings[size]["display_duration"]: timeThroughInterval = currentTime - self.lastUpdate[size] - self.settings[size]["display_duration"] @@ -235,13 +250,13 @@ class Overlay: self.fadeRatio[size] = 0 self.overlay.setOverlayAlpha(self.handle[size], self.fadeRatio[size] * self.settings[size]["opacity"]) - def update(self, size): + def update(self, size: str) -> None: if self.settings[size]["fadeout_duration"] != 0: self.evaluateOpacityFade(size) else: self.updateOpacity(self.settings[size]["opacity"], size) - def mainloop(self): + def mainloop(self) -> None: self.loop = True while self.checkActive() is True and self.loop is True: 
startTime = time.monotonic() @@ -251,21 +266,21 @@ class Overlay: if sleepTime > 0: time.sleep(sleepTime) - def main(self): + def main(self) -> None: while self.checkSteamvrRunning() is False: time.sleep(10) self.init() if self.initialized is True: self.mainloop() - def startOverlay(self): + def startOverlay(self) -> None: if self.initialized is False and self.init_process is False: self.init_process = True self.thread_overlay = Thread(target=self.main) self.thread_overlay.daemon = True self.thread_overlay.start() - def shutdownOverlay(self): + def shutdownOverlay(self) -> None: if self.initialized is True and self.init_process is False: if isinstance(self.thread_overlay, Thread): self.loop = False @@ -281,7 +296,7 @@ class Overlay: self.system = None self.initialized = False - def reStartOverlay(self): + def reStartOverlay(self) -> None: self.shutdownOverlay() self.startOverlay() diff --git a/src-python/models/overlay/overlay_image.py b/src-python/models/overlay/overlay_image.py index 708ad11c..21520278 100644 --- a/src-python/models/overlay/overlay_image.py +++ b/src-python/models/overlay/overlay_image.py @@ -1,6 +1,6 @@ from os import path as os_path from datetime import datetime -from typing import Tuple +from typing import Tuple, List, Optional from PIL import Image, ImageDraw, ImageFont try: from utils import errorLogging @@ -18,8 +18,14 @@ class OverlayImage: "Chinese Traditional": "NotoSansTC-Regular.ttf", } - def __init__(self, root_path: str=None): - self.message_log = [] + def __init__(self, root_path: Optional[str] = None) -> None: + """Overlay image helper. + + Args: + root_path: optional project root to resolve bundled fonts. If omitted, + defaults to repository `fonts` directory. 
+ """ + self.message_log: List[dict] = [] if root_path is None: self.root_path = os_path.join(os_path.dirname(__file__), "..", "..", "..", "fonts") else: @@ -58,7 +64,7 @@ class OverlayImage: } return colors - def createTextboxSmallLog(self, text:str, language:str, text_color:tuple, base_width:int, base_height:int, font_size:int) -> Image: + def createTextboxSmallLog(self, text: str, language: str, text_color: Tuple[int, int, int], base_width: int, base_height: int, font_size: int) -> Image: font_family = self.LANGUAGES.get(language, self.LANGUAGES["Default"]) img = Image.new("RGBA", (base_width, base_height), (0, 0, 0, 0)) draw = ImageDraw.Draw(img) @@ -92,7 +98,7 @@ class OverlayImage: draw.text((text_x, text_y), text, text_color, anchor="mm", stroke_width=0, font=font, align="center") return img - def createOverlayImageSmallLog(self, message: str, your_language: str, translation: list = [], target_language: list = []) -> Image: + def createOverlayImageSmallLog(self, message: str, your_language: str, translation: List[str] = [], target_language: List[str] = []) -> Image: # UI設定を取得 ui_size = self.getUiSizeSmallLog() width, height, font_size = ui_size["width"], ui_size["height"], ui_size["font_size"] @@ -162,7 +168,7 @@ class OverlayImage: "text_color_time": (120, 120, 120) } - def createTextImageLargeLog(self, message_type:str, size:str, text:str, language:str) -> Image: + def createTextImageLargeLog(self, message_type: str, size: str, text: str, language: str) -> Image: ui_size = self.getUiSizeLargeLog() font_size = ui_size["font_size_large"] if size == "large" else ui_size["font_size_small"] text_color = self.getUiColorLargeLog()[f"text_color_{size}"] @@ -200,7 +206,7 @@ class OverlayImage: draw.multiline_text((text_x, text_y), text, text_color, anchor=anchor, stroke_width=0, font=font, align=align) return img - def createTextImageMessageType(self, message_type:str, date_time:str) -> Image: + def createTextImageMessageType(self, message_type: str, date_time: 
str) -> Image: ui_size = self.getUiSizeLargeLog() font_size = ui_size["font_size_small"] ui_padding = ui_size["padding"] @@ -242,7 +248,7 @@ class OverlayImage: draw.text((text_x, text_y), text, text_color, anchor=anchor, stroke_width=0, font=font) return img - def createTextboxLargeLog(self, message_type: str, message: str = None, your_language: str = None, translation: list = [], target_language: list = [], date_time: str = None) -> Image: + def createTextboxLargeLog(self, message_type: str, message: Optional[str] = None, your_language: Optional[str] = None, translation: List[str] = [], target_language: List[str] = [], date_time: Optional[str] = None) -> Image: # テキスト画像のリストを作成 images = [self.createTextImageMessageType(message_type, date_time)] @@ -272,7 +278,7 @@ class OverlayImage: return combined_img - def createOverlayImageLargeLog(self, message_type:str, message:str=None, your_language:str=None, translation:list=[], target_language:list=[]) -> Image: + def createOverlayImageLargeLog(self, message_type: str, message: Optional[str] = None, your_language: Optional[str] = None, translation: List[str] = [], target_language: List[str] = []) -> Image: ui_color = self.getUiColorLargeLog() background_color = ui_color["background_color"] background_outline_color = ui_color["background_outline_color"] diff --git a/src-python/models/overlay/overlay_utils.py b/src-python/models/overlay/overlay_utils.py index 0a379dd0..8807d638 100644 --- a/src-python/models/overlay/overlay_utils.py +++ b/src-python/models/overlay/overlay_utils.py @@ -1,11 +1,29 @@ import numpy as np +from typing import Sequence -def toHomogeneous(matrix): + +def toHomogeneous(matrix: np.ndarray) -> np.ndarray: + """Convert a 3x4 base matrix to a 4x4 homogeneous matrix. 
+ + Args: + matrix: 3x4 numpy array + + Returns: + 4x4 numpy array with last row [0, 0, 0, 1] + """ homogeneous_matrix = np.vstack([matrix, [0, 0, 0, 1]]) return homogeneous_matrix # 移動行列を生成する関数 -def calcTranslationMatrix(translation): +def calcTranslationMatrix(translation: Sequence[float]) -> np.ndarray: + """Create a 4x4 translation matrix from a 3-element translation. + + Args: + translation: (tx, ty, tz) + + Returns: + 4x4 numpy translation matrix + """ tx, ty, tz = translation return np.array([ [1, 0, 0, tx], @@ -15,9 +33,10 @@ def calcTranslationMatrix(translation): ]) # X軸周りの回転行列を生成する関数 -def calcRotationMatrixX(angle): - c = np.cos(np.pi/180*angle) - s = np.sin(np.pi/180*angle) +def calcRotationMatrixX(angle: float) -> np.ndarray: + """Rotation matrix around X axis for given angle in degrees.""" + c = np.cos(np.pi / 180 * angle) + s = np.sin(np.pi / 180 * angle) return np.array([ [1, 0, 0, 0], [0, c, -s, 0], @@ -26,9 +45,10 @@ def calcRotationMatrixX(angle): ]) # Y軸周りの回転行列を生成する関数 -def calcRotationMatrixY(angle): - c = np.cos(np.pi/180*angle) - s = np.sin(np.pi/180*angle) +def calcRotationMatrixY(angle: float) -> np.ndarray: + """Rotation matrix around Y axis for given angle in degrees.""" + c = np.cos(np.pi / 180 * angle) + s = np.sin(np.pi / 180 * angle) return np.array([ [c, 0, s, 0], [0, 1, 0, 0], @@ -37,9 +57,10 @@ def calcRotationMatrixY(angle): ]) # Z軸周りの回転行列を生成する関数 -def calcRotationMatrixZ(angle): - c = np.cos(np.pi/180*angle) - s = np.sin(np.pi/180*angle) +def calcRotationMatrixZ(angle: float) -> np.ndarray: + """Rotation matrix around Z axis for given angle in degrees.""" + c = np.cos(np.pi / 180 * angle) + s = np.sin(np.pi / 180 * angle) return np.array([ [c, -s, 0, 0], [s, c, 0, 0], @@ -48,7 +69,17 @@ def calcRotationMatrixZ(angle): ]) # 3x4行列の座標を基準として回転や移動を行う関数 -def transform_matrix(base_matrix, translation, rotation): +def transform_matrix(base_matrix: np.ndarray, translation: Sequence[float], rotation: Sequence[float]) -> np.ndarray: + 
"""Apply translation and Euler rotations to a 3x4 base matrix. + + Args: + base_matrix: 3x4 base transform matrix + translation: (tx, ty, tz) + rotation: (x_deg, y_deg, z_deg) + + Returns: + Transformed 3x4 matrix (numpy.ndarray) + """ homogeneous_base_matrix = toHomogeneous(base_matrix) translation_matrix = calcTranslationMatrix(translation) rotation_matrix_x = calcRotationMatrixX(rotation[0]) @@ -60,10 +91,18 @@ def transform_matrix(base_matrix, translation, rotation): result_matrix = np.dot(homogeneous_base_matrix, transformation_matrix) return result_matrix[:3, :] -def euler_to_rotation_matrix(angles): +def euler_to_rotation_matrix(angles: Sequence[float]) -> np.ndarray: + """Convert Euler angles in degrees to a 3x3 rotation matrix. + + Args: + angles: (x_deg, y_deg, z_deg) + + Returns: + 3x3 rotation matrix + """ phi = angles[0] * np.pi / 180 theta = angles[1] * np.pi / 180 - psi = angles[2]* np.pi / 180 + psi = angles[2] * np.pi / 180 R_x = np.array([[1, 0, 0], [0, np.cos(phi), -np.sin(phi)], [0, np.sin(phi), np.cos(phi)]]) diff --git a/src-python/tests/test_overlay_imports.py b/src-python/tests/test_overlay_imports.py new file mode 100644 index 00000000..b90389e7 --- /dev/null +++ b/src-python/tests/test_overlay_imports.py @@ -0,0 +1,30 @@ +import sys +import time +from PIL import Image + +sys.path.append(r"d:\WORKSPACE\WORK\VRChatProject\VRCT\src-python") + +from models.overlay import overlay_image, overlay_utils + + +def test_overlay_image_create(): + oi = overlay_image.OverlayImage() + img = oi.createOverlayImageSmallLog("hello", "English", [], []) + assert isinstance(img, Image.Image) + + +def test_utils_transform(): + import numpy as np + base = np.array([ + [1, 0, 0, 0], + [0, 1, 0, 0], + [0, 0, 1, 0] + ]) + res = overlay_utils.transform_matrix(base, (0, 0, 0), (0, 0, 0)) + assert res.shape == (3, 4) + + +if __name__ == '__main__': + test_overlay_image_create() + test_utils_transform() + print('tests passed') From 
7255722b67fd4452f576dba1fc19c39c99e068c8 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Thu, 9 Oct 2025 17:01:31 +0900 Subject: [PATCH 75/92] =?UTF-8?q?=E3=82=A6=E3=82=A9=E3=83=83=E3=83=81?= =?UTF-8?q?=E3=83=89=E3=83=83=E3=82=B0=E3=81=AE=E3=83=89=E3=82=AD=E3=83=A5?= =?UTF-8?q?=E3=83=A1=E3=83=B3=E3=83=88=E3=82=92=E6=9B=B4=E6=96=B0=E3=81=97?= =?UTF-8?q?=E3=80=81=E4=BD=BF=E7=94=A8=E4=BE=8B=E3=82=92=E8=BF=BD=E5=8A=A0?= =?UTF-8?q?=E3=80=82=E5=9E=8B=E6=B3=A8=E9=87=88=E3=81=A8=E3=82=A8=E3=83=A9?= =?UTF-8?q?=E3=83=BC=E3=83=8F=E3=83=B3=E3=83=89=E3=83=AA=E3=83=B3=E3=82=B0?= =?UTF-8?q?=E3=81=AE=E6=94=B9=E5=96=84=E3=82=92=E5=8F=8D=E6=98=A0=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src-python/docs/modules/watchdog.md | 74 +++++++++++++++++- src-python/models/watchdog/watchdog.py | 102 ++++++++++++++++++++++--- 2 files changed, 164 insertions(+), 12 deletions(-) diff --git a/src-python/docs/modules/watchdog.md b/src-python/docs/modules/watchdog.md index 26a822bd..92a05669 100644 --- a/src-python/docs/modules/watchdog.md +++ b/src-python/docs/modules/watchdog.md @@ -3,10 +3,78 @@ 目的: 外部(Process 管理側)へ定期的に "生存" を知らせるために使う軽量ウォッチドッグ。 設計: -- class Watchdog(timeout:int=60, interval:int=20) +- class Watchdog(timeout: int = 60, interval: int = 20) - feed(): 最終フィード時刻を更新 - - setCallback(callback): タイムアウト時に呼ぶコールバックを登録 - - start(): 現状は単純で、呼び出し側がループ中に start() を呼ぶかたち。実装は簡易(将来的にスレッド化推奨) + - setCallback(callback): タイムアウト時に呼ぶコールバックを登録(zero-arg を想定) + - start(): 単一チェックを行い、`interval` 秒の sleep を行う(継続監視は呼び出し側でループまたはスレッド化) 注意: - 現行実装は非常にシンプルで、長時間のブロッキングやスレッド運用の見直しが必要になり得る。 + +変更点(実装に入れた改善): +- コールバック属性を初期化しておくことで AttributeError を防止 +- コールバック呼び出し内の例外はウォッチドッグ本体に影響を与えないよう try/except で保護 +- メソッドに型注釈と docstring を追加 + +短い使用例(ポーリング方式): + +```py +import time +from models.watchdog.watchdog import Watchdog + +def on_timeout(): + print('watchdog timed out') + +wd = Watchdog(timeout=5, interval=1) 
+wd.setCallback(on_timeout) + +# 別スレッドにせず、単純なループでポーリングする例 +while True: + wd.start() # ここで timeout をチェックし、必要なら callback を呼ぶ + # アプリケーションの他処理... + time.sleep(0.5) + + # 正常時に feed を呼ぶ例 + # wd.feed() +``` + +使用例(スレッド化ヘルパを用意するアプローチ): + +```py +import time +from threading import Thread, Event +from models.watchdog.watchdog import Watchdog + +stop_event = Event() + +def run_watchdog(wd: Watchdog, stop_event: Event): + # シンプルなバックグラウンド実行ループ(安全な停止用フラグ付き) + while not stop_event.is_set(): + wd.start() + +wd = Watchdog(timeout=10, interval=1) +wd.setCallback(lambda: print('timed out')) +thread = Thread(target=run_watchdog, args=(wd, stop_event), daemon=True) +thread.start() + +# 正常動作時 +wd.feed() +time.sleep(2) + +# 停止する場合は stop_event.set() を呼ぶ +stop_event.set() +thread.join() +``` + +拡張案(将来の改善): +- `start_in_thread()` / `stop()` を Watchdog に組み込む(内部で Thread と Event を管理して安全に停止できるようにする) +- コールバックに引数を渡せるようにする(context 情報、呼び出し回数など) +- asyncio と相互運用できるバージョン(async/await ベース)を用意する +- ロギング統合(標準 logging を使って状態変化を記録) +- 単発(one-shot)/繰り返しの動作モード指定 + +簡易テスト済み: +- 基本的なコールバックの有効/無効挙動をローカルで確認済み(feed 後は呼ばれず、タイムアウト状態で呼ばれる)。 + +注意事項: +- フル自動化(CI での運用)を行う場合は、スレッド起動・停止のテストを追加することを推奨します。 diff --git a/src-python/models/watchdog/watchdog.py b/src-python/models/watchdog/watchdog.py index 73803e10..5976005d 100644 --- a/src-python/models/watchdog/watchdog.py +++ b/src-python/models/watchdog/watchdog.py @@ -1,20 +1,104 @@ -from typing import Callable +from typing import Callable, Optional import time +from threading import Thread, Event + class Watchdog: - def __init__(self, timeout:int=60, interval:int=20): + """A lightweight watchdog utility. + + This class provides a minimal watchdog which records the last "feed" + timestamp and can invoke a user-supplied callback when the timeout + is exceeded. The design is intentionally simple: callers are expected + to either call `start()` periodically (e.g. from a loop) or extend the + class to run `start()` in a background thread. 
+ + Args: + timeout: seconds without feed after which the callback is invoked + interval: suggested sleep interval (seconds) for callers that poll + """ + + def __init__(self, timeout: int = 60, interval: int = 20) -> None: self.timeout = timeout self.interval = interval self.last_feed_time = time.time() + self.callback: Optional[Callable[[], None]] = None + # Background thread control + self._thread: Optional[Thread] = None + self._stop_event: Optional[Event] = None - def feed(self): + def feed(self) -> None: + """Refresh the watchdog timer (set last feed time to now).""" self.last_feed_time = time.time() - def setCallback(self, callback): + def setCallback(self, callback: Callable[[], None]) -> None: + """Register a zero-argument callback invoked on timeout.""" self.callback = callback - def start(self): - if time.time() - self.last_feed_time > self.timeout: - if isinstance(self.callback, Callable): - self.callback() - time.sleep(self.interval) \ No newline at end of file + def start(self) -> None: + """Perform a single watchdog check and optionally sleep `interval` seconds. + + The method checks if the duration since the last feed exceeds + `timeout`. If so and a callback is registered, the callback is called. + + Note: `start()` does not run in the background by itself; callers + should call it repeatedly (or run it inside a thread) if continuous + monitoring is required. + """ + now = time.time() + if now - self.last_feed_time > self.timeout: + if callable(self.callback): + try: + self.callback() + except Exception: + # Do not let callback exceptions propagate out of watchdog + import traceback + traceback.print_exc() + time.sleep(self.interval) + + def _run_loop(self) -> None: + """Internal run loop used by `start_in_thread`. + + It repeatedly calls `start()` until `_stop_event` is set. The + implementation relies on `start()` sleeping for `self.interval`. 
+ """ + # Defensive: ensure stop_event exists + if self._stop_event is None: + return + while not self._stop_event.is_set(): + self.start() + + def start_in_thread(self, daemon: bool = True) -> None: + """Start the watchdog in a background thread. + + If the watchdog is already running, this is a no-op. The created + thread will repeatedly call `start()` until `stop()` is invoked. + + Args: + daemon: if True, thread is a daemon thread (won't block process exit) + """ + if self._thread is not None and self._thread.is_alive(): + return + self._stop_event = Event() + self._thread = Thread(target=self._run_loop, daemon=daemon) + self._thread.start() + + def stop(self, timeout: Optional[float] = None) -> None: + """Stop background thread started by `start_in_thread`. + + If no background thread is running this is a no-op. + + Args: + timeout: optional timeout to wait for thread join (seconds). If + None, join will block until the thread exits. + """ + if self._stop_event is None or self._thread is None: + return + # signal stop and wait for thread to finish + self._stop_event.set() + self._thread.join(timeout=timeout) + # cleanup + if self._thread.is_alive(): + # thread did not stop within timeout; leave objects for another stop() + return + self._thread = None + self._stop_event = None \ No newline at end of file From 7d24b3839c9b139041365dac9bfc988d2a8ffb98 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Thu, 9 Oct 2025 17:07:21 +0900 Subject: [PATCH 76/92] =?UTF-8?q?=E5=9E=8B=E6=B3=A8=E9=87=88=E3=82=92?= =?UTF-8?q?=E8=BF=BD=E5=8A=A0=E3=81=97=E3=80=81=E9=96=A2=E6=95=B0=E3=81=AE?= =?UTF-8?q?=E6=88=BB=E3=82=8A=E5=80=A4=E3=82=92=E6=98=8E=E7=A4=BA=E5=8C=96?= =?UTF-8?q?=E3=80=82=E3=82=B3=E3=83=BC=E3=83=89=E3=81=AE=E5=8F=AF=E8=AA=AD?= =?UTF-8?q?=E6=80=A7=E3=81=A8=E5=9E=8B=E5=AE=89=E5=85=A8=E6=80=A7=E3=82=92?= =?UTF-8?q?=E5=90=91=E4=B8=8A=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit --- .../transliteration/transliteration_context_rules.py | 6 +++--- .../transliteration_kana_to_hepburn.py | 12 +++++++----- .../transliteration_transliterator.py | 11 ++++++----- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/src-python/models/transliteration/transliteration_context_rules.py b/src-python/models/transliteration/transliteration_context_rules.py index d0b5d339..35c25ec1 100644 --- a/src-python/models/transliteration/transliteration_context_rules.py +++ b/src-python/models/transliteration/transliteration_context_rules.py @@ -1,4 +1,4 @@ -from typing import List, Dict +from typing import List, Dict, Any import re """Contextual transliteration rules for tokenized results. @@ -33,7 +33,7 @@ DEFAULT_RULES = { -def apply_context_rules(results: List[Dict], use_macron: bool = False) -> List[Dict]: +def apply_context_rules(results: List[Dict[str, Any]], use_macron: bool = False) -> List[Dict[str, Any]]: """Apply contextual rewrite rules to `results`. 
Parameters @@ -50,7 +50,7 @@ def apply_context_rules(results: List[Dict], use_macron: bool = False) -> List[D """ # prepare rules: sort by priority (desc) and precompile regex where provided - raw_rules = DEFAULT_RULES.get("rules", []) + raw_rules: List[Dict[str, Any]] = DEFAULT_RULES.get("rules", []) rules = sorted(raw_rules, key=lambda r: r.get("priority", 0), reverse=True) for r in rules: if r.get("match_mode") == "regex" and r.get("pattern"): diff --git a/src-python/models/transliteration/transliteration_kana_to_hepburn.py b/src-python/models/transliteration/transliteration_kana_to_hepburn.py index e7ba04c2..d8c2b016 100644 --- a/src-python/models/transliteration/transliteration_kana_to_hepburn.py +++ b/src-python/models/transliteration/transliteration_kana_to_hepburn.py @@ -1,5 +1,7 @@ # katakana_to_hepburn.py # カタカナ -> ヘボン式ローマ字(パッケージ不要) +from typing import List + def katakana_to_hepburn(kata: str, use_macron: bool = True) -> str: """ @@ -8,7 +10,7 @@ def katakana_to_hepburn(kata: str, use_macron: bool = True) -> str: use_macron=False のときは単純に連続母音を残す(例: ou, oo)。 """ # 基本音の対応(主要なカタカナ) - base = { + base: dict = { 'ア':'a','イ':'i','ウ':'u','エ':'e','オ':'o', 'カ':'ka','キ':'ki','ク':'ku','ケ':'ke','コ':'ko', 'サ':'sa','シ':'shi','ス':'su','セ':'se','ソ':'so', @@ -31,7 +33,7 @@ def katakana_to_hepburn(kata: str, use_macron: bool = True) -> str: } # 拡張:子音 + 小ャユョ の組合せ(主要なもの) - digraphs = { + digraphs: dict = { ('キ','ャ'):'kya', ('キ','ュ'):'kyu', ('キ','ョ'):'kyo', ('ギ','ャ'):'gya', ('ギ','ュ'):'gyu', ('ギ','ョ'):'gyo', ('シ','ャ'):'sha', ('シ','ュ'):'shu', ('シ','ョ'):'sho', @@ -49,8 +51,8 @@ def katakana_to_hepburn(kata: str, use_macron: bool = True) -> str: # F-sounds (ファ フィ フェ フォ) ('フ','ァ'):'fa', ('フ','ィ'):'fi', ('フ','ェ'):'fe', ('フ','ォ'):'fo', # シェ チェ ティ etc. 
- ('シ','ェ'):'she', ('チ','ェ'):'che', - ('テ','ィ'):'ti', ('ト','ゥ'):'tu', ('ド','ゥ'):'du', + ('シ','ェ'):'she', ('チ','ェ'):'che', + ('テ','ィ'):'ti', ('ウ','ァ'):'wa', ('ウ','ィ'):'wi', ('ウ','ェ'):'we', ('ウ','ォ'):'wo', # その他外来語によくある組合せ ('ス','ィ'):'si', ('ズ','ィ'):'zi', ('ツ','ァ'):'tsa', ('ツ','ィ'):'tsi', ('ツ','ェ'):'tse', ('ツ','ォ'):'tso', @@ -78,7 +80,7 @@ def katakana_to_hepburn(kata: str, use_macron: bool = True) -> str: return rom # 母音がないなら全部 # 変換メイン - res = [] + res: List[str] = [] i = 0 kata = kata.strip() length = len(kata) diff --git a/src-python/models/transliteration/transliteration_transliterator.py b/src-python/models/transliteration/transliteration_transliterator.py index e25b3be4..8aff912e 100644 --- a/src-python/models/transliteration/transliteration_transliterator.py +++ b/src-python/models/transliteration/transliteration_transliterator.py @@ -1,5 +1,6 @@ from sudachipy import tokenizer from sudachipy import dictionary +from typing import List, Dict, Any try: from .transliteration_kana_to_hepburn import katakana_to_hepburn except ImportError: @@ -10,7 +11,7 @@ except ImportError: from transliteration_context_rules import apply_context_rules class Transliterator: - def __init__(self): + def __init__(self) -> None: self.tokenizer_obj = dictionary.Dictionary(dict_type="full").create() self.mode = tokenizer.Tokenizer.SplitMode.C @@ -26,7 +27,7 @@ class Transliterator: ) @staticmethod - def split_kanji_okurigana(surface: str, reading_kana: str, use_macron: bool = True): + def split_kanji_okurigana(surface: str, reading_kana: str, use_macron: bool = True) -> List[Dict[str, str]]: """Split a single surface word and its kana reading into parts. Inputs: @@ -45,7 +46,7 @@ class Transliterator: constructed list. 
""" - result = [] + result: List[Dict[str, str]] = [] # 表層を「漢字ブロック」と「非漢字ブロック」に分割 buf = "" @@ -113,7 +114,7 @@ class Transliterator: return result - def analyze(self, text: str, use_macron: bool = False): + def analyze(self, text: str, use_macron: bool = False) -> List[Dict[str, Any]]: """Tokenize ``text`` and produce per-subunit reading information. Returns a list of dicts for each token/sub-part with keys: @@ -133,7 +134,7 @@ class Transliterator: tokens = self.tokenizer_obj.tokenize(text, self.mode) - results = [] + results: List[Dict[str, Any]] = [] for t in tokens: surface = t.surface() reading = t.reading_form() From b26129af68340f3df0eec47552f81030b5229c53 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Thu, 9 Oct 2025 17:30:48 +0900 Subject: [PATCH 77/92] =?UTF-8?q?=E7=BF=BB=E8=A8=B3=E3=83=A2=E3=82=B8?= =?UTF-8?q?=E3=83=A5=E3=83=BC=E3=83=AB=E3=81=AE=E3=83=89=E3=82=AD=E3=83=A5?= =?UTF-8?q?=E3=83=A1=E3=83=B3=E3=83=88=E3=82=92=E6=9B=B4=E6=96=B0=E3=81=97?= =?UTF-8?q?=E3=80=81=E3=82=BB=E3=83=83=E3=83=88=E3=82=A2=E3=83=83=E3=83=97?= =?UTF-8?q?=E6=89=8B=E9=A0=86=E3=82=84API=E4=BD=BF=E7=94=A8=E4=BE=8B?= =?UTF-8?q?=E3=82=92=E8=BF=BD=E5=8A=A0=E3=80=82=E5=9E=8B=E6=B3=A8=E9=87=88?= =?UTF-8?q?=E3=82=92=E5=BC=B7=E5=8C=96=E3=81=97=E3=80=81=E9=96=A2=E6=95=B0?= =?UTF-8?q?=E3=81=AE=E6=88=BB=E3=82=8A=E5=80=A4=E3=82=92=E6=98=8E=E7=A4=BA?= =?UTF-8?q?=E5=8C=96=E3=80=82=E3=82=A8=E3=83=A9=E3=83=BC=E3=83=8F=E3=83=B3?= =?UTF-8?q?=E3=83=89=E3=83=AA=E3=83=B3=E3=82=B0=E3=82=92=E6=94=B9=E5=96=84?= =?UTF-8?q?=E3=81=97=E3=80=81=E3=82=B3=E3=83=BC=E3=83=89=E3=81=AE=E5=8F=AF?= =?UTF-8?q?=E8=AA=AD=E6=80=A7=E3=82=92=E5=90=91=E4=B8=8A=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src-python/docs/modules/translation.md | 92 ++++++++++ .../translation/translation_languages.py | 41 ++--- .../translation/translation_translator.py | 106 +++++++----- .../models/translation/translation_utils.py | 
159 +++++++++++------- 4 files changed, 273 insertions(+), 125 deletions(-) diff --git a/src-python/docs/modules/translation.md b/src-python/docs/modules/translation.md index e247a394..a391da23 100644 --- a/src-python/docs/modules/translation.md +++ b/src-python/docs/modules/translation.md @@ -1,3 +1,95 @@ +## 翻訳モジュール (models.translation) + +このドキュメントは `models/translation` 配下に対して行った最近の変更点、セットアップ手順、API の使い方、テスト方針、トラブルシュートをまとめたものです。 + +### 概要 +- モジュールの責務: テキストの翻訳を行う高レベルの `Translator` クラス、言語コードのマッピング、CTranslate2 用の重み・トークナイザのダウンロード/検証ユーティリティを提供します。 +- 変更点の狙い: 型注釈と docstring を追加し、`translation_utils.py` のダウンロード/検証ロジックをシンプルで堅牢な実装へ置換しました。これにより初回セットアップの手順が明確になります。 + +### 主な変更点(サマリ) +- `translation_translator.py`: 型注釈、docstring を追記。外部依存は存在するが、例外が発生してもモジュールが壊れないように保護されています。 +- `translation_languages.py`: 言語コードマッピングの説明を追加。 +- `translation_utils.py`: 重みファイルの検証(SHA-256 ハッシュ照合)、zip 展開、`transformers.AutoTokenizer` を使ったトークナイザ取得、ダウンロード進捗用のコールバックを備えた実装へ置換。 + +### インストール(依存関係) +必須ではないものが含まれます。開発・最小稼働に必要なパッケージはプロジェクト全体の要件に従ってください。 + +主に使うパッケージ: +- `requests` — ダウンロード処理 +- `transformers` — トークナイザ取得(AutoTokenizer) +- `ctranslate2` — CTranslate2 を使う場合(ランタイムのみ、テストではモック推奨) + +推奨インストール例(任意): + +```powershell +pip install requests transformers ctranslate2 +``` + +DeepL や `translators` といった外部 API ラッパーはオプショナルです。CI やローカルテストではモックして動作確認してください。 + +### 初回セットアップ / 重みの準備 +`translation_utils.py` に含まれるユーティリティ関数: + +- `checkCTranslate2Weight(root: str, weight_type: str = "small") -> bool` + - 指定した `root/weights/ctranslate2/` 以下に必要なファイルが存在し、既知のハッシュと一致するかをチェックします。 + +- `downloadCTranslate2Weight(root: str, weight_type: str = "small", callback: Optional[Callable[[float], None]] = None, end_callback: Optional[Callable[[], None]] = None) -> None` + - 重みを ZIP 形式でダウンロードして展開します。 + - `callback(progress: float)` は 0.0〜1.0 の進捗通知に使えます。 + - `end_callback()` は処理完了時に呼び出されます。 + +- `downloadCTranslate2Tokenizer(path: str, weight_type: str = "small") -> None` + - `transformers.AutoTokenizer.from_pretrained` 
を利用してトークナイザをダウンロード/キャッシュします(`cache_dir` に保存)。 + +呼び出し例(簡単): + +```python +from models.translation import translation_utils as tu + +# ルートディレクトリ(プロジェクトルートなど) +root = "." +if not tu.checkCTranslate2Weight(root, "small"): + tu.downloadCTranslate2Weight(root, "small", callback=lambda p: print(f"{p*100:.1f}%")) + tu.downloadCTranslate2Tokenizer(root, "small") +``` + +注意: 大きなモデル(`large`)はダウンロードに時間とディスク容量を要します。 + +### API 使用例 (`Translator` の簡易例) + +以下は `Translator` の想定されるシンプルな使い方です(実装は `translation_translator.py` を参照してください)。 + +```python +from models.translation.translation_translator import Translator + +tr = Translator() +result = tr.translate("Hello", src_lang="en", target_lang="ja") +if result: + print(result) +else: + print("翻訳に失敗しました") +``` + +戻り値とエラー: 既存のコードベースとの互換性を重視し、失敗時は False を返すケースがあります。API 呼び出し前に戻り値の型を確認してください。 + +### テスト方針 +- 外部サービス(DeepL、web 翻訳ラッパー、ctranslate2、transformers)はユニットテストでモックします。 +- 推奨: `pytest` と `unittest.mock` を使い、`Translator.translate` の成功パス・失敗パスを検証するテストを追加してください。 + +簡単なテスト設計: +- 正常系: ctranslate2 経由の翻訳が正しく呼ばれる(モックで期待レスポンスを返す) +- フォールバック系: ctranslate2 が利用できない場合に別の翻訳経路を辿る(モック) + +### トラブルシュート +- `ModuleNotFoundError` (例: `sudachidict_full`) — transliteration/別モジュールで必要な辞書が無い場合。該当パッケージのインストールか、当該機能を無効にしてください。 +- ハッシュ不一致 — ダウンロード済みファイルの破損が疑われます。該当ファイルを削除して再ダウンロードしてください。 +- `transformers` のトークナイザが取得できない場合、ネットワークやキャッシュ先の権限を確認してください。 + +### 変更履歴 +- 2025-10-09: 型注釈と docstring の追加、`translation_utils.py` を再実装してダウンロード/検証ロジックを整理。 + +--- +このドキュメントは簡潔な参照用です。必要なら実行例やさらに詳細なトラブルシュート手順(コマンド出力例、ログの取り方など)を追加します。 # models/translation — 詳細設計 構成ファイル: diff --git a/src-python/models/translation/translation_languages.py b/src-python/models/translation/translation_languages.py index 2a660e17..804b2921 100644 --- a/src-python/models/translation/translation_languages.py +++ b/src-python/models/translation/translation_languages.py @@ -1,4 +1,13 @@ -translation_lang = {} +"""Language code mappings for supported translation backends. 
+ +Provides `translation_lang` mapping keyed by backend name with `source` and +`target` maps used by `Translator.getLanguageCode`. +""" + +from typing import Dict + +translation_lang: Dict[str, Dict[str, Dict[str, str]]] = {} + dict_deepl_languages = { "Arabic":"ar", "Bulgarian":"bg", @@ -37,10 +46,7 @@ dict_deepl_languages = { "Chinese Simplified":"zh", "Chinese Traditional":"zh" } -translation_lang["DeepL"] = { - "source":dict_deepl_languages, - "target":dict_deepl_languages, -} +translation_lang["DeepL"] = {"source": dict_deepl_languages, "target": dict_deepl_languages} dict_deepl_api_source_languages = { "Japanese":"ja", @@ -109,10 +115,7 @@ dict_deepl_api_target_languages = { "Chinese Simplified":"zh", "Chinese Traditional":"zh" } -translation_lang["DeepL_API"] = { - "source": dict_deepl_api_source_languages, - "target": dict_deepl_api_target_languages, -} +translation_lang["DeepL_API"] = {"source": dict_deepl_api_source_languages, "target": dict_deepl_api_target_languages} dict_google_languages = { "Japanese":"ja", @@ -179,10 +182,7 @@ dict_google_languages = { # "Basque":"eu", "Irish":"ga" } -translation_lang["Google"] = { - "source":dict_google_languages, - "target":dict_google_languages, -} +translation_lang["Google"] = {"source": dict_google_languages, "target": dict_google_languages} dict_bing_languages = { "Japanese":"ja", @@ -247,10 +247,7 @@ dict_bing_languages = { "Punjabi":"pa", "Irish":"ga" } -translation_lang["Bing"] = { - "source":dict_bing_languages, - "target":dict_bing_languages, -} +translation_lang["Bing"] = {"source": dict_bing_languages, "target": dict_bing_languages} dict_papago_languages = { "German": "de", @@ -270,10 +267,7 @@ dict_papago_languages = { "Chinese Traditional":"zh-TW", } -translation_lang["Papago"] = { - "source":dict_papago_languages, - "target":dict_papago_languages, -} +translation_lang["Papago"] = {"source": dict_papago_languages, "target": dict_papago_languages} dict_ctranslate2_languages = { "English": "en", @@ 
-378,7 +372,4 @@ dict_ctranslate2_languages = { "Sundanese": "su" } -translation_lang["CTranslate2"] = { - "source":dict_ctranslate2_languages, - "target":dict_ctranslate2_languages, -} \ No newline at end of file +translation_lang["CTranslate2"] = {"source": dict_ctranslate2_languages, "target": dict_ctranslate2_languages} \ No newline at end of file diff --git a/src-python/models/translation/translation_translator.py b/src-python/models/translation/translation_translator.py index a12b326e..faab0327 100644 --- a/src-python/models/translation/translation_translator.py +++ b/src-python/models/translation/translation_translator.py @@ -4,6 +4,7 @@ try: from translators import translate_text as other_web_Translator ENABLE_TRANSLATORS = True except Exception: + other_web_Translator = None # type: ignore ENABLE_TRANSLATORS = False from .translation_languages import translation_lang @@ -14,22 +15,37 @@ import transformers from utils import errorLogging, getBestComputeType import warnings +from typing import Any, Optional, Tuple + warnings.filterwarnings("ignore") -# Translator -class Translator(): - def __init__(self): - self.deepl_client = None - self.ctranslate2_translator = None - self.ctranslate2_tokenizer = None - self.is_loaded_ctranslate2_model = False - self.is_changed_translator_parameters = False - self.is_enable_translators = ENABLE_TRANSLATORS - def authenticationDeepLAuthKey(self, authkey): +class Translator: + """High-level translator facade. + + This class wraps multiple backends (DeepL, DeepL API, Google, Bing, Papago, + and CTranslate2 local models). Optional dependencies may be unavailable at + runtime; methods degrade gracefully and return False or an empty string on + failure (kept compatible with existing behavior). 
+ """ + + def __init__(self) -> None: + self.deepl_client: Optional[DeepLClient] = None + self.ctranslate2_translator: Any = None + self.ctranslate2_tokenizer: Any = None + self.is_loaded_ctranslate2_model: bool = False + self.is_changed_translator_parameters: bool = False + self.is_enable_translators: bool = ENABLE_TRANSLATORS + + def authenticationDeepLAuthKey(self, authkey: str) -> bool: + """Authenticate DeepL API with the provided key. + + Returns True on success, False on failure. + """ result = True try: self.deepl_client = DeepLClient(authkey) + # quick smoke test self.deepl_client.translate_text(" ", target_lang="EN-US") except Exception: errorLogging() @@ -37,7 +53,12 @@ class Translator(): result = False return result - def changeCTranslate2Model(self, path, model_type, device="cpu", device_index=0, compute_type="auto"): + def changeCTranslate2Model(self, path: str, model_type: str, device: str = "cpu", device_index: int = 0, compute_type: str = "auto") -> None: + """Load a CTranslate2 model from weights. + + This sets internal translator/tokenizer objects and flips + ``is_loaded_ctranslate2_model`` on success. 
+ """ self.is_loaded_ctranslate2_model = False directory_name = ctranslate2_weights[model_type]["directory_name"] tokenizer = ctranslate2_weights[model_type]["tokenizer"] @@ -52,7 +73,7 @@ class Translator(): device_index=device_index, compute_type=compute_type, inter_threads=1, - intra_threads=4 + intra_threads=4, ) try: self.ctranslate2_tokenizer = transformers.AutoTokenizer.from_pretrained(tokenizer, cache_dir=tokenizer_path) @@ -62,17 +83,21 @@ class Translator(): self.ctranslate2_tokenizer = transformers.AutoTokenizer.from_pretrained(tokenizer, cache_dir=tokenizer_path) self.is_loaded_ctranslate2_model = True - def isLoadedCTranslate2Model(self): + def isLoadedCTranslate2Model(self) -> bool: return self.is_loaded_ctranslate2_model - def isChangedTranslatorParameters(self): + def isChangedTranslatorParameters(self) -> bool: return self.is_changed_translator_parameters - def setChangedTranslatorParameters(self, is_changed): + def setChangedTranslatorParameters(self, is_changed: bool) -> None: self.is_changed_translator_parameters = is_changed - def translateCTranslate2(self, message, source_language, target_language): - result = False + def translateCTranslate2(self, message: str, source_language: str, target_language: str) -> Any: + """Translate using a loaded CTranslate2 model. + + Returns a string on success or False on failure (keeps legacy behavior). + """ + result: Any = False if self.is_loaded_ctranslate2_model is True: try: self.ctranslate2_tokenizer.src_lang = source_language @@ -86,7 +111,11 @@ class Translator(): return result @staticmethod - def getLanguageCode(translator_name, target_country, source_language, target_language): + def getLanguageCode(translator_name: str, target_country: str, source_language: str, target_language: str) -> Tuple[str, str]: + """Resolve a friendly language name to translator-specific codes. + + Returns (source_code, target_code). 
+ """ match translator_name: case "DeepL_API": if target_language == "English": @@ -101,66 +130,63 @@ class Translator(): target_language = "Portuguese Brazilian" case _: pass - source_language=translation_lang[translator_name]["source"][source_language] - target_language=translation_lang[translator_name]["target"][target_language] + source_language = translation_lang[translator_name]["source"][source_language] + target_language = translation_lang[translator_name]["target"][target_language] return source_language, target_language - def translate(self, translator_name, source_language, target_language, target_country, message): + def translate(self, translator_name: str, source_language: str, target_language: str, target_country: str, message: str) -> Any: + """Translate `message` using the named translator backend. + + Returns translated string on success, or False on failure. When + source_language == target_language the original message is returned. + """ try: if source_language == target_language: return message - result = "" + result: Any = "" source_language, target_language = self.getLanguageCode(translator_name, target_country, source_language, target_language) match translator_name: case "DeepL": - if self.is_enable_translators is True: + if self.is_enable_translators is True and other_web_Translator is not None: result = other_web_Translator( query_text=message, translator="deepl", from_language=source_language, to_language=target_language, - ) + ) case "DeepL_API": if self.is_enable_translators is True: if self.deepl_client is None: result = False else: - result = self.deepl_client.translate_text( - message, - source_lang=source_language, - target_lang=target_language, - ).text + result = self.deepl_client.translate_text(message, source_lang=source_language, target_lang=target_language).text case "Google": - if self.is_enable_translators is True: + if self.is_enable_translators is True and other_web_Translator is not None: result = other_web_Translator( 
query_text=message, translator="google", from_language=source_language, to_language=target_language, - ) + ) case "Bing": - if self.is_enable_translators is True: + if self.is_enable_translators is True and other_web_Translator is not None: result = other_web_Translator( query_text=message, translator="bing", from_language=source_language, to_language=target_language, - ) + ) case "Papago": - if self.is_enable_translators is True: + if self.is_enable_translators is True and other_web_Translator is not None: result = other_web_Translator( query_text=message, translator="papago", from_language=source_language, to_language=target_language, - ) - case "CTranslate2": - result = self.translateCTranslate2( - message=message, - source_language=source_language, - target_language=target_language, ) + case "CTranslate2": + result = self.translateCTranslate2(message=message, source_language=source_language, target_language=target_language) except Exception: errorLogging() result = False diff --git a/src-python/models/translation/translation_utils.py b/src-python/models/translation/translation_utils.py index 457a65f1..003da354 100644 --- a/src-python/models/translation/translation_utils.py +++ b/src-python/models/translation/translation_utils.py @@ -3,13 +3,22 @@ from zipfile import ZipFile from os import path as os_path from os import makedirs as os_makedirs from requests import get as requests_get -from typing import Callable +from typing import Callable, Optional import hashlib import transformers from utils import errorLogging + +"""Utilities for downloading and verifying CTranslate2 weights and tokenizers. + +This module provides a small, dependency-light set of helpers used by the +translation layer. It purposely keeps behavior resilient: network errors are +logged (via utils.errorLogging) and the functions return/complete without +raising, which matches the repository's defensive style. 
+""" + ctranslate2_weights = { - "small": { # M2M-100 418M-parameter model + "small": { "url": "https://github.com/misyaguziya/VRCT-weights/releases/download/v1.0/m2m100_418m.zip", "directory_name": "m2m100_418m", "tokenizer": "facebook/m2m100_418M", @@ -17,9 +26,9 @@ ctranslate2_weights = { "model.bin": "e7c26a9abb5260abd0268fbe3040714070dec254a990b4d7fd3f74c5230e3acb", "sentencepiece.model": "d8f7c76ed2a5e0822be39f0a4f95a55eb19c78f4593ce609e2edbc2aea4d380a", "shared_vocabulary.txt": "bd440aa21b8ca3453fc792a0018a1f3fe68b3464aadddd4d16a4b72f73c86d8c", - } + }, }, - "large": { # M2M-100 1.2B-parameter model + "large": { "url": "https://github.com/misyaguziya/VRCT-weights/releases/download/v1.0/m2m100_12b.zip", "directory_name": "m2m100_12b", "tokenizer": "facebook/m2m100_1.2b", @@ -27,77 +36,107 @@ ctranslate2_weights = { "model.bin": "abb7bf4ba7e5e016b6e3ed480c752459b2f783ac8fca372e7587675e5bf3a919", "sentencepiece.model": "d8f7c76ed2a5e0822be39f0a4f95a55eb19c78f4593ce609e2edbc2aea4d380a", "shared_vocabulary.txt": "bd440aa21b8ca3453fc792a0018a1f3fe68b3464aadddd4d16a4b72f73c86d8c", - } + }, }, } -def calculate_file_hash(file_path, block_size=65536): + +def calculate_file_hash(file_path: str, block_size: int = 65536) -> str: hash_object = hashlib.sha256() - - with open(file_path, 'rb') as file: - for block in iter(lambda: file.read(block_size), b''): + with open(file_path, "rb") as f: + for block in iter(lambda: f.read(block_size), b""): hash_object.update(block) - return hash_object.hexdigest() -def checkCTranslate2Weight(root, weight_type="small"): - weight_directory_name = ctranslate2_weights[weight_type]["directory_name"] - hash_data = ctranslate2_weights[weight_type]["hash"] - files = [ - "model.bin", - "sentencepiece.model", - "shared_vocabulary.txt" - ] - path = os_path.join(root, "weights", "ctranslate2") - # check already downloaded - already_downloaded = False - if all(os_path.exists(os_path.join(path, weight_directory_name, file)) for file in files): - # 
check hash - for file in files: - original_hash = hash_data[file] - current_hash = calculate_file_hash(os_path.join(path, weight_directory_name, file)) - if original_hash != current_hash: - break - already_downloaded = True - return already_downloaded +def checkCTranslate2Weight(root: str, weight_type: str = "small") -> bool: + """Return True if the requested weight files exist and match their hashes. -def downloadCTranslate2Weight(root, weight_type="small", callback=None, end_callback=None): - url = ctranslate2_weights[weight_type]["url"] - filename = "weight.zip" - path = os_path.join(root, "weights", "ctranslate2") - os_makedirs(path, exist_ok=True) - - if checkCTranslate2Weight(root, weight_type) is False: + This function intentionally avoids raising: callers use the boolean to + decide whether to (re)download weights. + """ + weight_info = ctranslate2_weights.get(weight_type) + if weight_info is None: + return False + weight_directory_name = weight_info["directory_name"] + hash_data = weight_info["hash"] + files = ["model.bin", "sentencepiece.model", "shared_vocabulary.txt"] + base_path = os_path.join(root, "weights", "ctranslate2") + # quick existence check + for f in files: + p = os_path.join(base_path, weight_directory_name, f) + if not os_path.exists(p): + return False + # verify hashes + for f in files: + p = os_path.join(base_path, weight_directory_name, f) try: - with tempfile.TemporaryDirectory() as tmp_path: - res = requests_get(url, stream=True) - file_size = int(res.headers.get('content-length', 0)) - total_chunk = 0 - with open(os_path.join(tmp_path, filename), 'wb') as file: - for chunk in res.iter_content(chunk_size=1024*2000): - file.write(chunk) - if isinstance(callback, Callable): - total_chunk += len(chunk) - callback(total_chunk/file_size) - - with ZipFile(os_path.join(tmp_path, filename)) as zf: - zf.extractall(path) + if calculate_file_hash(p) != hash_data[f]: + return False except Exception: errorLogging() + return False + return True - 
if isinstance(end_callback, Callable): - end_callback() -def downloadCTranslate2Tokenizer(path, weight_type="small"): - directory_name = ctranslate2_weights[weight_type]["directory_name"] - tokenizer = ctranslate2_weights[weight_type]["tokenizer"] - tokenizer_path = os_path.join(path, "weights", "ctranslate2", directory_name, "tokenizer") +def downloadCTranslate2Weight(root: str, weight_type: str = "small", callback: Optional[Callable[[float], None]] = None, end_callback: Optional[Callable[[], None]] = None) -> None: + """Download and extract ctranslate2 weights for the given type. + callback receives a float between 0 and 1 for progress when available. + end_callback is invoked after success or failure to allow caller cleanup. + """ + weight_info = ctranslate2_weights.get(weight_type) + if weight_info is None: + return + url = weight_info["url"] + filename = "weight.zip" + dst_path = os_path.join(root, "weights", "ctranslate2") + os_makedirs(dst_path, exist_ok=True) + if checkCTranslate2Weight(root, weight_type): + if callable(end_callback): + end_callback() + return try: - os_makedirs(tokenizer_path, exist_ok=True) - transformers.AutoTokenizer.from_pretrained(tokenizer, cache_dir=tokenizer_path) + with tempfile.TemporaryDirectory() as tmp_path: + res = requests_get(url, stream=True, timeout=30) + total = int(res.headers.get("content-length", 0) or 0) + written = 0 + out_path = os_path.join(tmp_path, filename) + with open(out_path, "wb") as out: + for chunk in res.iter_content(chunk_size=1024 * 1024): + if not chunk: + continue + out.write(chunk) + written += len(chunk) + if callable(callback) and total: + try: + callback(written / total) + except Exception: + errorLogging() + with ZipFile(out_path) as zf: + zf.extractall(dst_path) except Exception: errorLogging() - tokenizer_path = os_path.join("./weights", "ctranslate2", directory_name, "tokenizer") - transformers.AutoTokenizer.from_pretrained(tokenizer, cache_dir=tokenizer_path) \ No newline at end of file + 
finally: + if callable(end_callback): + end_callback() + + +def downloadCTranslate2Tokenizer(root: str, weight_type: str = "small") -> None: + """Ensure a tokenizer for the requested weight is available (cached). + + This will attempt to download the tokenizer via Hugging Face's transformers + and cache it under the weights directory. It logs failures instead of + raising to keep runtime resilient during startup. + """ + weight_info = ctranslate2_weights.get(weight_type) + if weight_info is None: + return + directory_name = weight_info["directory_name"] + tokenizer_name = weight_info["tokenizer"] + tokenizer_cache = os_path.join(root, "weights", "ctranslate2", directory_name, "tokenizer") + try: + os_makedirs(tokenizer_cache, exist_ok=True) + transformers.AutoTokenizer.from_pretrained(tokenizer_name, cache_dir=tokenizer_cache) + except Exception: + errorLogging() \ No newline at end of file From 690a2f081b62bc7cc8588047bb54bda43e673342 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Thu, 9 Oct 2025 17:35:55 +0900 Subject: [PATCH 78/92] =?UTF-8?q?=E3=83=89=E3=82=AD=E3=83=A5=E3=83=A1?= =?UTF-8?q?=E3=83=B3=E3=83=88=E3=82=92=E6=9B=B4=E6=96=B0=E3=81=97=E3=80=81?= =?UTF-8?q?=E5=9E=8B=E6=B3=A8=E9=87=88=E3=82=92=E8=BF=BD=E5=8A=A0=E3=81=97?= =?UTF-8?q?=E3=81=A6=E3=82=B3=E3=83=BC=E3=83=89=E3=81=AE=E5=8F=AF=E8=AA=AD?= =?UTF-8?q?=E6=80=A7=E3=81=A8=E3=83=A1=E3=83=B3=E3=83=86=E3=83=8A=E3=83=B3?= =?UTF-8?q?=E3=82=B9=E6=80=A7=E3=82=92=E5=90=91=E4=B8=8A=E3=80=82=E5=90=84?= =?UTF-8?q?=E3=83=A2=E3=82=B8=E3=83=A5=E3=83=BC=E3=83=AB=E3=81=AE=E4=BD=BF?= =?UTF-8?q?=E7=94=A8=E4=BE=8B=E3=82=84=E4=BE=9D=E5=AD=98=E9=96=A2=E4=BF=82?= =?UTF-8?q?=E3=82=92=E6=98=8E=E7=A4=BA=E5=8C=96=E3=81=97=E3=80=81=E3=82=A8?= =?UTF-8?q?=E3=83=A9=E3=83=BC=E3=83=8F=E3=83=B3=E3=83=89=E3=83=AA=E3=83=B3?= =?UTF-8?q?=E3=82=B0=E3=82=92=E6=94=B9=E5=96=84=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 
src-python/docs/modules/transcription.md | 75 ++++++++++++++ .../transcription/transcription_languages.py | 5 + .../transcription/transcription_recorder.py | 80 +++++++++++---- .../transcription_transcriber.py | 98 ++++++++++++++----- .../transcription/transcription_whisper.py | 78 ++++++++++++--- 5 files changed, 276 insertions(+), 60 deletions(-) diff --git a/src-python/docs/modules/transcription.md b/src-python/docs/modules/transcription.md index 1efa5ef3..a8f5abc9 100644 --- a/src-python/docs/modules/transcription.md +++ b/src-python/docs/modules/transcription.md @@ -1,3 +1,78 @@ +## 文字起こしモジュール (models.transcription) + +このドキュメントでは `models/transcription` に関する設計・セットアップ・使用例・テスト方針・トラブルシュートをまとめます。 + +### 概要 +- `models/transcription` は音声入力をテキストに変換する機能を提供します。主に: + - `transcription_recorder.py` — マイクやスピーカからの音声取得ラッパー + - `transcription_transcriber.py` — 音声バッファを認識エンジンに渡して文字起こしを行うロジック + - `transcription_whisper.py` — faster-whisper(WhisperModel)周りのダウンロード/ロード補助 + - `transcription_languages.py` — 各言語・国別のエンジン別コードマップ + +### 最近の変更点 +- 各モジュールに型注釈と docstring を追加しました。これによりメンテナンス性が向上します。 +- `transcription_whisper.py` にダウンロード進捗コールバックを明記した実装を追加しました。 + +### 依存関係 +主要な依存: +- `speech_recognition` — オーディオ録音と Google 音声認識のラッパー +- `pyaudiowpatch` — クロスプラットフォームのオーディオ設定 +- `pydub` — 音声のチャンネル変換や処理 +- `faster_whisper`(オプショナル)— ローカルで Whisper を使う場合 +- `huggingface_hub`(オプショナル)— モデルアーティファクトのダウンロード + +注意: `pydub` は `ffmpeg` が必要です。環境に ffmpeg が無いとワーニングが出ます。 + +推奨インストール(任意): + +```powershell +pip install speechrecognition pyaudiowpatch pydub faster-whisper huggingface-hub +``` + +テストでは多くの外部依存をモックするため、全てをインストールする必要はありません。 + +### 初回セットアップ +1. 必要に応じて `ffmpeg` をインストールしてください(pydub の動作に必要)。 +2. 
Whisper ローカルモデルを使う場合、`transcription_whisper.downloadWhisperWeight(root, weight_type, callback, end_callback)` を呼んでモデルを取得します。 + - `callback(progress: float)` は 0.0〜1.0 の進捗通知です。 + - 例: + +```python +from models.transcription import transcription_whisper as tw +tw.downloadWhisperWeight("./", "tiny", callback=lambda p: print(f"{p*100:.1f}%"), end_callback=lambda: print("done")) +``` + +### API 使用例 +簡単な `AudioTranscriber` の使い方: + +```python +from models.transcription.transcription_transcriber import AudioTranscriber + +# source はライブラリが提供するオーディオソースオブジェクト +tr = AudioTranscriber(speaker=False, source=source, phrase_timeout=3, max_phrases=10, transcription_engine="Google") +# audio_queue は録音スレッドがプッシュするキュー +tr.transcribeAudioQueue(audio_queue, languages=["English"], countries=["United States"]) +``` + +戻り値やエラー処理のルールについては各関数の docstring を参照してください。 + +### テスト方針 +- `AudioTranscriber` と `Whisper` ラッパーはユニットテストでモック化して検証します。 +- 推奨: `pytest` と `unittest.mock` を使い、以下のケースをカバーします: + - 正常系: Google/Whisper の成功パス(モックで期待テキストを返す) + - エッジ: 無音、低確信、複数言語 + - フォールバック: Whisper が利用不可の場合のフォールバック動作 + +### トラブルシュート +- ffmpeg が見つからない: `pydub` がワーニングを出します。OS に合わせて ffmpeg をインストールしてください。 +- Whisper のロード時に VRAM エラー: `getWhisperModel` は VRAM 不足を検出して `ValueError("VRAM_OUT_OF_MEMORY", message)` を投げます。デバイス設定や compute_type を調整してください。 +- ハッシュ不一致やダウンロード失敗: キャッシュや weights ディレクトリを削除して再ダウンロードしてください。 + +### 変更履歴 +- 2025-10-09: 型注釈と docstring を追加、ダウンロード/コールバック仕様を明記。 + +--- +このドキュメントは簡潔な参照用です。さらに詳細な実行手順(ログ収集方法、ffmpeg のインストール手順例など)が必要であれば追記します。 # transcription — 文字起こしモジュール 概要: マイク/スピーカー音声の録音と Whisper/Google などのエンジンを使った文字起こしを提供するモジュール群です。主なクラスは録音用の Recorder と `AudioTranscriber` です。 diff --git a/src-python/models/transcription/transcription_languages.py b/src-python/models/transcription/transcription_languages.py index 12625df7..6240e056 100644 --- a/src-python/models/transcription/transcription_languages.py +++ b/src-python/models/transcription/transcription_languages.py @@ -1,3 +1,8 @@ +"""Language table used by 
transcription components. + +Maps a display language and country to engine-specific language codes. +""" + transcription_lang = { "Afrikaans":{ "South Africa":{ diff --git a/src-python/models/transcription/transcription_recorder.py b/src-python/models/transcription/transcription_recorder.py index b574013c..7214a375 100644 --- a/src-python/models/transcription/transcription_recorder.py +++ b/src-python/models/transcription/transcription_recorder.py @@ -1,9 +1,18 @@ +"""Recorders that wrap speech_recognition microphone interfaces. + +These classes provide small adapters that push raw audio bytes into queues. +They intentionally keep a thin API so the rest of the system can mock them +in tests. +""" + +from typing import Any from speech_recognition import Recognizer, Microphone from pyaudiowpatch import get_sample_size, paInt16 from datetime import datetime + class BaseRecorder: - def __init__(self, source, energy_threshold, dynamic_energy_threshold, record_timeout): + def __init__(self, source: Any, energy_threshold: int, dynamic_energy_threshold: bool, record_timeout: int) -> None: self.recorder = Recognizer() self.recorder.energy_threshold = energy_threshold self.recorder.dynamic_energy_threshold = dynamic_energy_threshold @@ -15,27 +24,29 @@ class BaseRecorder: self.source = source - def adjustForNoise(self): + def adjustForNoise(self) -> None: with self.source: self.recorder.adjust_for_ambient_noise(self.source) - def recordIntoQueue(self, audio_queue): + def recordIntoQueue(self, audio_queue: Any) -> None: def record_callback(_, audio): audio_queue.put((audio.get_raw_data(), datetime.now())) self.stop, self.pause, self.resume = self.recorder.listen_in_background(self.source, record_callback, phrase_time_limit=self.record_timeout) + class SelectedMicRecorder(BaseRecorder): - def __init__(self, device, energy_threshold, dynamic_energy_threshold, record_timeout): - source=Microphone( + def __init__(self, device: dict, energy_threshold: int, 
dynamic_energy_threshold: bool, record_timeout: int) -> None: + source = Microphone( device_index=device['index'], sample_rate=int(device["defaultSampleRate"]), ) super().__init__(source=source, energy_threshold=energy_threshold, dynamic_energy_threshold=dynamic_energy_threshold, record_timeout=record_timeout) # self.adjustForNoise() + class SelectedSpeakerRecorder(BaseRecorder): - def __init__(self, device, energy_threshold, dynamic_energy_threshold, record_timeout): + def __init__(self, device: dict, energy_threshold: int, dynamic_energy_threshold: bool, record_timeout: int) -> None: source = Microphone(speaker=True, device_index= device["index"], @@ -47,7 +58,7 @@ class SelectedSpeakerRecorder(BaseRecorder): # self.adjustForNoise() class BaseEnergyRecorder: - def __init__(self, source): + def __init__(self, source: Any) -> None: self.recorder = Recognizer() self.recorder.energy_threshold = 0 self.recorder.dynamic_energy_threshold = False @@ -59,27 +70,29 @@ class BaseEnergyRecorder: self.source = source - def adjustForNoise(self): + def adjustForNoise(self) -> None: with self.source: self.recorder.adjust_for_ambient_noise(self.source) - def recordIntoQueue(self, energy_queue): + def recordIntoQueue(self, energy_queue: Any) -> None: def recordCallback(_, energy): energy_queue.put(energy) self.stop, self.pause, self.resume = self.recorder.listen_energy_in_background(self.source, recordCallback) + class SelectedMicEnergyRecorder(BaseEnergyRecorder): - def __init__(self, device): - source=Microphone( + def __init__(self, device: dict) -> None: + source = Microphone( device_index=device['index'], sample_rate=int(device["defaultSampleRate"]), ) super().__init__(source=source) # self.adjustForNoise() + class SelectedSpeakerEnergyRecorder(BaseEnergyRecorder): - def __init__(self, device): + def __init__(self, device: dict) -> None: source = Microphone(speaker=True, device_index= device["index"], @@ -90,7 +103,15 @@ class 
SelectedSpeakerEnergyRecorder(BaseEnergyRecorder): # self.adjustForNoise() class BaseEnergyAndAudioRecorder: - def __init__(self, source, energy_threshold, dynamic_energy_threshold, phrase_time_limit, phrase_timeout, record_timeout): + def __init__( + self, + source: Any, + energy_threshold: int, + dynamic_energy_threshold: bool, + phrase_time_limit: int, + phrase_timeout: int, + record_timeout: int, + ) -> None: self.recorder = Recognizer() self.recorder.energy_threshold = energy_threshold self.recorder.dynamic_energy_threshold = dynamic_energy_threshold @@ -104,11 +125,11 @@ class BaseEnergyAndAudioRecorder: self.source = source - def adjustForNoise(self): + def adjustForNoise(self) -> None: with self.source: self.recorder.adjust_for_ambient_noise(self.source) - def recordIntoQueue(self, audio_queue, energy_queue=None): + def recordIntoQueue(self, audio_queue: Any, energy_queue: Any = None) -> None: def audioRecordCallback(_, audio): audio_queue.put((audio.get_raw_data(), datetime.now())) @@ -121,11 +142,21 @@ class BaseEnergyAndAudioRecorder: phrase_time_limit=self.phrase_time_limit, callback_energy=energyRecordCallback if energy_queue is not None else None, phrase_timeout=self.phrase_timeout, - record_timeout=self.record_timeout) + record_timeout=self.record_timeout, + ) + class SelectedMicEnergyAndAudioRecorder(BaseEnergyAndAudioRecorder): - def __init__(self, device, energy_threshold, dynamic_energy_threshold, phrase_time_limit, phrase_timeout:int=1, record_timeout:int=5): - source=Microphone( + def __init__( + self, + device: dict, + energy_threshold: int, + dynamic_energy_threshold: bool, + phrase_time_limit: int, + phrase_timeout: int = 1, + record_timeout: int = 5, + ) -> None: + source = Microphone( device_index=device['index'], sample_rate=int(device["defaultSampleRate"]), ) @@ -139,14 +170,23 @@ class SelectedMicEnergyAndAudioRecorder(BaseEnergyAndAudioRecorder): ) # self.adjustForNoise() + class 
SelectedSpeakerEnergyAndAudioRecorder(BaseEnergyAndAudioRecorder): - def __init__(self, device, energy_threshold, dynamic_energy_threshold, phrase_time_limit, phrase_timeout:int=1, record_timeout:int=5): + def __init__( + self, + device: dict, + energy_threshold: int, + dynamic_energy_threshold: bool, + phrase_time_limit: int, + phrase_timeout: int = 1, + record_timeout: int = 5, + ) -> None: source = Microphone(speaker=True, device_index= device["index"], sample_rate=int(device["defaultSampleRate"]), chunk_size=get_sample_size(paInt16), - channels=device["maxInputChannels"] + channels=device["maxInputChannels"], ) super().__init__( source=source, diff --git a/src-python/models/transcription/transcription_transcriber.py b/src-python/models/transcription/transcription_transcriber.py index 9d874b30..15a9aa51 100644 --- a/src-python/models/transcription/transcription_transcriber.py +++ b/src-python/models/transcription/transcription_transcriber.py @@ -1,7 +1,14 @@ +"""Runtime transcriber that wraps Google SpeechRecognition and faster-whisper. + +This class focuses on converting incoming raw audio buffers into text using +either the Google web recognizer (online) or a local Whisper model (offline). +""" + import time from io import BytesIO from threading import Event import wave +from typing import Any, Callable, Dict, List, Optional, Tuple from speech_recognition import Recognizer, AudioData, AudioFile from speech_recognition.exceptions import UnknownValueError from datetime import timedelta @@ -20,38 +27,71 @@ warnings.simplefilter('ignore', RuntimeWarning) PHRASE_TIMEOUT = 3 MAX_PHRASES = 10 + class AudioTranscriber: - def __init__(self, speaker, source, phrase_timeout, max_phrases, transcription_engine, root=None, whisper_weight_type=None, device="cpu", device_index=0, compute_type="auto"): + """Convert queued audio buffers into transcripts. 
+ + Public attributes set by the constructor: + - speaker: bool + - phrase_timeout: int + - max_phrases: int + + Methods are intentionally permissive about input types to match the + existing codebase; this wrapper adds typing for clarity. + """ + + def __init__( + self, + speaker: bool, + source: Any, + phrase_timeout: int, + max_phrases: int, + transcription_engine: str, + root: Optional[str] = None, + whisper_weight_type: Optional[str] = None, + device: str = "cpu", + device_index: int = 0, + compute_type: str = "auto", + ) -> None: self.speaker = speaker self.phrase_timeout = phrase_timeout self.max_phrases = max_phrases - self.transcript_data = [] + self.transcript_data: List[Dict[str, Any]] = [] self.transcript_changed_event = Event() self.audio_recognizer = Recognizer() self.transcription_engine = "Google" self.whisper_model = None - self.audio_sources = { - "sample_rate": source.SAMPLE_RATE, - "sample_width": source.SAMPLE_WIDTH, - "channels": source.channels, - "last_sample": bytes(), - "last_spoken": None, - "new_phrase": True, - "process_data_func": self.processSpeakerData if speaker else self.processSpeakerData + self.audio_sources: Dict[str, Any] = { + "sample_rate": source.SAMPLE_RATE, + "sample_width": source.SAMPLE_WIDTH, + "channels": source.channels, + "last_sample": bytes(), + "last_spoken": None, + "new_phrase": True, + "process_data_func": self.processSpeakerData if speaker else self.processSpeakerData, } if transcription_engine == "Whisper" and checkWhisperWeight(root, whisper_weight_type) is True: - self.whisper_model = getWhisperModel(root, whisper_weight_type, device=device, device_index=device_index, compute_type=compute_type) + self.whisper_model = getWhisperModel( + root, whisper_weight_type, device=device, device_index=device_index, compute_type=compute_type + ) self.transcription_engine = "Whisper" - def transcribeAudioQueue(self, audio_queue, languages, countries, avg_logprob=-0.8, no_speech_prob=0.6): + def transcribeAudioQueue( + 
self, + audio_queue: Any, + languages: List[str], + countries: List[str], + avg_logprob: float = -0.8, + no_speech_prob: float = 0.6, + ) -> bool: if audio_queue.empty(): time.sleep(0.01) return False audio, time_spoken = audio_queue.get() self.updateLastSampleAndPhraseStatus(audio, time_spoken) - confidences = [{"confidence": 0, "text": "", "language": None}] + confidences: List[Dict[str, Any]] = [{"confidence": 0, "text": "", "language": None}] try: audio_data = self.audio_sources["process_data_func"]() match self.transcription_engine: @@ -67,13 +107,19 @@ class AudioTranscriber: except Exception: pass case "Whisper": - audio_data = np.frombuffer(audio_data.get_raw_data(convert_rate=16000, convert_width=2), np.int16).flatten().astype(np.float32) / 32768.0 + audio_data = np.frombuffer( + audio_data.get_raw_data(convert_rate=16000, convert_width=2), np.int16 + ).flatten().astype(np.float32) / 32768.0 if isinstance(audio_data, torch.Tensor): audio_data = audio_data.detach().numpy() for language, country in zip(languages, countries): text = "" - source_language = transcription_lang[language][country][self.transcription_engine] if len(languages) == 1 else None + source_language = ( + transcription_lang[language][country][self.transcription_engine] + if len(languages) == 1 + else None + ) segments, info = self.whisper_model.transcribe( audio_data, beam_size=5, @@ -85,13 +131,15 @@ class AudioTranscriber: without_timestamps=True, task="transcribe", vad_filter=False, - ) + ) for s in segments: if s.avg_logprob < avg_logprob or s.no_speech_prob > no_speech_prob: continue text += s.text confidences.append({"confidence": info.language_probability, "text": text, "language": language}) - if (len(languages) == 1) or (transcription_lang[language][country][self.transcription_engine] == info.language): + if (len(languages) == 1) or ( + transcription_lang[language][country][self.transcription_engine] == info.language + ): break except UnknownValueError: @@ -106,7 +154,7 @@ class 
AudioTranscriber: self.updateTranscript(result) return True - def updateLastSampleAndPhraseStatus(self, data, time_spoken): + def updateLastSampleAndPhraseStatus(self, data: bytes, time_spoken) -> None: source_info = self.audio_sources if source_info["last_spoken"] and time_spoken - source_info["last_spoken"] > timedelta(seconds=self.phrase_timeout): source_info["last_sample"] = bytes() @@ -117,11 +165,13 @@ class AudioTranscriber: source_info["last_sample"] += data source_info["last_spoken"] = time_spoken - def processMicData(self): - audio_data = AudioData(self.audio_sources["last_sample"], self.audio_sources["sample_rate"], self.audio_sources["sample_width"]) + def processMicData(self) -> AudioData: + audio_data = AudioData( + self.audio_sources["last_sample"], self.audio_sources["sample_rate"], self.audio_sources["sample_width"] + ) return audio_data - def processSpeakerData(self): + def processSpeakerData(self) -> AudioData: temp_file = BytesIO() with wave.open(temp_file, 'wb') as wf: wf.setnchannels(self.audio_sources["channels"]) @@ -141,7 +191,7 @@ class AudioTranscriber: audio = self.audio_recognizer.record(source) return audio - def updateTranscript(self, result): + def updateTranscript(self, result: dict) -> None: source_info = self.audio_sources transcript = self.transcript_data @@ -152,14 +202,14 @@ class AudioTranscriber: else: transcript[0] = result - def getTranscript(self): + def getTranscript(self) -> dict: if len(self.transcript_data) > 0: result = self.transcript_data.pop(-1) else: result = {"confidence": 0, "text": "", "language": None} return result - def clearTranscriptData(self): + def clearTranscriptData(self) -> None: self.transcript_data.clear() self.audio_sources["last_sample"] = bytes() self.audio_sources["new_phrase"] = True \ No newline at end of file diff --git a/src-python/models/transcription/transcription_whisper.py b/src-python/models/transcription/transcription_whisper.py index 5f61a121..ccfd2260 100644 --- 
a/src-python/models/transcription/transcription_whisper.py +++ b/src-python/models/transcription/transcription_whisper.py @@ -1,6 +1,17 @@ +"""Helpers for downloading and loading Whisper (faster-whisper) models. + +This module exposes small utilities used by the transcription subsystem: +- downloadFile: stream-download a file with optional progress callback +- checkWhisperWeight: quick local availability check +- downloadWhisperWeight: download model artifacts from HF hub +- getWhisperModel: construct and return a WhisperModel instance + +The functions are defensive: failures are caught and reported by the caller. +""" + from os import path as os_path, makedirs as os_makedirs from requests import get as requests_get -from typing import Callable +from typing import Callable, Optional import huggingface_hub from faster_whisper import WhisperModel import logging @@ -30,24 +41,36 @@ _FILENAMES = [ "vocabulary.json", ] -def downloadFile(url, path, func=None): +def downloadFile(url: str, path: str, func: Optional[Callable[[float], None]] = None) -> None: + """Download a file from `url` to `path`. + + Args: + url: remote URL to download from + path: local filepath to write + func: optional callback(progress: float) called with a 0.0-1.0 progress + """ try: res = requests_get(url, stream=True) res.raise_for_status() file_size = int(res.headers.get('content-length', 0)) total_chunk = 0 with open(os_path.join(path), 'wb') as file: - for chunk in res.iter_content(chunk_size=1024*2000): + for chunk in res.iter_content(chunk_size=1024 * 2000): file.write(chunk) - if isinstance(func, Callable): + if callable(func) and file_size: total_chunk += len(chunk) - func(total_chunk/file_size) + func(total_chunk / file_size) except Exception: + # Silent failure here; caller may re-check or log pass -def checkWhisperWeight(root, weight_type): +def checkWhisperWeight(root: str, weight_type: str) -> bool: + """Return True if a Whisper model for `weight_type` is loadable from disk. 
+ + This attempts to construct a local `WhisperModel` with local_files_only=True + to verify required files exist and are compatible. + """ path = os_path.join(root, "weights", "whisper", weight_type) - result = False try: WhisperModel( path, @@ -58,23 +81,47 @@ def checkWhisperWeight(root, weight_type): num_workers=1, local_files_only=True, ) - result = True + return True except Exception: - pass - return result + return False -def downloadWhisperWeight(root, weight_type, callback=None, end_callback=None): +def downloadWhisperWeight( + root: str, + weight_type: str, + callback: Optional[Callable[[float], None]] = None, + end_callback: Optional[Callable[[], None]] = None, +) -> None: + """Ensure Whisper weight files are present locally; download them if missing. + + Args: + root: project root where `weights/whisper` lives + weight_type: key from `_MODELS` (eg. "tiny", "base") + callback: progress callback for the main model file + end_callback: called when download completes + """ path = os_path.join(root, "weights", "whisper", weight_type) os_makedirs(path, exist_ok=True) - if checkWhisperWeight(root, weight_type) is False: + if not checkWhisperWeight(root, weight_type): for filename in _FILENAMES: file_path = os_path.join(path, filename) url = huggingface_hub.hf_hub_url(_MODELS[weight_type], filename) downloadFile(url, file_path, func=callback if filename == "model.bin" else None) - if isinstance(end_callback, Callable): + if callable(end_callback): end_callback() -def getWhisperModel(root, weight_type, device="cpu", device_index=0, compute_type="auto"): +def getWhisperModel( + root: str, + weight_type: str, + device: str = "cpu", + device_index: int = 0, + compute_type: str = "auto", +) -> WhisperModel: + """Return a `WhisperModel` instance loaded from local weights. + + Raises: + ValueError: when VRAM shortage is detected (wrapped from RuntimeError) + Exception: other loading errors are propagated. 
+ """ path = os_path.join(root, "weights", "whisper", weight_type) if compute_type == "auto": compute_type = getBestComputeType(device, device_index) @@ -90,11 +137,10 @@ def getWhisperModel(root, weight_type, device="cpu", device_index=0, compute_typ ) return model except RuntimeError as e: - # VRAM不足エラーの検出 + # Detect VRAM out-of-memory-like errors and raise a clear ValueError error_message = str(e) if "CUDA out of memory" in error_message or "CUBLAS_STATUS_ALLOC_FAILED" in error_message: raise ValueError("VRAM_OUT_OF_MEMORY", error_message) - # その他のエラーは通常通り再送出 raise if __name__ == "__main__": From 944577eaf48bb09bb7a6a26616e43258d04c03ef Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Thu, 9 Oct 2025 17:39:52 +0900 Subject: [PATCH 79/92] =?UTF-8?q?OSC=E3=83=A2=E3=82=B8=E3=83=A5=E3=83=BC?= =?UTF-8?q?=E3=83=AB=E3=81=AE=E3=83=89=E3=82=AD=E3=83=A5=E3=83=A1=E3=83=B3?= =?UTF-8?q?=E3=83=88=E3=82=92=E6=9B=B4=E6=96=B0=E3=81=97=E3=80=81=E4=BD=BF?= =?UTF-8?q?=E7=94=A8=E4=BE=8B=E3=82=84=E6=B3=A8=E6=84=8F=E7=82=B9=E3=82=92?= =?UTF-8?q?=E8=BF=BD=E5=8A=A0=E3=80=82=E5=9E=8B=E6=B3=A8=E9=87=88=E3=82=92?= =?UTF-8?q?=E5=BC=B7=E5=8C=96=E3=81=97=E3=80=81=E3=82=A8=E3=83=A9=E3=83=BC?= =?UTF-8?q?=E3=83=8F=E3=83=B3=E3=83=89=E3=83=AA=E3=83=B3=E3=82=B0=E3=82=92?= =?UTF-8?q?=E6=94=B9=E5=96=84=E3=80=82OSCHandler=E3=82=AF=E3=83=A9?= =?UTF-8?q?=E3=82=B9=E3=81=AE=E5=88=9D=E6=9C=9F=E5=8C=96=E3=83=A1=E3=82=BD?= =?UTF-8?q?=E3=83=83=E3=83=89=E3=82=92=E4=BF=AE=E6=AD=A3=E3=81=97=E3=80=81?= =?UTF-8?q?=E3=82=B5=E3=83=BC=E3=83=93=E3=82=B9=E3=81=AE=E3=82=A2=E3=83=89?= =?UTF-8?q?=E3=83=90=E3=82=BF=E3=82=A4=E3=82=BA=E4=B8=AD=E3=81=AE=E4=BE=8B?= =?UTF-8?q?=E5=A4=96=E5=87=A6=E7=90=86=E3=82=92=E8=BF=BD=E5=8A=A0=E3=80=82?= =?UTF-8?q?=E3=83=86=E3=82=B9=E3=83=88=E3=83=95=E3=82=A1=E3=82=A4=E3=83=AB?= =?UTF-8?q?=E3=82=92=E6=96=B0=E8=A6=8F=E4=BD=9C=E6=88=90=E3=81=97=E3=80=81?= =?UTF-8?q?OSC=E3=83=A2=E3=82=B8=E3=83=A5=E3=83=BC=E3=83=AB=E3=81=AE?= 
=?UTF-8?q?=E3=82=A4=E3=83=B3=E3=83=9D=E3=83=BC=E3=83=88=E3=83=86=E3=82=B9?= =?UTF-8?q?=E3=83=88=E3=82=92=E8=BF=BD=E5=8A=A0=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src-python/docs/modules/osc.md | 30 ++++++ src-python/models/osc/osc.py | 142 ++++++++++++++++++--------- src-python/tests/test_osc_imports.py | 6 ++ 3 files changed, 133 insertions(+), 45 deletions(-) create mode 100644 src-python/tests/test_osc_imports.py diff --git a/src-python/docs/modules/osc.md b/src-python/docs/modules/osc.md index 7ad5454d..1dc15e48 100644 --- a/src-python/docs/modules/osc.md +++ b/src-python/docs/modules/osc.md @@ -1,3 +1,33 @@ +## OSC モジュール (models.osc) + +このドキュメントは `models/osc/osc.py` の使い方と注意点を簡潔にまとめたものです。 + +### 概要 +- `OSCHandler` クラスは OSC メッセージの送信 (/chatbox/input, /chatbox/typing 等) と、 + ローカル環境では OSCQuery でエンドポイントを公開するための薄いラッパーを提供します。 + +### 依存関係 +- `python-osc` — UDP クライアント/サーバ +- `tinyoscquery` — OSCQuery を利用する場合に必要(オプショナル) + +### 使い方(例) + +```python +from models.osc.osc import OSCHandler + +handler = OSCHandler(ip_address="127.0.0.1", port=9000) +handler.setDictFilterAndTarget({ + "/chatbox/input": lambda addr, *args: print(args), +}) +handler.receiveOscParameters() +handler.sendTyping(True) +handler.sendMessage("Hello") +handler.oscServerStop() +``` + +### 注意点 +- `tinyoscquery` がインストールされていない場合、OSCQuery 関連機能は無効になりますが、送信(UDP クライアント)は動作します。 +- サービスのアドバタイズ中に例外が発生した場合、内部でリトライします。 # models/osc — 詳細設計 目的: VRChat 等と OSC / OSCQuery 経由で値の取得やチャット送信を行う。 diff --git a/src-python/models/osc/osc.py b/src-python/models/osc/osc.py index ce64623b..c97c6dfb 100644 --- a/src-python/models/osc/osc.py +++ b/src-python/models/osc/osc.py @@ -1,82 +1,116 @@ +"""OSC helpers and a thin OSCQuery-enabled server wrapper. + +This module provides `OSCHandler`, a convenience wrapper used by the +application to send OSC messages and expose OSCQuery endpoints when the +target address is localhost. 
The implementation is defensive: missing +utilities are handled gracefully and logging helpers are used where +available. +""" + import time -from typing import Any +from typing import Any, Callable, Dict, Optional from time import sleep from threading import Thread from pythonosc import udp_client, dispatcher, osc_server -from tinyoscquery.queryservice import OSCQueryService -from tinyoscquery.query import OSCQueryBrowser, OSCQueryClient -from tinyoscquery.utility import get_open_udp_port, get_open_tcp_port -from tinyoscquery.shared.node import OSCAccess +try: + from tinyoscquery.queryservice import OSCQueryService + from tinyoscquery.query import OSCQueryBrowser, OSCQueryClient + from tinyoscquery.utility import get_open_udp_port, get_open_tcp_port + from tinyoscquery.shared.node import OSCAccess +except Exception: + # tinyoscquery is optional for non-local usage; functionality that + # depends on it will be disabled if it's missing. + OSCQueryService = None # type: ignore + OSCQueryBrowser = None # type: ignore + OSCQueryClient = None # type: ignore + def get_open_udp_port() -> int: # type: ignore + return 0 + + def get_open_tcp_port() -> int: # type: ignore + return 0 + OSCAccess = None # type: ignore try: from utils import errorLogging -except ImportError: - def errorLogging(): +except Exception: + def errorLogging() -> None: import traceback print("Error occurred:", traceback.format_exc()) class OSCHandler: - def __init__(self, ip_address="127.0.0.1", port=9000) -> None: + """Thin wrapper managing OSC send/receive and optional OSCQuery advertising. 
- if ip_address in ["127.0.0.1", "localhost"]: - self.is_osc_query_enabled = True - else: - self.is_osc_query_enabled = False + Args: + ip_address: OSC server client target / bind address + port: UDP port to send to + """ - self.osc_ip_address = ip_address - self.osc_port = port - self.osc_parameter_muteself = "/avatar/parameters/MuteSelf" - self.osc_parameter_chatbox_typing = "/chatbox/typing" - self.osc_parameter_chatbox_input = "/chatbox/input" + def __init__(self, ip_address: str = "127.0.0.1", port: int = 9000) -> None: + + self.is_osc_query_enabled: bool = ip_address in ["127.0.0.1", "localhost"] + + self.osc_ip_address: str = ip_address + self.osc_port: int = port + self.osc_parameter_muteself: str = "/avatar/parameters/MuteSelf" + self.osc_parameter_chatbox_typing: str = "/chatbox/typing" + self.osc_parameter_chatbox_input: str = "/chatbox/input" self.udp_client = udp_client.SimpleUDPClient(self.osc_ip_address, self.osc_port) - self.osc_server_name = "VRChat-Client" + self.osc_server_name: str = "VRChat-Client" self.osc_server = None self.osc_query_service = None - self.osc_query_service_name = "VRCT" - self.osc_server_ip_address = ip_address - self.http_port = None - self.osc_server_port = None - self.dict_filter_and_target = {} + self.osc_query_service_name: str = "VRCT" + self.osc_server_ip_address: str = ip_address + self.http_port: Optional[int] = None + self.osc_server_port: Optional[int] = None + self.dict_filter_and_target: Dict[str, Callable] = {} self.browser = None def getIsOscQueryEnabled(self) -> bool: + """Return whether OSCQuery support is enabled (local addresses only).""" return self.is_osc_query_enabled - def setOscIpAddress(self, ip_address:str) -> None: - if ip_address in ["127.0.0.1", "localhost"]: - self.is_osc_query_enabled = True - else: - self.is_osc_query_enabled = False + def setOscIpAddress(self, ip_address: str) -> None: + """Change the OSC target IP address and reinitialize services.""" + self.is_osc_query_enabled = ip_address 
in ["127.0.0.1", "localhost"] self.oscServerStop() self.osc_ip_address = ip_address self.udp_client = udp_client.SimpleUDPClient(self.osc_ip_address, self.osc_port) self.receiveOscParameters() - def setOscPort(self, port:int) -> None: + def setOscPort(self, port: int) -> None: + """Change the OSC UDP port used for sending and reinitialize services.""" self.oscServerStop() self.osc_port = port self.udp_client = udp_client.SimpleUDPClient(self.osc_ip_address, self.osc_port) self.receiveOscParameters() # send OSC message typing - def sendTyping(self, flag:bool=False) -> None: + def sendTyping(self, flag: bool = False) -> None: + """Send /chatbox/typing with a boolean flag.""" self.udp_client.send_message(self.osc_parameter_chatbox_typing, [flag]) # send OSC message - def sendMessage(self, message:str="", notification:bool=True) -> None: + def sendMessage(self, message: str = "", notification: bool = True) -> None: + """Send /chatbox/input if message is non-empty. + + The second argument historically was a boolean flag for clearing; we keep + compatibility by sending [message, True, notification]. 
+ """ if len(message) > 0: self.udp_client.send_message(self.osc_parameter_chatbox_input, [f"{message}", True, notification]) - def getOSCParameterValue(self, address:str) -> Any: + def getOSCParameterValue(self, address: str) -> Any: if not self.is_osc_query_enabled: # OSCQueryが無効な場合はNoneを返す return None - - value = None + value: Any = None try: # browserインスタンスを再利用し、毎回の生成と破棄を避ける if self.browser is None: + # OSCQueryBrowser may not be available; guard + if OSCQueryBrowser is None: + return None self.browser = OSCQueryBrowser() sleep(1) # 初回のみスリープ @@ -99,15 +133,22 @@ class OSCHandler: self.browser = None return value - def getOSCParameterMuteSelf(self) -> bool: + def getOSCParameterMuteSelf(self) -> Optional[bool]: + """Return the value of the MuteSelf parameter when available, else None.""" return self.getOSCParameterValue(self.osc_parameter_muteself) - def setDictFilterAndTarget(self, dict_filter_and_target:dict) -> None: + def setDictFilterAndTarget(self, dict_filter_and_target: Dict[str, Callable]) -> None: + """Set the mapping from OSC address filters to handler callables.""" self.dict_filter_and_target = dict_filter_and_target def receiveOscParameters(self) -> None: - if self.is_osc_query_enabled is False: - # OSCQueryが無効な場合は何もしない + """Start a local OSC server and advertise OSCQuery endpoints when supported. + + If `tinyoscquery` is not available or OSCQuery is disabled, this is a + no-op. 
+ """ + if not self.is_osc_query_enabled or OSCQueryService is None: + # OSCQuery が無効またはライブラリが無い場合は何もしない return self.osc_server_port = get_open_udp_port() @@ -120,28 +161,39 @@ class OSCHandler: while True: try: - # osc_server_name + UTC timestampでユニークなサービス名を生成 + # osc_server_name + UTC timestamp でユニークなサービス名を生成 service_name = f"{self.osc_query_service_name}:{int(time.time())}" self.osc_query_service = OSCQueryService(service_name, self.http_port, self.osc_server_port) for filter, target in self.dict_filter_and_target.items(): - self.osc_query_service.advertise_endpoint(filter, access=OSCAccess.READWRITE_VALUE) + # OSCAccess may be None when tinyoscquery is not present; guard + if OSCAccess is not None: + self.osc_query_service.advertise_endpoint(filter, access=OSCAccess.READWRITE_VALUE) break except Exception: errorLogging() sleep(1) def oscServerServe(self) -> None: + """Run the OSC server loop with a longer poll interval to reduce CPU.""" # ポーリング間隔を長くして(2秒から10秒に)CPUの使用率を削減 - self.osc_server.serve_forever(10) + if self.osc_server is not None: + self.osc_server.serve_forever(10) def oscServerStop(self) -> None: + """Stop and clean up any running OSC server and OSCQuery service.""" if isinstance(self.osc_server, osc_server.ThreadingOSCUDPServer): - self.osc_server.shutdown() + try: + self.osc_server.shutdown() + except Exception: + pass self.osc_server = None - if isinstance(self.osc_query_service, OSCQueryService): - self.osc_query_service.http_server.shutdown() + if OSCQueryService is not None and isinstance(self.osc_query_service, OSCQueryService): + try: + self.osc_query_service.http_server.shutdown() + except Exception: + pass self.osc_query_service = None - # browserがある場合はクリーンアップ + # browser がある場合はクリーンアップ if self.browser is not None: try: if hasattr(self.browser, 'zc') and self.browser.zc is not None: diff --git a/src-python/tests/test_osc_imports.py b/src-python/tests/test_osc_imports.py new file mode 100644 index 00000000..1df699c3 --- /dev/null +++ 
b/src-python/tests/test_osc_imports.py @@ -0,0 +1,6 @@ +def test_import_osc_module(): + try: + import importlib + importlib.import_module('models.osc.osc') + except Exception as e: + raise AssertionError(f"Failed importing models.osc.osc: {e}") From 35e8d7dda92001191cd18e557af395651fc8a2e3 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Thu, 9 Oct 2025 18:43:12 +0900 Subject: [PATCH 80/92] =?UTF-8?q?=E3=82=B9=E3=83=AC=E3=83=83=E3=83=89?= =?UTF-8?q?=E3=82=BB=E3=83=BC=E3=83=95=E3=81=AA=E3=83=88=E3=83=BC=E3=82=AF?= =?UTF-8?q?=E3=83=8A=E3=82=A4=E3=82=B6=E3=83=BC=E3=82=A2=E3=82=AF=E3=82=BB?= =?UTF-8?q?=E3=82=B9=E3=81=AE=E3=81=9F=E3=82=81=E3=81=AB=E3=83=AD=E3=83=83?= =?UTF-8?q?=E3=82=AF=E3=82=92=E8=BF=BD=E5=8A=A0=E3=80=82=E3=83=88=E3=83=BC?= =?UTF-8?q?=E3=82=AF=E3=83=8A=E3=82=A4=E3=82=B6=E3=83=BC=E3=81=AE=E5=91=BC?= =?UTF-8?q?=E3=81=B3=E5=87=BA=E3=81=97=E6=99=82=E3=81=AB=E7=99=BA=E7=94=9F?= =?UTF-8?q?=E3=81=99=E3=82=8B=E5=8F=AF=E8=83=BD=E6=80=A7=E3=81=AE=E3=81=82?= =?UTF-8?q?=E3=82=8BRuntimeError=E3=82=92=E9=98=B2=E3=81=90=E3=81=9F?= =?UTF-8?q?=E3=82=81=E3=81=AB=E3=80=81=E3=82=A2=E3=82=AF=E3=82=BB=E3=82=B9?= =?UTF-8?q?=E3=82=92=E7=9B=B4=E5=88=97=E5=8C=96=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../transliteration/transliteration_transliterator.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src-python/models/transliteration/transliteration_transliterator.py b/src-python/models/transliteration/transliteration_transliterator.py index 8aff912e..44464348 100644 --- a/src-python/models/transliteration/transliteration_transliterator.py +++ b/src-python/models/transliteration/transliteration_transliterator.py @@ -1,6 +1,7 @@ from sudachipy import tokenizer from sudachipy import dictionary from typing import List, Dict, Any +import threading try: from .transliteration_kana_to_hepburn import katakana_to_hepburn except ImportError: @@ -14,6 
+15,9 @@ class Transliterator: def __init__(self) -> None: self.tokenizer_obj = dictionary.Dictionary(dict_type="full").create() self.mode = tokenizer.Tokenizer.SplitMode.C + # Lock to prevent concurrent access to sudachipy tokenizer which may + # internally use Rust/PyO3 borrow semantics and raise "Already borrowed". + self._tokenizer_lock = threading.Lock() @staticmethod def is_kanji(ch: str) -> bool: @@ -132,7 +136,10 @@ class Transliterator: results. """ - tokens = self.tokenizer_obj.tokenize(text, self.mode) + # Tokenizer may raise RuntimeError: Already borrowed when called + # concurrently. Protect the call with a lock to serialize access. + with self._tokenizer_lock: + tokens = self.tokenizer_obj.tokenize(text, self.mode) results: List[Dict[str, Any]] = [] for t in tokens: From eca5e31429daeb1da5ac241afc3127a78f5a130e Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Thu, 9 Oct 2025 18:53:42 +0900 Subject: [PATCH 81/92] =?UTF-8?q?torch=E3=81=A8ctranslate2=E3=81=AE?= =?UTF-8?q?=E3=82=A4=E3=83=B3=E3=83=9D=E3=83=BC=E3=83=88=E3=82=92=E3=82=AC?= =?UTF-8?q?=E3=83=BC=E3=83=89=E3=81=97=E3=80=81=E5=AE=89=E5=85=A8=E3=81=AA?= =?UTF-8?q?=E3=83=87=E3=83=95=E3=82=A9=E3=83=AB=E3=83=88=E3=82=92=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E3=80=82=E5=9E=8B=E6=B3=A8=E9=87=88=E3=81=A8docstring?= =?UTF-8?q?=E3=82=92=E8=BF=BD=E5=8A=A0=E3=81=97=E3=81=A6=E5=8F=AF=E8=AA=AD?= =?UTF-8?q?=E6=80=A7=E3=82=92=E5=90=91=E4=B8=8A=E3=80=82=E3=83=AD=E3=82=B0?= =?UTF-8?q?=E8=A8=AD=E5=AE=9A=E3=81=AE=E9=87=8D=E8=A4=87=E3=83=8F=E3=83=B3?= =?UTF-8?q?=E3=83=89=E3=83=A9=E8=BF=BD=E5=8A=A0=E3=82=92=E9=98=B2=E3=81=90?= =?UTF-8?q?=E3=83=81=E3=82=A7=E3=83=83=E3=82=AF=E3=82=92=E5=B0=8E=E5=85=A5?= =?UTF-8?q?=E3=80=82encodeBase64=E3=81=AF=E3=83=87=E3=82=B3=E3=83=BC?= =?UTF-8?q?=E3=83=89=E5=A4=B1=E6=95=97=E6=99=82=E3=81=AB=E7=A9=BA=E8=BE=9E?= =?UTF-8?q?=E6=9B=B8=E3=82=92=E8=BF=94=E3=81=99=E3=82=88=E3=81=86=E3=81=AB?= 
=?UTF-8?q?=E5=A4=89=E6=9B=B4=E3=80=82getComputeDeviceList=E3=81=AFGPU?= =?UTF-8?q?=E6=83=85=E5=A0=B1=E5=8F=96=E5=BE=97=E5=A4=B1=E6=95=97=E6=99=82?= =?UTF-8?q?=E3=81=ABCPU=E6=83=85=E5=A0=B1=E3=82=92=E8=BF=94=E3=81=99?= =?UTF-8?q?=E3=82=88=E3=81=86=E3=81=AB=E4=BE=8B=E5=A4=96=E4=BF=9D=E8=AD=B7?= =?UTF-8?q?=E3=82=92=E8=BF=BD=E5=8A=A0=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src-python/docs/modules/utils.md | 58 ++++++++++ src-python/utils.py | 177 ++++++++++++++++++++----------- 2 files changed, 176 insertions(+), 59 deletions(-) diff --git a/src-python/docs/modules/utils.md b/src-python/docs/modules/utils.md index 7a7b2289..9b7d4e71 100644 --- a/src-python/docs/modules/utils.md +++ b/src-python/docs/modules/utils.md @@ -1,3 +1,61 @@ +## utils モジュール(src-python/utils.py) + +このドキュメントは `src-python/utils.py` に対する最近のリファクタ内容、公開 API、利用上の注意点、テスト方法をまとめたものです。 + +### 概要 +- `utils.py` はプロジェクト全体で使われる汎用ユーティリティ群を提供します。主な内容: + - ネットワーク接続チェック (`isConnectedNetwork`) + - ソケットの空きポート確認 (`isAvailableWebSocketServer`) + - IP アドレス検証 (`isValidIpAddress`) + - 計算デバイス一覧取得 (`getComputeDeviceList` / `getBestComputeType`) + - Base64 デコード (JSON) (`encodeBase64`) + - ロガー設定/ログ出力ヘルパー (`setupLogger`, `printLog`, `printResponse`, `errorLogging`) + +### 今回のリファクタ(要点) +- Optional 依存へのフォールバック: `torch` と `ctranslate2` が存在しない環境でも動作するよう、import をガードし、安全なデフォルトを返す実装にしました。 +- 型注釈と docstring を追加して可読性を向上させました。 +- ログ設定の重複ハンドラ追加を防ぐチェックを導入しました。 +- `encodeBase64` はデコード失敗時に例外を投げず空辞書を返すように(安全側)変更しました。 +- `getComputeDeviceList` は GPU 情報取得で失敗しても CPU 情報を返すように例外保護を行いました。 + +### 重要な利用上の注意(breaking/behavior changes) +- Optional 依存 + - `torch` が無い環境では GPU 情報は取得できません(`getComputeDeviceList` は CPU エントリのみ返します)。 + - `ctranslate2` の `get_supported_compute_types` が無い場合は空リストを返します。 + → 環境に依存する挙動を想定して、呼び出し側は存在チェックやフォールバックを実装してください。 + +- `encodeBase64` の挙動 + - 不正な base64/JSON を入力した場合、例外を投げず `{}` を返します。既存コードが例外を期待している場合は注意してください。 + +- `isAvailableWebSocketServer` の仕様 + - 指定した 
host:port に対して bind が成功すれば True を返します(「使用中かどうか」を判定する用途と逆の意味合いになることがあるため注意)。 + +- ロギング + - `setupLogger` は同じログファイルに対するハンドラを重複して追加しません。`errorLogging()` はログ書き込みに失敗した場合でも最後に trace を stdout に出力するフォールバックがあります。 + +### API 使い方(短い例) + +```python +from utils import getComputeDeviceList, encodeBase64, printResponse + +devices = getComputeDeviceList() +print(devices) + +obj = encodeBase64('eyAia2V5IjogInZhbHVlIiB9') # -> {'key': 'value'} + +printResponse(200, '/health', {'status': 'ok'}) +``` + +### テスト方針 +- optional 依存の違いを扱うため、ユニットテストは `torch` と `ctranslate2` をモックして行うことを推奨します。 +- 例: `getComputeDeviceList()` は GPU がない環境でも CPU のエントリを返すことを確認するテスト。 + +### トラブルシュート +- ログファイルの書き込みエラー: 権限やディスク容量を確認してください。`error.log` と `process.log` の存在と権限をチェックします。 +- `getComputeDeviceList()` が空しか返さない場合、`torch` または `ctranslate2` のインストールを確認してください。 + +### 変更履歴 +- 2025-10-09: 型注釈・docstring 追加、optional import ガード、ロギング堅牢化。 # utils.py — 関数一覧と使用例 目的: 共通ユーティリティ(ログ、JSON 出力、ネットワーク/ポート検査、デバイス/計算タイプ列挙、バリデーション等)を提供します。 diff --git a/src-python/utils.py b/src-python/utils.py index fe4faa90..1e250e87 100644 --- a/src-python/utils.py +++ b/src-python/utils.py @@ -1,12 +1,22 @@ import base64 -from typing import Any, List, Dict +from typing import Any, List, Dict, Optional import json import traceback import logging from logging.handlers import RotatingFileHandler -import torch -from ctranslate2 import get_supported_compute_types +try: + import torch +except Exception: + torch = None # type: ignore + +try: + from ctranslate2 import get_supported_compute_types +except Exception: + # Fallback: if ctranslate2 is not installed, provide a safe stub. + def get_supported_compute_types(device: str, device_index: int) -> List[str]: + return [] + import requests import ipaddress import socket @@ -47,32 +57,32 @@ def validateDictStructure(data: dict, structure: dict) -> bool: return True def isConnectedNetwork(url="http://www.google.com", timeout=3) -> bool: + """Quick network connectivity check by requesting `url`. 
+ + Returns True when a 200 response is returned within `timeout` seconds. + """ try: response = requests.get(url, timeout=timeout) return response.status_code == 200 except requests.RequestException: return False -def isAvailableWebSocketServer(host:str, port:int) -> bool: - """WebSocketサーバーのポートが使用中かどうかを確認する""" - response = True +def isAvailableWebSocketServer(host: str, port: int) -> bool: + """Return True if the given host/port appear available for binding. + + Note: This attempts to bind a TCP socket to the address. If bind + succeeds the function returns True (meaning the address was available). + """ try: with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as chk: - try: - # SO_REUSEADDRを設定してソケットの再利用を許可 - chk.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - chk.bind((host, port)) - # シャットダウン前にリッスン状態にする必要はない - chk.close() - except Exception: - response = False + chk.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + chk.bind((host, port)) + return True except Exception: - errorLogging() - response = False - - return response + return False def isValidIpAddress(ip_address: str) -> bool: + """Return True if `ip_address` is a valid IPv4/IPv6 address.""" try: ipaddress.ip_address(ip_address) return True @@ -80,7 +90,12 @@ def isValidIpAddress(ip_address: str) -> bool: return False def getComputeDeviceList() -> List[Dict[str, Any]]: - compute_types = [ + """Return a list of available compute devices and supported compute types. + + The returned list contains dicts describing CPU and (if available) + CUDA devices. This function is defensive to missing optional packages. 
+ """ + compute_types: List[Dict[str, Any]] = [ { "device": "cpu", "device_index": 0, @@ -89,32 +104,47 @@ def getComputeDeviceList() -> List[Dict[str, Any]]: } ] - if torch.cuda.is_available(): - for device_index in range(torch.cuda.device_count()): - gpu_device_name = torch.cuda.get_device_name(device_index) - gpu_compute_types = ["auto"] + list(get_supported_compute_types("cuda", device_index)) + try: + if torch is not None and hasattr(torch, "cuda") and torch.cuda.is_available(): + for device_index in range(torch.cuda.device_count()): + gpu_device_name = torch.cuda.get_device_name(device_index) + gpu_compute_types = ["auto"] + list(get_supported_compute_types("cuda", device_index)) - # デバイスごとの計算タイプの制限 - if "GTX" in gpu_device_name: - unsupported_types = {"int8_bfloat16", "bfloat16", "float16", "int8"} - gpu_compute_types = [t for t in gpu_compute_types if t not in unsupported_types] - elif not any(keyword in gpu_device_name for keyword in ["RTX", "Tesla", "A100", "Quadro"]): - gpu_compute_types = ["float32"] + # デバイスごとの計算タイプの制限 + if "GTX" in gpu_device_name: + unsupported_types = {"int8_bfloat16", "bfloat16", "float16", "int8"} + gpu_compute_types = [t for t in gpu_compute_types if t not in unsupported_types] + elif not any(keyword in gpu_device_name for keyword in ["RTX", "Tesla", "A100", "Quadro"]): + gpu_compute_types = ["float32"] - compute_types.append( - { - "device": "cuda", - "device_index": device_index, - "device_name": gpu_device_name, - "compute_types": gpu_compute_types, - } - ) + compute_types.append( + { + "device": "cuda", + "device_index": device_index, + "device_name": gpu_device_name, + "compute_types": gpu_compute_types, + } + ) + except Exception: + # If querying GPU devices fails, return at least the CPU entry + errorLogging() return compute_types def getBestComputeType(device: str, device_index: int) -> str: - compute_types = set(get_supported_compute_types(device, device_index)) - device_name = "cpu" if device == "cpu" else 
torch.cuda.get_device_name(device_index) + """Pick the best available compute type for a device. + + Falls back to "float32" when no preferred type is available. + """ + try: + compute_types = set(get_supported_compute_types(device, device_index)) + except Exception: + compute_types = set() + + try: + device_name = "cpu" if device == "cpu" else (torch.cuda.get_device_name(device_index) if torch is not None else "") + except Exception: + device_name = "" # デバイスごとの優先計算タイプ preferred_types = { @@ -141,14 +171,26 @@ def getBestComputeType(device: str, device_index: int) -> str: return "float32" -def encodeBase64(data:str) -> dict: - return json.loads(base64.b64decode(data).decode('utf-8')) +def encodeBase64(data: str) -> Dict[str, Any]: + """Decode a base64-encoded JSON string and return the parsed object. -def removeLog(): - with open('process.log', 'w', encoding="utf-8") as f: - f.write("") + Returns an empty dict on failure. + """ + try: + return json.loads(base64.b64decode(data).decode('utf-8')) + except Exception: + errorLogging() + return {} -def setupLogger(name, log_file, level=logging.INFO): +def removeLog() -> None: + """Truncate the process log file (process.log) if present.""" + try: + with open('process.log', 'w', encoding="utf-8") as f: + f.write("") + except Exception: + errorLogging() + +def setupLogger(name: str, log_file: str, level: int = logging.INFO) -> logging.Logger: """ 特定の名前とログファイルを持つロガーを設定します。 """ @@ -174,13 +216,17 @@ def setupLogger(name, log_file, level=logging.INFO): formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') file_handler.setFormatter(formatter) - # ロガーにハンドラーを追加 - logger.addHandler(file_handler) + # ロガーにハンドラーを追加(重複追加を避ける) + if not any(isinstance(h, RotatingFileHandler) and getattr(h, 'baseFilename', None) == getattr(file_handler, 'baseFilename', None) for h in logger.handlers): + logger.addHandler(file_handler) return logger -process_logger = None -def printLog(log:str, data:Any=None) -> None: 
+process_logger: Optional[logging.Logger] = None + + +def printLog(log: str, data: Any = None) -> None: + """Log and print a structured process log message.""" global process_logger if process_logger is None: process_logger = setupLogger("process", "process.log", logging.INFO) @@ -194,7 +240,11 @@ def printLog(log:str, data:Any=None) -> None: serialized = json.dumps(response) print(serialized, flush=True) -def printResponse(status:int, endpoint:str, result:Any=None) -> None: +def printResponse(status: int, endpoint: str, result: Any = None) -> None: + """Log and print a structured response object. + + If JSON serialization fails, record the error and emit a generic error payload. + """ global process_logger if process_logger is None: process_logger = setupLogger("process", "process.log", logging.INFO) @@ -208,28 +258,37 @@ def printResponse(status:int, endpoint:str, result:Any=None) -> None: try: serialized_response = json.dumps(response) - except OSError as e: - errorLogging() # Log the full traceback of the OSError - process_logger.error(f"Problematic response object before json.dumps: {response}") - process_logger.error(f"OSError during json.dumps: {e}") - # Optionally, print a generic error JSON to stdout if needed, or re-raise - # For now, we'll print a simple error message to stdout as a fallback + except Exception as e: + errorLogging() # Log the full traceback of the exception + try: + process_logger.error(f"Problematic response object before json.dumps: {response}") + process_logger.error(f"Exception during json.dumps: {e}") + except Exception: + pass + # Fallback generic error payload error_json = json.dumps({ "status": 500, "endpoint": endpoint, - "result": {"error": "Failed to serialize response due to OSError", "details": str(e)} + "result": {"error": "Failed to serialize response", "details": str(e)}, }) print(error_json, flush=True) else: print(serialized_response, flush=True) -error_logger = None +error_logger: Optional[logging.Logger] = None + + 
def errorLogging() -> None: + """Log the current exception traceback to the error logger.""" global error_logger if error_logger is None: error_logger = setupLogger("error", "error.log", logging.ERROR) - error_logger.error(traceback.format_exc()) + try: + error_logger.error(traceback.format_exc()) + except Exception: + # As a last resort, print the traceback to stdout + print(traceback.format_exc(), flush=True) if __name__ == "__main__": print(getComputeDeviceList()) \ No newline at end of file From 61cbe07f0fe4648820beeccc5bb41d6cf53cc9f3 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Thu, 9 Oct 2025 19:04:31 +0900 Subject: [PATCH 82/92] =?UTF-8?q?=E3=83=87=E3=83=90=E3=82=A4=E3=82=B9?= =?UTF-8?q?=E7=AE=A1=E7=90=86=E3=83=A2=E3=82=B8=E3=83=A5=E3=83=BC=E3=83=AB?= =?UTF-8?q?=E3=81=AE=E3=82=A4=E3=83=B3=E3=83=9D=E3=83=BC=E3=83=88=E3=82=92?= =?UTF-8?q?=E3=82=AC=E3=83=BC=E3=83=89=E3=81=97=E3=80=81Windows=E5=9B=BA?= =?UTF-8?q?=E6=9C=89=E3=81=AE=E4=BE=9D=E5=AD=98=E9=96=A2=E4=BF=82=E3=82=92?= =?UTF-8?q?=E3=82=AA=E3=83=97=E3=82=B7=E3=83=A7=E3=83=8A=E3=83=AB=E3=81=AB?= =?UTF-8?q?=E5=A4=89=E6=9B=B4=E3=80=82=E3=82=AF=E3=83=A9=E3=82=B9=E3=81=AE?= =?UTF-8?q?=E5=88=9D=E6=9C=9F=E5=8C=96=E3=83=A1=E3=82=BD=E3=83=83=E3=83=89?= =?UTF-8?q?=E3=82=92=E4=BF=AE=E6=AD=A3=E3=81=97=E3=80=81=E3=83=87=E3=83=95?= =?UTF-8?q?=E3=82=A9=E3=83=AB=E3=83=88=E3=83=87=E3=83=90=E3=82=A4=E3=82=B9?= =?UTF-8?q?=E5=A4=89=E6=9B=B4=E6=99=82=E3=81=AE=E3=82=B3=E3=83=BC=E3=83=AB?= =?UTF-8?q?=E3=83=90=E3=83=83=E3=82=AF=E3=82=92=E8=BF=BD=E5=8A=A0=E3=80=82?= =?UTF-8?q?=E3=83=89=E3=82=AD=E3=83=A5=E3=83=A1=E3=83=B3=E3=83=88=E3=82=92?= =?UTF-8?q?=E6=96=B0=E8=A6=8F=E4=BD=9C=E6=88=90=E3=81=97=E3=80=81=E4=BD=BF?= =?UTF-8?q?=E7=94=A8=E4=BE=8B=E3=82=84=E6=B3=A8=E6=84=8F=E7=82=B9=E3=82=92?= =?UTF-8?q?=E6=98=8E=E7=A4=BA=E5=8C=96=E3=80=82=E3=82=A8=E3=83=A9=E3=83=BC?= =?UTF-8?q?=E3=83=8F=E3=83=B3=E3=83=89=E3=83=AA=E3=83=B3=E3=82=B0=E3=82=92?= 
=?UTF-8?q?=E5=BC=B7=E5=8C=96=E3=81=97=E3=80=81=E3=82=B3=E3=83=BC=E3=83=89?= =?UTF-8?q?=E3=81=AE=E5=8F=AF=E8=AA=AD=E6=80=A7=E3=82=92=E5=90=91=E4=B8=8A?= =?UTF-8?q?=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src-python/device_manager.py | 361 ++++++++++++------ src-python/docs/modules/device_manager_ref.md | 93 +++++ 2 files changed, 331 insertions(+), 123 deletions(-) create mode 100644 src-python/docs/modules/device_manager_ref.md diff --git a/src-python/device_manager.py b/src-python/device_manager.py index 7f741d26..b41a0cc3 100644 --- a/src-python/device_manager.py +++ b/src-python/device_manager.py @@ -1,27 +1,53 @@ -from typing import Callable +from typing import Callable, Dict, List, Optional, Any from time import sleep from threading import Thread -import comtypes -from pyaudiowpatch import PyAudio, paWASAPI -from pycaw.callbacks import MMNotificationClient -from pycaw.utils import AudioUtilities + +# Optional, Windows-specific dependencies. Guard imports so module can be imported on non-Windows systems. +try: + import comtypes +except Exception: # pragma: no cover - optional runtime + comtypes = None # type: ignore + +try: + from pyaudiowpatch import PyAudio, paWASAPI +except Exception: # pragma: no cover - optional runtime + PyAudio = None # type: ignore + paWASAPI = None # type: ignore + +try: + from pycaw.callbacks import MMNotificationClient + from pycaw.utils import AudioUtilities +except Exception: # pragma: no cover - optional runtime + MMNotificationClient = object # type: ignore + AudioUtilities = None # type: ignore + from utils import errorLogging class Client(MMNotificationClient): - def __init__(self): - super().__init__() - self.loop = True + """Callback client used by pycaw to detect device changes. 
- def on_default_device_changed(self, flow, flow_id, role, role_id, default_device_id): + This subclass is lightweight: it flips a flag when events arrive so the + monitoring loop can break and refresh device lists. + """ + + def __init__(self) -> None: + # If MMNotificationClient is the placeholder object (non-windows), avoid calling super + try: + super().__init__() + except Exception: + pass + self.loop: bool = True + + def on_default_device_changed(self, *args: Any, **kwargs: Any) -> None: self.loop = False - def on_device_added(self, added_device_id): + def on_device_added(self, *args: Any, **kwargs: Any) -> None: self.loop = False - def on_device_removed(self, removed_device_id): + def on_device_removed(self, *args: Any, **kwargs: Any) -> None: self.loop = False - def on_device_state_changed(self, device_id, state): + def on_device_state_changed(self, *args: Any, **kwargs: Any) -> None: self.loop = False # def on_property_value_changed(self, device_id, key): @@ -33,108 +59,150 @@ class DeviceManager: def __new__(cls): if cls._instance is None: cls._instance = super(DeviceManager, cls).__new__(cls) - cls._instance.init() + # do NOT auto-init monitoring-heavy resources on import; require explicit init + cls._instance._initialized = False return cls._instance - def init(self): - self.mic_devices = {"NoHost": [{"index": -1, "name": "NoDevice"}]} - self.default_mic_device = {"host": {"index": -1, "name": "NoHost"}, "device": {"index": -1, "name": "NoDevice"}} - self.speaker_devices = [{"index": -1, "name": "NoDevice"}] - self.default_speaker_device = {"device": {"index": -1, "name": "NoDevice"}} + def init(self) -> None: + """Initialize internal state. This is intentionally separate from object + creation so importing the module won't start threads or access OS + audio APIs. Call `device_manager.init()` and then + `device_manager.startMonitoring()` explicitly when ready. 
+ """ + if getattr(self, "_initialized", False): + return - self.update() + self.mic_devices: Dict[str, List[Dict[str, Any]]] = {"NoHost": [{"index": -1, "name": "NoDevice"}]} + self.default_mic_device: Dict[str, Any] = {"host": {"index": -1, "name": "NoHost"}, "device": {"index": -1, "name": "NoDevice"}} + self.speaker_devices: List[Dict[str, Any]] = [{"index": -1, "name": "NoDevice"}] + self.default_speaker_device: Dict[str, Any] = {"device": {"index": -1, "name": "NoDevice"}} - self.prev_mic_host = [host for host in self.mic_devices] - self.prev_mic_devices = self.mic_devices - self.prev_default_mic_device = self.default_mic_device - self.prev_speaker_devices = self.speaker_devices - self.prev_default_speaker_device = self.default_speaker_device + # Initialize previous state trackers + self.prev_mic_host: List[str] = [host for host in self.mic_devices] + self.prev_mic_devices: Dict[str, List[Dict[str, Any]]] = self.mic_devices + self.prev_default_mic_device: Dict[str, Any] = self.default_mic_device + self.prev_speaker_devices: List[Dict[str, Any]] = self.speaker_devices + self.prev_default_speaker_device: Dict[str, Any] = self.default_speaker_device - self.update_flag_default_mic_device = False - self.update_flag_default_speaker_device = False - self.update_flag_host_list = False - self.update_flag_mic_device_list = False - self.update_flag_speaker_device_list = False + # Update flags + self.update_flag_default_mic_device: bool = False + self.update_flag_default_speaker_device: bool = False + self.update_flag_host_list: bool = False + self.update_flag_mic_device_list: bool = False + self.update_flag_speaker_device_list: bool = False - self.callback_default_mic_device = None - self.callback_default_speaker_device = None - self.callback_host_list = None - self.callback_mic_device_list = None - self.callback_speaker_device_list = None - self.callback_process_before_update_devices = None - self.callback_process_after_update_devices = None + # Callbacks + 
self.callback_default_mic_device: Optional[Callable[..., None]] = None + self.callback_default_speaker_device: Optional[Callable[..., None]] = None + self.callback_host_list: Optional[Callable[..., None]] = None + self.callback_mic_device_list: Optional[Callable[..., None]] = None + self.callback_speaker_device_list: Optional[Callable[..., None]] = None + self.callback_process_before_update_mic_devices: Optional[Callable[..., None]] = None + self.callback_process_after_update_mic_devices: Optional[Callable[..., None]] = None + self.callback_process_before_update_speaker_devices: Optional[Callable[..., None]] = None + self.callback_process_after_update_speaker_devices: Optional[Callable[..., None]] = None - self.monitoring_flag = False - self.startMonitoring() + # Monitoring control + self.monitoring_flag: bool = False + self.th_monitoring: Optional[Thread] = None + + self._initialized = True def update(self): - buffer_mic_devices = {} - buffer_default_mic_device = {"host": {"index": -1, "name": "NoHost"}, "device": {"index": -1, "name": "NoDevice"}} - buffer_speaker_devices = [] - buffer_default_speaker_device = {"device": {"index": -1, "name": "NoDevice"}} + buffer_mic_devices: Dict[str, List[Dict[str, Any]]] = {} + buffer_default_mic_device: Dict[str, Any] = {"host": {"index": -1, "name": "NoHost"}, "device": {"index": -1, "name": "NoDevice"}} + buffer_speaker_devices: List[Dict[str, Any]] = [] + buffer_default_speaker_device: Dict[str, Any] = {"device": {"index": -1, "name": "NoDevice"}} - with PyAudio() as p: - for host_index in range(p.get_host_api_count()): - host = p.get_host_api_info_by_index(host_index) - device_count = host.get('deviceCount', 0) - for device_index in range(device_count): - device = p.get_device_info_by_host_api_device_index(host_index, device_index) - if device.get("maxInputChannels", 0) > 0 and not device.get("isLoopbackDevice", True): - buffer_mic_devices.setdefault(host["name"], []).append(device) - if not buffer_mic_devices: - 
buffer_mic_devices = {"NoHost": [{"index": -1, "name": "NoDevice"}]} + if PyAudio is None: + # PyAudio not available; leave defaults in place + self.mic_devices = buffer_mic_devices or {"NoHost": [{"index": -1, "name": "NoDevice"}]} + self.default_mic_device = buffer_default_mic_device + self.speaker_devices = buffer_speaker_devices or [{"index": -1, "name": "NoDevice"}] + self.default_speaker_device = buffer_default_speaker_device + return - api_info = p.get_default_host_api_info() - default_mic_device = api_info["defaultInputDevice"] - - for host_index in range(p.get_host_api_count()): - host = p.get_host_api_info_by_index(host_index) - device_count = host.get('deviceCount', 0) - for device_index in range(device_count): - device = p.get_device_info_by_host_api_device_index(host_index, device_index) - if device["index"] == default_mic_device: - buffer_default_mic_device = {"host": host, "device": device} - break - else: - continue - break - - speaker_devices = [] - wasapi_info = p.get_host_api_info_by_type(paWASAPI) - wasapi_name = wasapi_info["name"] - for host_index in range(p.get_host_api_count()): - host = p.get_host_api_info_by_index(host_index) - if host["name"] == wasapi_name: + try: + with PyAudio() as p: + # gather input devices grouped by host + for host_index in range(p.get_host_api_count()): + host = p.get_host_api_info_by_index(host_index) device_count = host.get('deviceCount', 0) for device_index in range(device_count): device = p.get_device_info_by_host_api_device_index(host_index, device_index) - if not device.get("isLoopbackDevice", True): - for loopback in p.get_loopback_device_info_generator(): - if device["name"] in loopback["name"]: - speaker_devices.append(loopback) - speaker_devices = [dict(t) for t in {tuple(d.items()) for d in speaker_devices}] or [{"index": -1, "name": "NoDevice"}] - buffer_speaker_devices = sorted(speaker_devices, key=lambda d: d['index']) + if device.get("maxInputChannels", 0) > 0 and not device.get("isLoopbackDevice", 
True): + buffer_mic_devices.setdefault(host["name"], []).append(device) + if not buffer_mic_devices: + buffer_mic_devices = {"NoHost": [{"index": -1, "name": "NoDevice"}]} - wasapi_info = p.get_host_api_info_by_type(paWASAPI) - default_speaker_device_index = wasapi_info["defaultOutputDevice"] + api_info = p.get_default_host_api_info() + default_mic_device = api_info.get("defaultInputDevice", -1) - for host_index in range(p.get_host_api_count()): - host_info = p.get_host_api_info_by_index(host_index) - device_count = host_info.get('deviceCount', 0) - for device_index in range(0, device_count): - device = p.get_device_info_by_host_api_device_index(host_index, device_index) - if device["index"] == default_speaker_device_index: - default_speakers = device - if not default_speakers.get("isLoopbackDevice", True): - for loopback in p.get_loopback_device_info_generator(): - if default_speakers["name"] in loopback["name"]: - buffer_default_speaker_device = {"device": loopback} - break - break - - if buffer_default_speaker_device["device"]["name"] != "NoDevice": + for host_index in range(p.get_host_api_count()): + host = p.get_host_api_info_by_index(host_index) + device_count = host.get('deviceCount', 0) + for device_index in range(device_count): + device = p.get_device_info_by_host_api_device_index(host_index, device_index) + if device.get("index") == default_mic_device: + buffer_default_mic_device = {"host": host, "device": device} + break + else: + continue break + # collect speaker loopback devices (requires WASAPI) + speaker_devices: List[Dict[str, Any]] = [] + if paWASAPI is not None: + try: + wasapi_info = p.get_host_api_info_by_type(paWASAPI) + wasapi_name = wasapi_info.get("name") + for host_index in range(p.get_host_api_count()): + host = p.get_host_api_info_by_index(host_index) + if host.get("name") == wasapi_name: + device_count = host.get('deviceCount', 0) + for device_index in range(device_count): + device = 
p.get_device_info_by_host_api_device_index(host_index, device_index) + if not device.get("isLoopbackDevice", True): + for loopback in p.get_loopback_device_info_generator(): + # match by name inclusion + if device.get("name") in loopback.get("name", ""): + speaker_devices.append(loopback) + except Exception: + # WASAPI not available or failed; ignore and continue + pass + + # deduplicate and sort + speaker_devices = [dict(t) for t in {tuple(d.items()) for d in speaker_devices}] or [{"index": -1, "name": "NoDevice"}] + buffer_speaker_devices = sorted(speaker_devices, key=lambda d: d.get('index', -1)) + + # default speaker + if paWASAPI is not None: + try: + wasapi_info = p.get_host_api_info_by_type(paWASAPI) + default_speaker_device_index = wasapi_info.get("defaultOutputDevice", -1) + for host_index in range(p.get_host_api_count()): + host_info = p.get_host_api_info_by_index(host_index) + device_count = host_info.get('deviceCount', 0) + for device_index in range(0, device_count): + device = p.get_device_info_by_host_api_device_index(host_index, device_index) + if device.get("index") == default_speaker_device_index: + default_speakers = device + if not default_speakers.get("isLoopbackDevice", True): + for loopback in p.get_loopback_device_info_generator(): + if default_speakers.get("name") in loopback.get("name", ""): + buffer_default_speaker_device = {"device": loopback} + break + break + + if buffer_default_speaker_device["device"].get("name") != "NoDevice": + break + except Exception: + # best-effort; ignore failures + pass + + except Exception: + errorLogging() + self.mic_devices = buffer_mic_devices self.default_mic_device = buffer_default_mic_device self.speaker_devices = buffer_speaker_devices @@ -170,14 +238,27 @@ class DeviceManager: try: while self.monitoring_flag is True: try: - comtypes.CoInitialize() - cb = Client() - enumerator = AudioUtilities.GetDeviceEnumerator() - enumerator.RegisterEndpointNotificationCallback(cb) - while cb.loop is True: - 
sleep(1) - enumerator.UnregisterEndpointNotificationCallback(cb) - comtypes.CoUninitialize() + # Use COM only when available (Windows). If comtypes is not present, + # fall back to periodic polling using PyAudio only. + if comtypes is not None and AudioUtilities is not None: + try: + comtypes.CoInitialize() + cb = Client() + enumerator = AudioUtilities.GetDeviceEnumerator() + enumerator.RegisterEndpointNotificationCallback(cb) + while cb.loop is True and self.monitoring_flag is True: + sleep(1) + try: + enumerator.UnregisterEndpointNotificationCallback(cb) + except Exception: + # best-effort unregister + pass + comtypes.CoUninitialize() + except Exception: + # if COM monitoring fails, log and fall through to polling + errorLogging() + + # polling and update cycle self.runProcessBeforeUpdateMicDevices() self.runProcessBeforeUpdateSpeakerDevices() sleep(2) @@ -191,12 +272,12 @@ class DeviceManager: self.runProcessAfterUpdateSpeakerDevices() except Exception: errorLogging() - finally: - pass except Exception: errorLogging() def startMonitoring(self): + if self.monitoring_flag: + return self.monitoring_flag = True self.th_monitoring = Thread(target=self.monitoring) self.th_monitoring.daemon = True @@ -204,7 +285,12 @@ class DeviceManager: def stopMonitoring(self): self.monitoring_flag = False - self.th_monitoring.join() + if getattr(self, "th_monitoring", None) is not None: + try: + self.th_monitoring.join(timeout=5) + except Exception: + # If join fails or thread is not joinable, ignore - it's a best-effort stop + pass def setCallbackDefaultMicDevice(self, callback): self.callback_default_mic_device = callback @@ -244,7 +330,10 @@ class DeviceManager: def runProcessBeforeUpdateMicDevices(self): if isinstance(self.callback_process_before_update_mic_devices, Callable): - self.callback_process_before_update_mic_devices() + try: + self.callback_process_before_update_mic_devices() + except Exception: + errorLogging() def setCallbackProcessAfterUpdateMicDevices(self, 
callback): self.callback_process_after_update_mic_devices = callback @@ -254,7 +343,10 @@ class DeviceManager: def runProcessAfterUpdateMicDevices(self): if isinstance(self.callback_process_after_update_mic_devices, Callable): - self.callback_process_after_update_mic_devices() + try: + self.callback_process_after_update_mic_devices() + except Exception: + errorLogging() def setCallbackProcessBeforeUpdateSpeakerDevices(self, callback): self.callback_process_before_update_speaker_devices = callback @@ -264,7 +356,10 @@ class DeviceManager: def runProcessBeforeUpdateSpeakerDevices(self): if isinstance(self.callback_process_before_update_speaker_devices, Callable): - self.callback_process_before_update_speaker_devices() + try: + self.callback_process_before_update_speaker_devices() + except Exception: + errorLogging() def setCallbackProcessAfterUpdateSpeakerDevices(self, callback): self.callback_process_after_update_speaker_devices = callback @@ -274,7 +369,10 @@ class DeviceManager: def runProcessAfterUpdateSpeakerDevices(self): if isinstance(self.callback_process_after_update_speaker_devices, Callable): - self.callback_process_after_update_speaker_devices() + try: + self.callback_process_after_update_speaker_devices() + except Exception: + errorLogging() def noticeUpdateDevices(self): if self.update_flag_default_mic_device is True: @@ -296,23 +394,38 @@ class DeviceManager: def setMicDefaultDevice(self): if isinstance(self.callback_default_mic_device, Callable): - self.callback_default_mic_device(self.default_mic_device["host"]["name"], self.default_mic_device["device"]["name"]) + try: + self.callback_default_mic_device(self.default_mic_device["host"]["name"], self.default_mic_device["device"]["name"]) + except Exception: + errorLogging() def setSpeakerDefaultDevice(self): if isinstance(self.callback_default_speaker_device, Callable): - self.callback_default_speaker_device(self.default_speaker_device["device"]["name"]) + try: + 
self.callback_default_speaker_device(self.default_speaker_device["device"]["name"]) + except Exception: + errorLogging() def setMicHostList(self): if isinstance(self.callback_host_list, Callable): - self.callback_host_list() + try: + self.callback_host_list() + except Exception: + errorLogging() def setMicDeviceList(self): if isinstance(self.callback_mic_device_list, Callable): - self.callback_mic_device_list() + try: + self.callback_mic_device_list() + except Exception: + errorLogging() def setSpeakerDeviceList(self): if isinstance(self.callback_speaker_device_list, Callable): - self.callback_speaker_device_list() + try: + self.callback_speaker_device_list() + except Exception: + errorLogging() def getMicDevices(self): return self.mic_devices @@ -337,13 +450,15 @@ class DeviceManager: self.setSpeakerDeviceList() self.setSpeakerDefaultDevice() +# Provide a module-level singleton. Call `device_manager.init()` explicitly to +# initialize audio resources and `device_manager.startMonitoring()` to begin +# background monitoring. This avoids side-effects during simple imports. device_manager = DeviceManager() if __name__ == "__main__": - # print("getMicDevices()", device_manager.getMicDevices()) - # print("getDefaultMicDevice()", device_manager.getDefaultMicDevice()) - # print("getSpeakerDevices()", device_manager.getSpeakerDevices()) - # print("getDefaultSpeakerDevice()", device_manager.getDefaultSpeakerDevice()) - - while True: - sleep(1) \ No newline at end of file + print("DeviceManager demo. 
Call device_manager.init() and device_manager.startMonitoring() to run live monitoring.") + try: + while True: + sleep(1) + except KeyboardInterrupt: + print("exiting") \ No newline at end of file diff --git a/src-python/docs/modules/device_manager_ref.md b/src-python/docs/modules/device_manager_ref.md new file mode 100644 index 00000000..893bfb8b --- /dev/null +++ b/src-python/docs/modules/device_manager_ref.md @@ -0,0 +1,93 @@ +# device_manager.py — デバイス検出と監視 (改訂版) + +### 概要 +`device_manager.py` はローカルのマイク(入力)とスピーカー(ループバックから抽出)を列挙し、デフォルトデバイスの変更やデバイスリストの変化を監視してコールバックで通知するユーティリティです。 + +設計上のポイント: +- Windows 固有の依存 (`comtypes`, `pyaudiowpatch` (PyAudio + WASAPI), `pycaw`) はオプショナルです。モジュールを import してもこれらが無ければ例外にならず、プレースホルダ値を返すようになっています。 +- モジュールの import 時点では監視は開始されません。リソースやスレッドの副作用を避けるため、`init()` と `startMonitoring()` は呼び出し側で明示的に実行してください。 + +--- + +### 使い方(簡単な流れ) + +1. モジュールをインポート + +```py +from device_manager import device_manager +``` + +2. 初期化(内部状態のセットアップ) + +```py +device_manager.init() +``` + +3. 監視の開始(バックグラウンドスレッド) + +```py +device_manager.startMonitoring() +``` + +4. 
停止(アプリ終了時など) + +```py +device_manager.stopMonitoring() +``` + +--- + +### 主な API + +- `device_manager.init()` + - internal state の初期化。import 後に必ず呼ぶ必要はないが、実機デバイスを取得する前に呼ぶことを推奨します。 +- `device_manager.startMonitoring()` / `device_manager.stopMonitoring()` + - 監視の開始 / 停止。`startMonitoring()` はデーモンスレッドを作成します。`stopMonitoring()` は best-effort で join を試みます。 +- `device_manager.getMicDevices()` + - ホストごとにグループ化された入力デバイスの辞書を返します。例: `{ 'Realtek': [ {index: 2, name: 'Microphone (Realtek)'} ] }`。 +- `device_manager.getDefaultMicDevice()` / `device_manager.getSpeakerDevices()` / `device_manager.getDefaultSpeakerDevice()` + - デフォルトデバイスやスピーカーループバックの情報を返します。 +- `device_manager.forceUpdateAndSetMicDevices()` / `device_manager.forceUpdateAndSetSpeakerDevices()` + - 即時に update() を実行して対応するコールバックを呼びます。 + +--- + +### コールバック登録(例) + +コールバックは例外を内部で捕捉してログを出すため、コールバック実装側でもエラーハンドリングしてください。 + +- `setCallbackDefaultMicDevice(callback)` — デフォルト入力が変わったときに `callback(host_name, device_name)` が呼ばれます。 +- `setCallbackDefaultSpeakerDevice(callback)` — デフォルト出力が変わったときに `callback(device_name)` が呼ばれます。 +- `setCallbackHostList(callback)` / `setCallbackMicDeviceList(callback)` / `setCallbackSpeakerDeviceList(callback)` — それぞれ list 変更時に `callback()` が呼ばれます。 +- `setCallbackProcessBeforeUpdateMicDevices(callback)` / `setCallbackProcessAfterUpdateMicDevices(callback)` — 更新の前後に呼ばれるフックです。 + +簡単な例: + +```py +from device_manager import device_manager + +def on_default_mic(host, device): + print('default mic changed', host, device) + +device_manager.init() +device_manager.setCallbackDefaultMicDevice(on_default_mic) +device_manager.startMonitoring() + +# 後で停止 +# device_manager.stopMonitoring() +``` + +--- + +### 注意点 / トラブルシュート + +- Windows 固有の依存が無い場合、`getMicDevices()` などはデフォルトのプレースホルダ(`NoHost` / `NoDevice`)を返します。実機のデバイス検出や WASAPI によるループバック検出は Windows 環境でのみ保証されます。 +- `startMonitoring()` は監視用のデーモンスレッドを作るため、アプリケーションの終了時には `stopMonitoring()` を呼ぶかプロセスを終了してください。`stopMonitoring()` は join を行いますが、失敗した場合でも致命的にならないよう best-effort 
実装です。 +- コールバック内部で例外が発生してもモジュール側で捕捉してログ出力します(`utils.errorLogging()`)。コールバック側で詳細なハンドリングやリトライが必要な場合は呼び出し側で行ってください。 + +--- + +### 実装メモ + +- `monitoring()` は可能なら Windows の COM (pycaw / MMNotificationClient) を使ってイベント駆動で待ち受け、失敗時や非Windows 環境では PyAudio を使ったポーリング(定期的な update()) にフォールバックします。 +- 外部ライブラリが原因の例外は内部で捕捉し、`errorLogging()` を呼んで記録する設計です。 From ba13443d1c880a864387a0af7950636641d11a2e Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Thu, 9 Oct 2025 19:27:55 +0900 Subject: [PATCH 83/92] =?UTF-8?q?config.py=E3=81=AE=E3=83=AA=E3=83=95?= =?UTF-8?q?=E3=82=A1=E3=82=AF=E3=82=BF=E3=83=AA=E3=83=B3=E3=82=B0=E3=82=92?= =?UTF-8?q?=E5=AE=9F=E6=96=BD=E3=81=97=E3=80=81=E5=A4=96=E9=83=A8=E3=83=A2?= =?UTF-8?q?=E3=82=B8=E3=83=A5=E3=83=BC=E3=83=AB=E3=81=AE=E3=82=A4=E3=83=B3?= =?UTF-8?q?=E3=83=9D=E3=83=BC=E3=83=88=E3=82=92=E3=82=AC=E3=83=BC=E3=83=89?= =?UTF-8?q?=E3=81=97=E3=81=A6=E5=AE=89=E5=85=A8=E6=80=A7=E3=82=92=E5=90=91?= =?UTF-8?q?=E4=B8=8A=E3=80=82=E5=88=9D=E6=9C=9F=E5=8C=96=E6=99=82=E3=81=AE?= =?UTF-8?q?=E3=82=A8=E3=83=A9=E3=83=BC=E3=83=8F=E3=83=B3=E3=83=89=E3=83=AA?= =?UTF-8?q?=E3=83=B3=E3=82=B0=E3=82=92=E5=BC=B7=E5=8C=96=E3=81=97=E3=80=81?= =?UTF-8?q?=E3=83=87=E3=83=95=E3=82=A9=E3=83=AB=E3=83=88=E5=80=A4=E3=81=AE?= =?UTF-8?q?=E5=8F=96=E5=BE=97=E3=82=92=E5=AE=89=E5=85=A8=E3=81=AB=E8=A1=8C?= =?UTF-8?q?=E3=81=88=E3=82=8B=E3=82=88=E3=81=86=E3=81=AB=E4=BF=AE=E6=AD=A3?= =?UTF-8?q?=E3=80=82=E9=96=A2=E9=80=A3=E3=83=89=E3=82=AD=E3=83=A5=E3=83=A1?= =?UTF-8?q?=E3=83=B3=E3=83=88=E3=82=92=E6=96=B0=E8=A6=8F=E4=BD=9C=E6=88=90?= =?UTF-8?q?=E3=81=97=E3=80=81=E5=A4=89=E6=9B=B4=E7=82=B9=E3=81=A8=E5=88=A9?= =?UTF-8?q?=E7=94=A8=E4=B8=8A=E3=81=AE=E6=B3=A8=E6=84=8F=E3=82=92=E6=98=8E?= =?UTF-8?q?=E7=A4=BA=E5=8C=96=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src-python/config.py | 96 ++++++++++++++++++++++----- src-python/docs/modules/config_ref.md | 39 +++++++++++ 2 files changed, 118 
insertions(+), 17 deletions(-) create mode 100644 src-python/docs/modules/config_ref.md diff --git a/src-python/config.py b/src-python/config.py index 8f32f4af..0e8a37e2 100644 --- a/src-python/config.py +++ b/src-python/config.py @@ -7,11 +7,34 @@ from json import dump as json_dump import threading from typing import Optional, Dict, Any import torch -from device_manager import device_manager -from models.translation.translation_languages import translation_lang -from models.translation.translation_utils import ctranslate2_weights -from models.transcription.transcription_languages import transcription_lang -from models.transcription.transcription_whisper import _MODELS as whisper_models + +# Guard optional, potentially heavy or platform-specific imports so importing +# config.py doesn't raise in environments missing those packages. +try: + from device_manager import device_manager +except Exception: # pragma: no cover - optional runtime + device_manager = None # type: ignore + +try: + from models.translation.translation_languages import translation_lang +except Exception: # pragma: no cover - optional runtime + translation_lang = {} # type: ignore + +try: + from models.translation.translation_utils import ctranslate2_weights +except Exception: # pragma: no cover - optional runtime + ctranslate2_weights = {} # type: ignore + +try: + from models.transcription.transcription_languages import transcription_lang +except Exception: # pragma: no cover - optional runtime + transcription_lang = {} # type: ignore + +try: + from models.transcription.transcription_whisper import _MODELS as whisper_models +except Exception: # pragma: no cover - optional runtime + whisper_models = {} # type: ignore + from utils import errorLogging, validateDictStructure, getComputeDeviceList json_serializable_vars = {} @@ -22,23 +45,39 @@ def json_serializable(var_name): return decorator class Config: + """Application configuration singleton. 
+ + Responsibilities: + - expose read-only and read-write configuration via properties + - persist selected values to JSON with debounce + Implementation notes: initialization may depend on optional subsystems; any + exceptions during init/load are captured and logged to avoid import-time + crashes. + """ + _instance = None _config_data: Dict[str, Any] = {} _timer: Optional[threading.Timer] = None - _debounce_time = 2 + _debounce_time: int = 2 def __new__(cls): if cls._instance is None: cls._instance = super(Config, cls).__new__(cls) - cls._instance.init_config() - cls._instance.load_config() + try: + cls._instance.init_config() + except Exception: + errorLogging() + try: + cls._instance.load_config() + except Exception: + errorLogging() return cls._instance - def saveConfigToFile(self): + def saveConfigToFile(self) -> None: with open(self.PATH_CONFIG, "w", encoding="utf-8") as fp: json_dump(self._config_data, fp, indent=4, ensure_ascii=False) - def saveConfig(self, key, value, immediate_save=False): + def saveConfig(self, key: str, value: Any, immediate_save: bool = False) -> None: self._config_data[key] = value if isinstance(self._timer, threading.Timer) and self._timer.is_alive(): @@ -1069,10 +1108,16 @@ class Config: self._WATCHDOG_INTERVAL = 20 self._SELECTABLE_TAB_NO_LIST = ["1", "2", "3"] - self._SELECTABLE_CTRANSLATE2_WEIGHT_TYPE_LIST = ctranslate2_weights.keys() - self._SELECTABLE_WHISPER_WEIGHT_TYPE_LIST = whisper_models.keys() - self._SELECTABLE_TRANSLATION_ENGINE_LIST = translation_lang.keys() - self._SELECTABLE_TRANSCRIPTION_ENGINE_LIST = list(transcription_lang[list(transcription_lang.keys())[0]].values())[0].keys() + # these external mappings may be empty dicts if the optional modules failed to import + self._SELECTABLE_CTRANSLATE2_WEIGHT_TYPE_LIST = getattr(ctranslate2_weights, 'keys', lambda: [])() + self._SELECTABLE_WHISPER_WEIGHT_TYPE_LIST = getattr(whisper_models, 'keys', lambda: [])() + self._SELECTABLE_TRANSLATION_ENGINE_LIST = 
getattr(translation_lang, 'keys', lambda: [])() + try: + # transcription_lang is nested dict; attempt to extract keys defensively + first_key = next(iter(transcription_lang)) + self._SELECTABLE_TRANSCRIPTION_ENGINE_LIST = list(transcription_lang[first_key].values())[0].keys() + except Exception: + self._SELECTABLE_TRANSCRIPTION_ENGINE_LIST = [] self._SELECTABLE_UI_LANGUAGE_LIST = ["en", "ja", "ko", "zh-Hant", "zh-Hans"] self._COMPUTE_MODE = "cuda" if torch.cuda.is_available() else "cpu" self._SELECTABLE_COMPUTE_DEVICE_LIST = getComputeDeviceList() @@ -1172,8 +1217,18 @@ class Config: "height": 654, } self._AUTO_MIC_SELECT = True - self._SELECTED_MIC_HOST = device_manager.getDefaultMicDevice()["host"]["name"] - self._SELECTED_MIC_DEVICE = device_manager.getDefaultMicDevice()["device"]["name"] + # device_manager may be unavailable or not initialized; use safe defaults + try: + if device_manager is not None: + self._SELECTED_MIC_HOST = device_manager.getDefaultMicDevice()["host"]["name"] + self._SELECTED_MIC_DEVICE = device_manager.getDefaultMicDevice()["device"]["name"] + else: + self._SELECTED_MIC_HOST = "NoHost" + self._SELECTED_MIC_DEVICE = "NoDevice" + except Exception: + errorLogging() + self._SELECTED_MIC_HOST = "NoHost" + self._SELECTED_MIC_DEVICE = "NoDevice" self._MIC_THRESHOLD = 300 self._MIC_AUTOMATIC_THRESHOLD = False self._MIC_RECORD_TIMEOUT = 3 @@ -1190,7 +1245,14 @@ class Config: self._MIC_AVG_LOGPROB = -0.8 self._MIC_NO_SPEECH_PROB = 0.6 self._AUTO_SPEAKER_SELECT = True - self._SELECTED_SPEAKER_DEVICE = device_manager.getDefaultSpeakerDevice()["device"]["name"] + try: + if device_manager is not None: + self._SELECTED_SPEAKER_DEVICE = device_manager.getDefaultSpeakerDevice()["device"]["name"] + else: + self._SELECTED_SPEAKER_DEVICE = "NoDevice" + except Exception: + errorLogging() + self._SELECTED_SPEAKER_DEVICE = "NoDevice" self._SPEAKER_THRESHOLD = 300 self._SPEAKER_AUTOMATIC_THRESHOLD = False self._SPEAKER_RECORD_TIMEOUT = 3 diff --git 
a/src-python/docs/modules/config_ref.md b/src-python/docs/modules/config_ref.md new file mode 100644 index 00000000..776a2f1a --- /dev/null +++ b/src-python/docs/modules/config_ref.md @@ -0,0 +1,39 @@ +# config.py 変更参照ドキュメント + +このファイルは `config.py` に対して行った最近のリファクタリング / 安全化についての参照資料です。 + +目的: import 時の副作用を抑止し、`device_manager` などの外部モジュールがない環境でも安全に `config` をインポートできるようにすること。 + +主な変更点 + +- import-time の初期化保護 + - `Config.__new__` の中で `init_config()` / `load_config()` を呼び出しますが、これらを try/except で保護し、初期化に失敗しても例外を上位に伝播させずログ記録のみで処理を継続します。 + - このため、アプリ起動環境に必須ではない外部依存が欠けている場合でも、`import config` によるクラッシュを防止します。 + +- 外部モジュールの呼び出しをガード + - `device_manager`、翻訳/文字起こし関連のモデル一覧 (`whisper_models`, `ctranslate2_weights`) などは import 時に直接呼び出さず、存在チェック(try/except)を行って安全なデフォルト(空リストや "NoDevice" など)にフォールバックします。 + - これによりヘビーな依存(Windows 固有パッケージや大きな ML ライブラリ)がない CI 環境や軽量実行環境での import が安定します。 + +- エラーロギング + - 初期化やデフォルト取得に失敗した場合は、例外を握りつぶすのではなく `utils.errorLogging()` を経由してエラーメッセージを残します。これにより問題の診断が容易になります。 + +- 設定デフォルト値の扱い + - `getDefaultMicDevice()` / `getDefaultSpeakerDevice()` などを呼ぶ箇所は try/except で保護され、失敗時には `"NoHost"` / `"NoDevice"` 等の安全な文字列で代替されます。 + +利用上の注意 + +- 既存のコードは `config` をインポートしただけで `device_manager` を起動することを想定している箇所があるかもしれません。今回のリファクタリングでは "import 時に副作用を起こさない" ことを優先しているため、もし明示的な初期化を必要とする場合は、呼び出し側で `device_manager.init()` を明示的に行ってください。 + +- もし `config` のロードで致命的な設定エラーが発生した場合でも、アプリは継続動作しますが、ログを確認して手動で修復することが必要になる場合があります。 + +ドキュメントの提案差分 + +- 既存 `docs/modules/config.md` の "生成とライフサイクル" セクションに次の一文を追加することを推奨します: + + > 注意: `Config()` のインポートは副作用を起こさないよう保護されています。プラットフォーム依存のコンポーネント(例: `device_manager`)は明示的に初期化してください。 + +- `SELECTABLE_*` 系の説明に、起動環境に依存して空になる可能性があることを明示するパラグラフを追加してください(CI 環境や headless 環境では空になる)。 + +--- + +作業済み: このファイルはワークスペースに `docs/modules/config_ref.md` として作成済みです。既存 `docs/modules/config.md` は上書きしていません。上書き/マージの希望があれば続けます。 From 2b6611ef8e47b13fe6d21b372f8e631041c1186c Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Thu, 9 Oct 2025 21:47:19 
+0900 Subject: [PATCH 84/92] =?UTF-8?q?Model=E3=82=AF=E3=83=A9=E3=82=B9=E3=81=AE?= =?UTF-8?q?=E5=88=9D=E6=9C=9F=E5=8C=96=E3=82=92=E9=81=85=E5=BB=B6=E3=81=95?= =?UTF-8?q?=E3=81=9B=E3=82=8B=E8=A8=AD=E8=A8=88=E3=81=AB=E5=A4=89=E6=9B=B4?= =?UTF-8?q?=E3=81=97=E3=80=81=E5=90=84=E3=83=A1=E3=82=BD=E3=83=83=E3=83=89?= =?UTF-8?q?=E3=81=A7=E3=81=AEensure=5Finitialized()=E5=91=BC=E3=81=B3?= =?UTF-8?q?=E5=87=BA=E3=81=97=E3=81=AB=E3=82=88=E3=81=A3=E3=81=A6=E5=BF=85?= =?UTF-8?q?=E8=A6=81=E6=99=82=E3=81=AB=E3=83=AA=E3=82=BD=E3=83=BC=E3=82=B9?= =?UTF-8?q?=E3=82=92=E5=88=9D=E6=9C=9F=E5=8C=96=E3=81=99=E3=82=8B=E3=82=88?= =?UTF-8?q?=E3=81=86=E3=81=AB=E4=BF=AE=E6=AD=A3=E3=80=82=E3=81=93=E3=82=8C?= =?UTF-8?q?=E3=81=AB=E3=82=88=E3=82=8A=E3=80=81=E3=82=A4=E3=83=B3=E3=83=9D?= =?UTF-8?q?=E3=83=BC=E3=83=88=E6=99=82=E3=81=AE=E5=89=AF=E4=BD=9C=E7=94=A8?= =?UTF-8?q?=E3=82=92=E6=8A=91=E6=AD=A2=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src-python/docs/modules/model.md | 13 +++ src-python/model.py | 156 +++++++++++++++++++++++++++---- 2 files changed, 151 insertions(+), 18 deletions(-) diff --git a/src-python/docs/modules/model.md b/src-python/docs/modules/model.md index 3cb331ad..391976e9 100644 --- a/src-python/docs/modules/model.md +++ b/src-python/docs/modules/model.md @@ -67,6 +67,19 @@ model.startWebSocketServer('127.0.0.1', 2231) ## 詳細設計 +### 2025-10-09 のリファクタリング要約 + +- 遅延初期化 (lazy-init): `Model` のコンストラクタで重い初期化を行わず、`model.init()` を明示的に呼ぶか、各メソッド先頭で呼ばれる `ensure_initialized()` によって必要時に初期化する設計に変更しました。これによりインポート時の副作用(外部環境依存の初期化)が抑止されます。 + +- `threadFnc` の堅牢化: スレッドユーティリティは args/kwargs をインスタンスで保持し、内部で発生する例外を捕捉して `utils.errorLogging()` に委ねるようになりました。これによりバックグラウンドスレッドが例外で終了するリスクを減らしています。 + +- `device_manager` 呼び出しのガード: `getListMicHost()` / `getListMicDevice()` / `getMicDefaultDevice()` / `getListSpeakerDevice()` など、`device_manager` を参照する箇所は try/except で保護され、失敗時は安全なデフォルト(空リストや `"NoDevice"`)を返すようになりました。 + +- WebSocket/Overlay/Watchdog 
等の起動系メソッドは `ensure_initialized()` を先頭に呼ぶようになり、遅延初期化の恩恵を受けるようになっています。 + +これらの変更は非破壊で既存の API を維持することを目的としていますが、起動フローで確実にリソースを確保したい場合はアプリ起動時に `model.init()` を呼ぶことを推奨します。 + + 目的: 各モデル(翻訳/転写/Overlay/Watchdog/OSC/WebSocket 等)のインスタンスを保持し、高レベルの操作を提供するファサード。 主要クラス/変数: diff --git a/src-python/model.py b/src-python/model.py index e78ed857..140e45b5 100644 --- a/src-python/model.py +++ b/src-python/model.py @@ -35,30 +35,47 @@ from models.websocket.websocket_server import WebSocketServer from utils import errorLogging, setupLogger class threadFnc(Thread): - def __init__(self, fnc, end_fnc=None, daemon=True, *args, **kwargs): - super(threadFnc, self).__init__(daemon=daemon, target=fnc, *args, **kwargs) + """A tiny Thread wrapper that repeatedly calls a function. + + Usage: threadFnc(fnc, end_fnc=None, daemon=True, *args, **kwargs) + The target function will be called repeatedly inside run(). + """ + def __init__(self, fnc, end_fnc=None, daemon: bool = True, *args, **kwargs): + # Do not pass target to super; manage call explicitly so we can + # store args/kwargs on the instance for later use. 
+ super(threadFnc, self).__init__(daemon=daemon) self.fnc = fnc self.end_fnc = end_fnc self.loop = True self._pause = False + self._args = args + self._kwargs = kwargs - def stop(self): + def stop(self) -> None: self.loop = False - def pause(self): + def pause(self) -> None: self._pause = True - def resume(self): + def resume(self) -> None: self._pause = False - def run(self): - while self.loop: - self.fnc(*self._args, **self._kwargs) - while self._pause: - sleep(0.1) - - if callable(self.end_fnc): - self.end_fnc() + def run(self) -> None: + try: + while self.loop: + try: + self.fnc(*self._args, **self._kwargs) + except Exception: + # Protect the thread from terminating on user exceptions + errorLogging() + while self._pause: + sleep(0.1) + finally: + if callable(self.end_fnc): + try: + self.end_fnc() + except Exception: + errorLogging() return class Model: @@ -67,10 +84,22 @@ class Model: def __new__(cls): if cls._instance is None: cls._instance = super(Model, cls).__new__(cls) - cls._instance.init() + # Do NOT call init() here to avoid heavy import-time work. + # Callers should call `model.init()` explicitly or rely on + # `ensure_initialized()` which will lazy-initialize on demand. + cls._instance._inited = False return cls._instance def init(self): + """Perform full initialization of resources. + + This method performs heavy construction (models, overlay, threads) + and is intentionally not called at import time. Call explicitly + or let `ensure_initialized()` call it lazily. + """ + if getattr(self, '_inited', False): + return + self.logger = None self.th_check_device = None self.mic_print_transcript = None @@ -109,11 +138,24 @@ class Model: # default no-op callbacks for energy check functions self.check_mic_energy_fnc: Callable[[float], None] = lambda v: None self.check_speaker_energy_fnc: Callable[[float], None] = lambda v: None + self._inited = True + + def ensure_initialized(self) -> None: + """Ensure the model has been initialized. 
This is safe to call from + public methods that require initialized resources. + """ + if not getattr(self, '_inited', False): + try: + self.init() + except Exception: + # Log and continue; callers should handle missing features. + errorLogging() def checkTranslatorCTranslate2ModelWeight(self, weight_type:str): return checkCTranslate2Weight(config.PATH_LOCAL, weight_type) def changeTranslatorCTranslate2Model(self): + self.ensure_initialized() self.translator.changeCTranslate2Model( path=config.PATH_LOCAL, model_type=config.CTRANSLATE2_WEIGHT_TYPE, @@ -129,12 +171,15 @@ class Model: return downloadCTranslate2Tokenizer(config.PATH_LOCAL, weight_type) def isLoadedCTranslate2Model(self): + self.ensure_initialized() return self.translator.isLoadedCTranslate2Model() def isChangedTranslatorParameters(self): + self.ensure_initialized() return self.translator.isChangedTranslatorParameters() def setChangedTranslatorParameters(self, is_changed): + self.ensure_initialized() self.translator.setChangedTranslatorParameters(is_changed) def checkTranscriptionWhisperModelWeight(self, weight_type:str): @@ -144,20 +189,24 @@ class Model: return downloadWhisperWeight(config.PATH_LOCAL, weight_type, callback, end_callback) def resetKeywordProcessor(self): + self.ensure_initialized() del self.keyword_processor self.keyword_processor = KeywordProcessor() def authenticationTranslatorDeepLAuthKey(self, auth_key): + self.ensure_initialized() result = self.translator.authenticationDeepLAuthKey(auth_key) return result def startLogger(self): + self.ensure_initialized() os_makedirs(config.PATH_LOGS, exist_ok=True) file_name = os_path.join(config.PATH_LOGS, f"{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.log") self.logger = setupLogger("log", file_name) self.logger.disabled = False def stopLogger(self): + self.ensure_initialized() self.logger.disabled = True self.logger = None @@ -198,6 +247,7 @@ class Model: return compatible_engines def getTranslate(self, translator_name, source_language, 
target_language, target_country, message): + self.ensure_initialized() success_flag = False translation = self.translator.translate( translator_name=translator_name, @@ -225,6 +275,7 @@ class Model: return translation, success_flag def getInputTranslate(self, message, source_language=None): + self.ensure_initialized() translator_name=config.SELECTED_TRANSLATION_ENGINES[config.SELECTED_TAB_NO] if source_language is None: source_language=config.SELECTED_YOUR_LANGUAGES[config.SELECTED_TAB_NO]["1"]["language"] @@ -250,6 +301,7 @@ class Model: return translations, success_flags def getOutputTranslate(self, message, source_language=None): + self.ensure_initialized() translator_name=config.SELECTED_TRANSLATION_ENGINES[config.SELECTED_TAB_NO] if source_language is None: source_language=config.SELECTED_TARGET_LANGUAGES[config.SELECTED_TAB_NO]["1"]["language"] @@ -266,10 +318,12 @@ class Model: return [translation], [success_flag] def addKeywords(self): + self.ensure_initialized() for f in config.MIC_WORD_FILTER: self.keyword_processor.add_keyword(f) def checkKeywords(self, message): + self.ensure_initialized() return len(self.keyword_processor.extract_keywords(message)) != 0 def detectRepeatSendMessage(self, message): @@ -287,14 +341,17 @@ class Model: return repeat_flag def startTransliteration(self): + self.ensure_initialized() if self.transliterator is None: self.transliterator = Transliterator() def stopTransliteration(self): + self.ensure_initialized() if self.transliterator is not None: self.transliterator = None def convertMessageToTransliteration(self, message: str, hiragana: bool=True, romaji: bool=True) -> list: + self.ensure_initialized() if hiragana is False and romaji is False: return [] @@ -315,24 +372,31 @@ class Model: return filtered_list def setOscIpAddress(self, ip_address): + self.ensure_initialized() self.osc_handler.setOscIpAddress(ip_address) def setOscPort(self, port): + self.ensure_initialized() self.osc_handler.setOscPort(port) def 
oscStartSendTyping(self): + self.ensure_initialized() self.osc_handler.sendTyping(flag=True) def oscStopSendTyping(self): + self.ensure_initialized() self.osc_handler.sendTyping(flag=False) def oscSendMessage(self, message:str): + self.ensure_initialized() self.osc_handler.sendMessage(message=message, notification=config.NOTIFICATION_VRC_SFX) def setMuteSelfStatus(self): + self.ensure_initialized() self.mic_mute_status = self.osc_handler.getOSCParameterMuteSelf() def startReceiveOSC(self): + self.ensure_initialized() def changeHandlerMute(address, osc_arguments): if config.ENABLE_TRANSCRIPTION_SEND is True: if osc_arguments is True and self.mic_mute_status is False: @@ -349,9 +413,11 @@ class Model: self.osc_handler.receiveOscParameters() def stopReceiveOSC(self): + self.ensure_initialized() self.osc_handler.oscServerStop() def getIsOscQueryEnabled(self): + self.ensure_initialized() return self.osc_handler.getIsOscQueryEnabled() @staticmethod @@ -416,22 +482,47 @@ class Model: Popen([program_name, "--cuda"], cwd=current_directory) def getListMicHost(self): - result = [host for host in device_manager.getMicDevices().keys()] + self.ensure_initialized() + try: + dm = device_manager.getMicDevices() + result = [host for host in dm.keys()] + except Exception: + errorLogging() + result = [] return result def getMicDefaultDevice(self): - result = device_manager.getMicDevices().get(config.SELECTED_MIC_HOST, [{"name": "NoDevice"}])[0]["name"] + self.ensure_initialized() + try: + dm = device_manager.getMicDevices() + result = dm.get(config.SELECTED_MIC_HOST, [{"name": "NoDevice"}])[0]["name"] + except Exception: + errorLogging() + result = "NoDevice" return result def getListMicDevice(self): - result = [device["name"] for device in device_manager.getMicDevices().get(config.SELECTED_MIC_HOST, [{"name": "NoDevice"}])] + self.ensure_initialized() + try: + dm = device_manager.getMicDevices() + result = [device["name"] for device in dm.get(config.SELECTED_MIC_HOST, [{"name": 
"NoDevice"}])] + except Exception: + errorLogging() + result = ["NoDevice"] return result def getListSpeakerDevice(self): - result = [device["name"] for device in device_manager.getSpeakerDevices()] + self.ensure_initialized() + try: + sd = device_manager.getSpeakerDevices() + result = [device["name"] for device in sd] + except Exception: + errorLogging() + result = ["NoDevice"] return result def startMicTranscript(self, fnc): + self.ensure_initialized() mic_host_name = config.SELECTED_MIC_HOST mic_device_name = config.SELECTED_MIC_DEVICE @@ -518,6 +609,7 @@ class Model: self.changeMicTranscriptStatus() def resumeMicTranscript(self): + self.ensure_initialized() # キューをクリア if isinstance(self.mic_audio_queue, Queue): while not self.mic_audio_queue.empty(): @@ -532,6 +624,7 @@ class Model: self.mic_audio_recorder.resume() def pauseMicTranscript(self): + self.ensure_initialized() # 文字起こしを一時停止 # if isinstance(self.mic_print_transcript, threadFnc): # self.mic_print_transcript.pause() @@ -565,6 +658,7 @@ class Model: self.resumeMicTranscript() def stopMicTranscript(self): + self.ensure_initialized() if isinstance(self.mic_print_transcript, threadFnc): self.mic_print_transcript.stop() self.mic_print_transcript.join() @@ -578,6 +672,7 @@ class Model: # self.mic_get_energy = None def startCheckMicEnergy(self, fnc:Optional[Callable[[float], None]]=None) -> None: + self.ensure_initialized() # fnc may be None or a callable. Use cast after checking for None to satisfy type checker. 
if fnc is not None: self.check_mic_energy_fnc = cast(Callable[[float], None], fnc) @@ -609,6 +704,7 @@ class Model: self.mic_energy_plot_progressbar.start() def stopCheckMicEnergy(self): + self.ensure_initialized() if isinstance(self.mic_energy_plot_progressbar, threadFnc): self.mic_energy_plot_progressbar.stop() self.mic_energy_plot_progressbar.join() @@ -619,6 +715,7 @@ class Model: self.mic_energy_recorder = None def startSpeakerTranscript(self, fnc:Optional[Callable[[dict], None]]=None) -> None: + self.ensure_initialized() speaker_device_name = config.SELECTED_SPEAKER_DEVICE speaker_device_list = device_manager.getSpeakerDevices() @@ -702,6 +799,7 @@ class Model: # self.speaker_get_energy.start() def stopSpeakerTranscript(self): + self.ensure_initialized() if isinstance(self.speaker_print_transcript, threadFnc): self.speaker_print_transcript.stop() self.speaker_print_transcript.join() @@ -714,6 +812,7 @@ class Model: # self.speaker_get_energy = None def startCheckSpeakerEnergy(self, fnc:Optional[Callable[[float], None]]=None) -> None: + self.ensure_initialized() # Accept None as default and assign safely with cast after None-check if fnc is not None: self.check_speaker_energy_fnc = cast(Callable[[float], None], fnc) @@ -743,6 +842,7 @@ class Model: self.speaker_energy_plot_progressbar.start() def stopCheckSpeakerEnergy(self): + self.ensure_initialized() if isinstance(self.speaker_energy_plot_progressbar, threadFnc): self.speaker_energy_plot_progressbar.stop() self.speaker_energy_plot_progressbar.join() @@ -753,6 +853,7 @@ class Model: self.speaker_energy_recorder = None def createOverlayImageSmallLog(self, message:Optional[str], your_language:Optional[str], translation:list, target_language:Optional[dict]) -> object: + self.ensure_initialized() # target_language may be provided as dict or None target_language_list = [] if isinstance(target_language, dict): @@ -760,6 +861,7 @@ class Model: return self.overlay_image.createOverlayImageSmallLog(message, 
your_language, translation, target_language_list) def createOverlayImageSmallMessage(self, message): + self.ensure_initialized() ui_language = config.UI_LANGUAGE convert_languages = { "en": "Default", @@ -772,12 +874,15 @@ class Model: return self.overlay_image.createOverlayImageSmallLog(message, language) def clearOverlayImageSmallLog(self): + self.ensure_initialized() self.overlay.clearImage("small") def updateOverlaySmallLog(self, img): + self.ensure_initialized() self.overlay.updateImage(img, "small") def updateOverlaySmallLogSettings(self): + self.ensure_initialized() size = "small" if (self.overlay.settings[size]["x_pos"] != config.OVERLAY_SMALL_LOG_SETTINGS["x_pos"] or @@ -807,6 +912,7 @@ class Model: self.overlay.updateUiScaling(config.OVERLAY_SMALL_LOG_SETTINGS["ui_scaling"], size) def createOverlayImageLargeLog(self, message_type:str, message:Optional[str], your_language:Optional[str], translation:list, target_language:Optional[dict]=None): + self.ensure_initialized() # normalize target_language dict -> list of language strings target_language_list = [] if isinstance(target_language, dict): @@ -814,6 +920,7 @@ class Model: return self.overlay_image.createOverlayImageLargeLog(message_type, message, your_language, translation, target_language_list) def createOverlayImageLargeMessage(self, message): + self.ensure_initialized() ui_language = config.UI_LANGUAGE convert_languages = { "en": "Default", @@ -831,12 +938,15 @@ class Model: return overlay_image.createOverlayImageLargeLog("send", message, language) def clearOverlayImageLargeLog(self): + self.ensure_initialized() self.overlay.clearImage("large") def updateOverlayLargeLog(self, img): + self.ensure_initialized() self.overlay.updateImage(img, "large") def updateOverlayLargeLogSettings(self): + self.ensure_initialized() size = "large" if (self.overlay.settings[size]["x_pos"] != config.OVERLAY_LARGE_LOG_SETTINGS["x_pos"] or self.overlay.settings[size]["y_pos"] != config.OVERLAY_LARGE_LOG_SETTINGS["y_pos"] 
or @@ -865,23 +975,29 @@ class Model: self.overlay.updateUiScaling(config.OVERLAY_LARGE_LOG_SETTINGS["ui_scaling"] * 0.25, size) def startOverlay(self): + self.ensure_initialized() self.overlay.startOverlay() def shutdownOverlay(self): + self.ensure_initialized() self.overlay.shutdownOverlay() def startWatchdog(self): + self.ensure_initialized() self.th_watchdog = threadFnc(self.watchdog.start) self.th_watchdog.daemon = True self.th_watchdog.start() def feedWatchdog(self): + self.ensure_initialized() self.watchdog.feed() def setWatchdogCallback(self, callback): + self.ensure_initialized() self.watchdog.setCallback(callback) def stopWatchdog(self): + self.ensure_initialized() if isinstance(self.th_watchdog, threadFnc): self.th_watchdog.stop() self.th_watchdog.join() @@ -893,6 +1009,7 @@ class Model: def startWebSocketServer(self, host, port): """WebSocketサーバーを起動し、別スレッドで実行する""" + self.ensure_initialized() if self.websocket_server_alive is True: # サーバーが既に起動している場合は何もしない return @@ -931,6 +1048,7 @@ class Model: def stopWebSocketServer(self): """WebSocketサーバーを停止する""" + self.ensure_initialized() if not hasattr(self, 'th_websocket_server') or self.th_websocket_server is None: return @@ -952,6 +1070,7 @@ class Model: def checkWebSocketServerAlive(self): """WebSocketサーバーの稼働状態を確認する""" + self.ensure_initialized() return self.websocket_server_alive def websocketSendMessage(self, message_dict:dict): @@ -960,6 +1079,7 @@ class Model: :param message_dict: 送信するメッセージの辞書 :return: 送信成功したかどうか """ + self.ensure_initialized() if not self.websocket_server_alive or not self.websocket_server: return False try: From 6f33f8afbd975da7a4154eb9d47da4453a0d19f6 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Thu, 9 Oct 2025 22:38:50 +0900 Subject: [PATCH 85/92] =?UTF-8?q?Controller=E3=81=AE=E5=88=9D=E6=9C=9F?= =?UTF-8?q?=E5=8C=96=E6=99=82=E3=81=ABmodel.init()=E3=82=92=E5=91=BC?= 
=?UTF-8?q?=E3=81=B3=E5=87=BA=E3=81=99=E4=BA=92=E6=8F=9B=E3=83=AC=E3=82=A4?= =?UTF-8?q?=E3=83=A4=E3=82=92=E8=BF=BD=E5=8A=A0=E3=81=97=E3=80=81=E3=82=AA?= =?UTF-8?q?=E3=83=BC=E3=83=90=E3=83=BC=E3=83=AC=E3=82=A4=E3=81=AE=E5=AD=98?= =?UTF-8?q?=E5=9C=A8=E3=83=81=E3=82=A7=E3=83=83=E3=82=AF=E3=82=92=E5=AE=89?= =?UTF-8?q?=E5=85=A8=E3=81=AB=E8=A1=8C=E3=81=86=E3=81=9F=E3=82=81=E3=81=AE?= =?UTF-8?q?=E3=83=98=E3=83=AB=E3=83=91=E3=83=A1=E3=82=BD=E3=83=83=E3=83=89?= =?UTF-8?q?=E3=82=92=E5=B0=8E=E5=85=A5=E3=80=82=E6=9C=AA=E4=BD=BF=E7=94=A8?= =?UTF-8?q?=E3=81=AEimport=E3=82=92=E5=89=8A=E9=99=A4=E3=81=97=E3=80=81?= =?UTF-8?q?=E3=83=89=E3=82=AD=E3=83=A5=E3=83=A1=E3=83=B3=E3=83=88=E3=82=92?= =?UTF-8?q?=E6=96=B0=E8=A6=8F=E4=BD=9C=E6=88=90=E3=81=97=E3=81=A6=E5=A4=89?= =?UTF-8?q?=E6=9B=B4=E7=82=B9=E3=81=A8=E6=B3=A8=E6=84=8F=E4=BA=8B=E9=A0=85?= =?UTF-8?q?=E3=82=92=E6=98=8E=E7=A4=BA=E5=8C=96=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src-python/controller.py | 26 +++++++++++++++++++---- src-python/docs/modules/controller_ref.md | 25 ++++++++++++++++++++++ 2 files changed, 47 insertions(+), 4 deletions(-) create mode 100644 src-python/docs/modules/controller_ref.md diff --git a/src-python/controller.py b/src-python/controller.py index c7d62402..d9d917e6 100644 --- a/src-python/controller.py +++ b/src-python/controller.py @@ -1,4 +1,3 @@ -import copy from typing import Callable, Any, List, Optional from time import sleep from subprocess import Popen @@ -19,6 +18,25 @@ class Controller: return None self.run: Callable[[int, str, Any], None] = _noop_run self.device_access_status: bool = True + # Ensure model is initialized at controller startup so existing + # attribute-based checks (e.g. model.overlay.initialized) continue to work. + try: + model.init() + except Exception: + # In test or headless environments initialization may fail; log and continue. 
+ errorLogging() + + def _is_overlay_available(self) -> bool: + """Safe check whether overlay is present and initialized. + + This avoids AttributeError when `model` was not fully initialized. + """ + try: + overlay = getattr(model, "overlay", None) + return overlay is not None and getattr(overlay, "initialized", False) + except Exception: + errorLogging() + return False def setInitMapping(self, init_mapping:dict) -> None: self.init_mapping = init_mapping @@ -360,7 +378,7 @@ class Controller: ] }) - if config.OVERLAY_LARGE_LOG is True and model.overlay.initialized is True: + if config.OVERLAY_LARGE_LOG is True and self._is_overlay_available(): if config.OVERLAY_SHOW_ONLY_TRANSLATED_MESSAGES is True: if len(translation) > 0: overlay_image = model.createOverlayImageLargeLog( @@ -488,7 +506,7 @@ class Controller: transliteration_translation = [[]] if config.ENABLE_TRANSCRIPTION_RECEIVE is True: - if config.OVERLAY_SMALL_LOG is True and model.overlay.initialized is True: + if config.OVERLAY_SMALL_LOG is True and self._is_overlay_available(): if config.OVERLAY_SHOW_ONLY_TRANSLATED_MESSAGES is True: if len(translation) > 0: overlay_image = model.createOverlayImageSmallLog( @@ -507,7 +525,7 @@ class Controller: ) model.updateOverlaySmallLog(overlay_image) - if config.OVERLAY_LARGE_LOG is True and model.overlay.initialized is True: + if config.OVERLAY_LARGE_LOG is True and self._is_overlay_available(): if config.OVERLAY_SHOW_ONLY_TRANSLATED_MESSAGES is True: if len(translation) > 0: overlay_image = model.createOverlayImageLargeLog( diff --git a/src-python/docs/modules/controller_ref.md b/src-python/docs/modules/controller_ref.md new file mode 100644 index 00000000..2f7cd570 --- /dev/null +++ b/src-python/docs/modules/controller_ref.md @@ -0,0 +1,25 @@ +## Controller リファクタリングノート (2025-10-09) + +概要: +このドキュメントは `controller.py` に適用した互換性修正と実装上の注意点をまとめた参照用メモです。既存の `controller.md` を直接上書きするのではなく、参照版として保存しています。 + +実施内容(要約): +- Model の lazy-init 対応に合わせ、`Controller.__init__()` 内で明示的に 
`model.init()` を呼び出す互換レイヤを追加しました。これにより、既存コードが import 時に model の属性へアクセスしていても安全に動作します。 +- オーバーレイの存在チェックを安全に行うため、`_is_overlay_available()` ヘルパを導入しました。以前に直接参照していた `model.overlay.initialized` をこのヘルパで置換しています(合計 5 箇所を置換)。 +- `micMessage` 内の翻訳周りで発生していたインデントの回帰を修正しました(try/except ブロックの整合性を回復)。 +- 未使用の `import copy` を削除しました。 +- ドキュメント編集は非破壊を原則とし、既存ファイルの安全な上書きが困難な場合は参照版(このファイル)を作成する方針を採りました。 + +互換性と注意点: +- Controller は起動時に model を初期化するため、多くの通常の利用ケースで変更の影響はありません。 +- ただし、外部のモジュールやテストコードが import 時に model の内部属性(例: `model.overlay` や `model.translator`)へ直接アクセスしている場合は、明示的に `model.init()` を呼ぶか、Controller を経由して初期化することを推奨します。 + +検証: +- 軽量なローカル検証を行い、`from controller import Controller; Controller()` の実行で初期化が成功することを確認しました。 + +今後の作業候補: +- 既存の `docs/modules/controller.md` とこの参照ドキュメントのマージ(必要であれば差分を反映して上書きを行う)。 +- linter/mypy を通して型安全性の追加と残存する静的解析の問題を解消する。 +- テスト: Controller の初期化・主要ハンドラ(micMessage/chatMessage)を対象にしたユニットテストを追加して、model.lazy-init による破壊的変更が再発しないことを保証する。 + +このファイルは自動生成ではなく、安全に変更履歴を残すための参照メモです。上書きを希望する場合はご指示ください。 From 013079268284cac12bb214957de2bc8af5ef1bf6 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Thu, 9 Oct 2025 22:52:15 +0900 Subject: [PATCH 86/92] =?UTF-8?q?mainloop=E3=83=A2=E3=82=B8=E3=83=A5?= =?UTF-8?q?=E3=83=BC=E3=83=AB=E3=81=AE=E3=83=89=E3=82=AD=E3=83=A5=E3=83=A1?= =?UTF-8?q?=E3=83=B3=E3=83=88=E3=82=92=E6=96=B0=E8=A6=8F=E4=BD=9C=E6=88=90?= =?UTF-8?q?=E3=81=97=E3=80=81Main=E3=82=AF=E3=83=A9=E3=82=B9=E3=81=AEstart?= =?UTF-8?q?()/stop()=E3=83=A1=E3=82=BD=E3=83=83=E3=83=89=E3=82=92=E8=BF=BD?= =?UTF-8?q?=E5=8A=A0=E3=80=82=E5=8F=97=E4=BF=A1=E3=82=B9=E3=83=AC=E3=83=83?= =?UTF-8?q?=E3=83=89=E3=81=A8=E3=83=8F=E3=83=B3=E3=83=89=E3=83=A9=E3=82=B9?= =?UTF-8?q?=E3=83=AC=E3=83=83=E3=83=89=E3=81=AE=E3=83=A9=E3=82=A4=E3=83=95?= =?UTF-8?q?=E3=82=B5=E3=82=A4=E3=82=AF=E3=83=AB=E7=AE=A1=E7=90=86=E3=82=92?= =?UTF-8?q?=E6=98=8E=E7=A4=BA=E5=8C=96=E3=81=97=E3=80=81=E3=82=A8=E3=83=A9?= 
=?UTF-8?q?=E3=83=BC=E3=83=8F=E3=83=B3=E3=83=89=E3=83=AA=E3=83=B3=E3=82=B0?= =?UTF-8?q?=E3=82=92=E5=BC=B7=E5=8C=96=E3=80=82=E3=83=9D=E3=83=BC=E3=83=AA?= =?UTF-8?q?=E3=83=B3=E3=82=B0=E8=B2=A0=E8=8D=B7=E3=82=92=E4=BD=8E=E6=B8=9B?= =?UTF-8?q?=E3=81=99=E3=82=8B=E3=81=9F=E3=82=81=E3=81=ABqueue.get()?= =?UTF-8?q?=E3=81=AB=E3=82=BF=E3=82=A4=E3=83=A0=E3=82=A2=E3=82=A6=E3=83=88?= =?UTF-8?q?=E3=82=92=E8=BF=BD=E5=8A=A0=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src-python/docs/modules/mainloop.md | 43 +++++++++++ src-python/mainloop.py | 113 ++++++++++++++++++---------- 2 files changed, 116 insertions(+), 40 deletions(-) create mode 100644 src-python/docs/modules/mainloop.md diff --git a/src-python/docs/modules/mainloop.md b/src-python/docs/modules/mainloop.md new file mode 100644 index 00000000..897f9d5d --- /dev/null +++ b/src-python/docs/modules/mainloop.md @@ -0,0 +1,43 @@ +## mainloop モジュール(src-python/mainloop.py) + +このドキュメントは `mainloop.py` の実装と、2025-10-09 に行ったリファクタの概要をまとめます。`mainloop` は標準入力から JSON を受け取り、`controller` のメソッドにルーティングして標準出力へ JSON で応答を返す小さなメインループです。 + +重要な変更点(2025-10-09): +- `Main` クラスに `start()` / `stop()` を追加し、受信スレッドとハンドラスレッドのライフサイクル管理を明示化しました。 +- `queue.get(timeout=...)` を使ってポーリング負荷を下げ、`_stop_event` による安全なシャットダウンを可能にしました。 +- 標準入力の JSON パースエラーと一般例外のハンドリングを強化しました。 +- `startReceiver()` / `startHandler()` を使って個別にスレッドを起動することも可能です。 + +クラス: Main +- __init__(controller_instance: Controller, mapping_data: dict) -> None + - `controller_instance`: `Controller` のインスタンス。 + - `mapping_data`: `mainloop` 内で使用する `mapping`(エンドポイント -> ハンドラ情報)辞書。 +- start() -> None + - 内部で `startReceiver()` と `startHandler()` を呼び、両スレッドを起動します。 +- stop(wait: float = 2.0) -> None + - シャットダウンシグナルをセットし、スレッド終了を待ちます(デフォルト 2 秒)。 + +使い方(例): + +```python +from mainloop import Main, mapping, controller + +main_instance = Main(controller_instance=controller, mapping_data=mapping) +main_instance.start() + +# 実行中に別スレッドや外部シグナルで停止させる 
+main_instance.stop() +``` + +既存のスクリプト互換性: +- 既存コードが `startReceiver()` や `startHandler()` を直接呼んでいる場合、そのまま動作します。`start()` / `stop()` を使うと簡潔に起動 / 停止が行えます。 + +注意点と推奨事項: +- `stop()` を呼ばないとバックグラウンドスレッドがデーモンであってもプロセス終了前にクリーンアップが不十分になる場合があります。アプリ終了時は `stop()` を呼ぶことを推奨します。 +- `queue.get(timeout=...)` を使うことで即時性よりも CPU 使用量の低減を優先しています。非常に低レイテンシが必要なケースでは timeout を短くしてください(ただし CPU 使用量に注意)。 + +スクリプト連携: +- `mainloop.mapping` と `mainloop.run_mapping` は `scripts/print_mapping.py` などのツールから直接参照されます。mapping のキー/値を変更する場合はそれらのスクリプトも確認してください。 + +変更履歴: +- 2025-10-09: start/stop ライフサイクル、タイムアウト付きキュー取得、エラー処理強化を追加。 diff --git a/src-python/mainloop.py b/src-python/mainloop.py index 9315ab56..644037a2 100644 --- a/src-python/mainloop.py +++ b/src-python/mainloop.py @@ -2,8 +2,8 @@ import sys import json import time from typing import Any, Tuple -from threading import Thread -from queue import Queue +from threading import Thread, Event +from queue import Queue, Empty import logging from controller import Controller # noqa: E402 from utils import printLog, printResponse, errorLogging, encodeBase64 # noqa: E402 @@ -358,31 +358,47 @@ init_mapping = {key:value for key, value in mapping.items() if key.startswith("/ controller.setInitMapping(init_mapping) class Main: - def __init__(self, controller_instance, mapping_data) -> None: + def __init__(self, controller_instance: Controller, mapping_data: dict) -> None: # queue holds tuples of (endpoint, data) self.queue: Queue[Tuple[str, Any]] = Queue() - self.main_loop = True + self._stop_event: Event = Event() self.controller = controller_instance self.mapping = mapping_data + self._threads: list[Thread] = [] def receiver(self) -> None: - while True: - received_data = sys.stdin.readline().strip() - received_data = json.loads(received_data) + """Read lines from stdin, parse JSON and enqueue requests. 
- if received_data: - endpoint = received_data.get("endpoint", None) - data = received_data.get("data", None) - data = encodeBase64(data) if data is not None else None - printLog(endpoint, {"receive_data": data}) - self.queue.put((endpoint, data)) + Uses blocking readline but honors stop via _stop_event checked between reads. + """ + while not self._stop_event.is_set(): + try: + line = sys.stdin.readline() + if not line: + # EOF reached; sleep briefly and re-check stop event + time.sleep(0.1) + continue + received_data = json.loads(line.strip()) + + if received_data: + endpoint = received_data.get("endpoint") + data = received_data.get("data") + data = encodeBase64(data) if data is not None else None + printLog(endpoint, {"receive_data": data}) + self.queue.put((endpoint, data)) + except json.JSONDecodeError: + # malformed input; log and continue + errorLogging() + except Exception: + errorLogging() def startReceiver(self) -> None: - th_receiver = Thread(target=self.receiver) + th_receiver = Thread(target=self.receiver, name="main_receiver") th_receiver.daemon = True th_receiver.start() + self._threads.append(th_receiver) - def handleRequest(self, endpoint, data=None) -> tuple: + def handleRequest(self, endpoint: str, data: Any = None) -> tuple: result = None # デフォルト値を設定 status = 500 # デフォルト値を設定 @@ -396,45 +412,62 @@ class Main: else: try: response = handler["variable"](data) - status = response.get("status", None) - result = response.get("result", None) - time.sleep(0.2) # 処理の安定化のために少し待機 - except Exception as e: + status = response.get("status") + result = response.get("result") + time.sleep(0.2) # 処理の安定化のために少し待機 + except Exception: errorLogging() - result = str(e) + result = "Internal error" status = 500 return result, status def handler(self) -> None: - while True: - if not self.queue.empty(): - try: - endpoint, data = self.queue.get() - result, status = self.handleRequest(endpoint, data) - except Exception as e: - errorLogging() - result = str(e) - status = 500 
+ """Main handler loop. Uses queue.get with timeout to avoid busy polling and to allow graceful shutdown.""" + while not self._stop_event.is_set(): + try: + endpoint, data = self.queue.get(timeout=0.5) + except Empty: + continue - if status == 423: - self.queue.put((endpoint, data)) - else: - printLog(endpoint, {"status": status, "send_data": result}) - printResponse(status, endpoint, result) - time.sleep(0.1) + try: + result, status = self.handleRequest(endpoint, data) + except Exception: + errorLogging() + result = "Internal error" + status = 500 + + if status == 423: + # Locked endpoint: requeue with a small delay to avoid tight loop + time.sleep(0.1) + self.queue.put((endpoint, data)) + else: + printLog(endpoint, {"status": status, "send_data": result}) + printResponse(status, endpoint, result) def startHandler(self) -> None: - th_handler = Thread(target=self.handler) + th_handler = Thread(target=self.handler, name="main_handler") th_handler.daemon = True th_handler.start() + self._threads.append(th_handler) def start(self) -> None: - while self.main_loop: - time.sleep(1) + """Start receiver and handler threads.""" + self.startReceiver() + self.startHandler() - def stop(self) -> None: - self.main_loop = False + def stop(self, wait: float = 2.0) -> None: + """Signal threads to stop and wait for them to finish. + + Args: + wait: maximum seconds to wait for threads to join. + """ + self._stop_event.set() + # give threads a chance to exit + start = time.time() + for th in self._threads: + remaining = max(0.0, wait - (time.time() - start)) + th.join(timeout=remaining) # 外部から参照可能なインスタンスを提供 main_instance = Main(controller_instance=controller, mapping_data=mapping) From 0e786cd04f8bc1c26275365ba358ffb28e6aeb68 Mon Sep 17 00:00:00 2001 From: Sakamoto Shiina <68018796+ShiinaSakamoto@users.noreply.github.com> Date: Fri, 10 Oct 2025 09:26:07 +0900 Subject: [PATCH 87/92] [bugfix] Hotfix: Fix slowed message sending.(too much sent '/run/typing_message_box'). 
set 2 secs cool time. --- src-ui/logics/common/useMessage.js | 9 ++++++++- src-ui/store.js | 1 + 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src-ui/logics/common/useMessage.js b/src-ui/logics/common/useMessage.js index 1b50bbbd..c1dcfabf 100644 --- a/src-ui/logics/common/useMessage.js +++ b/src-ui/logics/common/useMessage.js @@ -1,10 +1,13 @@ import { useStore_MessageLogs, useStore_MessageInputValue, + store, } from "@store"; import { useStdoutToPython } from "@useStdoutToPython"; +const COOLDOWN = 2000; // 2 seconds + export const useMessage = () => { const { currentMessageLogs, addMessageLogs, updateMessageLogs } = useStore_MessageLogs(); const { currentMessageInputValue, updateMessageInputValue } = useStore_MessageInputValue(); @@ -65,7 +68,11 @@ export const useMessage = () => { }; const startTyping = () => { - asyncStdoutToPython("/run/typing_message_box"); + const now = Date.now(); + if (now - store.last_executed_time_startTyping >= 2000) { + store.last_executed_time_startTyping = now; + asyncStdoutToPython("/run/typing_message_box"); + } }; const stopTyping = () => { diff --git a/src-ui/store.js b/src-ui/store.js index b5d1a966..19897723 100644 --- a/src-ui/store.js +++ b/src-ui/store.js @@ -22,6 +22,7 @@ export const store = { is_initialized_load_plugin: false, is_fetched_plugins_info_already: false, is_initialized_fetched_plugin_info: false, + last_executed_time_startTyping: 0, }; const generatePropertyNames = (base_name) => ({ From 127ff3e84a2badba516c37738a19b4672347d428 Mon Sep 17 00:00:00 2001 From: Sakamoto Shiina <68018796+ShiinaSakamoto@users.noreply.github.com> Date: Fri, 10 Oct 2025 09:29:35 +0900 Subject: [PATCH 88/92] =?UTF-8?q?=F0=9F=91=8D=EF=B8=8F[Update]=20Version?= =?UTF-8?q?=203.3.0=20->=203.3.1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src-python/config.py | 2 +- src-tauri/tauri.conf.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git 
a/src-python/config.py b/src-python/config.py index 9500b8fe..8ae9a925 100644 --- a/src-python/config.py +++ b/src-python/config.py @@ -1048,7 +1048,7 @@ class Config: def init_config(self): # Read Only - self._VERSION = "3.3.0" + self._VERSION = "3.3.1" if getattr(sys, 'frozen', False): self._PATH_LOCAL = os_path.dirname(sys.executable) else: diff --git a/src-tauri/tauri.conf.json b/src-tauri/tauri.conf.json index 8bc95f6b..10b04d98 100644 --- a/src-tauri/tauri.conf.json +++ b/src-tauri/tauri.conf.json @@ -1,7 +1,7 @@ { "$schema": "https://schema.tauri.app/config/2", "productName": "VRCT", - "version": "3.3.0", + "version": "3.3.1", "identifier": "com.vrct.app", "build": { "beforeDevCommand": "", From 914789c9cb2cbdfc73e93f07d022e1bd1d980ded Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Mon, 13 Oct 2025 08:28:27 +0900 Subject: [PATCH 89/92] =?UTF-8?q?mainloop=E3=81=AE=E3=82=B9=E3=83=AC?= =?UTF-8?q?=E3=83=83=E3=83=89=E7=AE=A1=E7=90=86=E3=82=92=E6=94=B9=E5=96=84?= =?UTF-8?q?=E3=81=97=E3=80=81=E3=83=9E=E3=83=AB=E3=83=81=E3=83=AF=E3=83=BC?= =?UTF-8?q?=E3=82=AB=E3=83=BC=E5=8C=96=E3=82=92=E5=AE=9F=E8=A3=85=E3=80=82?= =?UTF-8?q?=E3=83=87=E3=83=90=E3=82=A4=E3=82=B9=E7=AE=A1=E7=90=86=E3=81=AE?= =?UTF-8?q?=E5=88=9D=E6=9C=9F=E5=8C=96=E3=82=92=E9=81=85=E5=BB=B6=E3=81=95?= =?UTF-8?q?=E3=81=9B=E3=80=81=E3=82=A8=E3=83=A9=E3=83=BC=E3=83=8F=E3=83=B3?= =?UTF-8?q?=E3=83=89=E3=83=AA=E3=83=B3=E3=82=B0=E3=82=92=E5=BC=B7=E5=8C=96?= =?UTF-8?q?=E3=80=82=E3=83=89=E3=82=AD=E3=83=A5=E3=83=A1=E3=83=B3=E3=83=88?= =?UTF-8?q?=E3=82=92=E6=9B=B4=E6=96=B0=E3=81=97=E3=80=81=E8=A8=AD=E5=AE=9A?= =?UTF-8?q?=E3=81=AE=E5=A4=89=E6=9B=B4=E7=82=B9=E3=82=92=E6=98=8E=E7=A4=BA?= =?UTF-8?q?=E5=8C=96=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src-python/backend_test.py | 49 ++++--- src-python/config.py | 9 +- src-python/device_manager.py | 73 +++++++++- src-python/docs/modules/config.md | 9 ++ 
src-python/docs/modules/controller.md | 4 + src-python/docs/modules/device_manager.md | 11 ++ src-python/docs/modules/mainloop.md | 37 ++--- src-python/mainloop.py | 83 +++++++++--- src-python/models/osc/osc.py | 12 +- .../transcription/transcription_recorder.py | 127 +++++++++++++----- 10 files changed, 328 insertions(+), 86 deletions(-) diff --git a/src-python/backend_test.py b/src-python/backend_test.py index d56b1622..8fea1092 100644 --- a/src-python/backend_test.py +++ b/src-python/backend_test.py @@ -35,12 +35,13 @@ class Color: class TestMainloop(): def __init__(self): self.main = main_instance - self.main.startReceiver() - self.main.startHandler() + # Start mainloop threads + self.main.start() - def stop_main(): - pass - self.main.controller.setWatchdogCallback(stop_main) + # Ensure the watchdog can stop the mainloop cleanly + def _none_watchdog(): + return None + self.main.controller.setWatchdogCallback(_none_watchdog) self.main.controller.init() # mappingのすべてのstatusをTrueにする @@ -148,15 +149,16 @@ class TestMainloop(): def test_endpoints_on_off_continuous(self): print("----ON/OFF連続テスト----") - endpoints = ["/set/enable/websocket_server", "/set/disable/websocket_server"] - # endpoints = [ - # "/set/enable/translation", - # "/set/disable/translation", - # "/set/enable/transcription_send", - # "/set/disable/transcription_send", - # "/set/enable/transcription_receive", - # "/set/disable/transcription_receive", - # ] + endpoints = [ + "/set/enable/translation", + "/set/disable/translation", + "/set/enable/transcription_send", + "/set/disable/transcription_send", + "/set/enable/transcription_receive", + "/set/disable/transcription_receive", + # "/set/enable/websocket_server", + # "/set/disable/websocket_server", + ] for i in range(1000): endpoint = random.choice(endpoints) print(f"No.{i:04} Testing endpoint: {endpoint}", flush=True) @@ -739,14 +741,27 @@ if __name__ == "__main__": # test.test_set_data_endpoints_all() # test.test_run_endpoints_all() # 
test.test_delete_data_endpoints_all() - test.test_endpoints_all_random() - # test.test_endpoints_on_off_continuous() + # test.test_endpoints_all_random() + test.test_endpoints_on_off_continuous() # test.test_endpoints_on_off_random() # test.test_endpoints_specific_random() # test.test_translate_all_language_pairs() test.generate_summary() except KeyboardInterrupt: print("Interrupted by user, shutting down...") + try: + main_instance.stop() + except Exception: + pass except Exception as e: traceback.print_exc() - print(f"An error occurred: {e}") \ No newline at end of file + print(f"An error occurred: {e}") + try: + main_instance.stop() + except Exception: + pass + finally: + try: + main_instance.stop() + except Exception: + pass \ No newline at end of file diff --git a/src-python/config.py b/src-python/config.py index 0e8a37e2..54ae2b67 100644 --- a/src-python/config.py +++ b/src-python/config.py @@ -1220,8 +1220,10 @@ class Config: # device_manager may be unavailable or not initialized; use safe defaults try: if device_manager is not None: - self._SELECTED_MIC_HOST = device_manager.getDefaultMicDevice()["host"]["name"] - self._SELECTED_MIC_DEVICE = device_manager.getDefaultMicDevice()["device"]["name"] + # getDefaultMicDevice performs lazy init/update if needed + dm_def = device_manager.getDefaultMicDevice() + self._SELECTED_MIC_HOST = dm_def.get("host", {}).get("name", "NoHost") + self._SELECTED_MIC_DEVICE = dm_def.get("device", {}).get("name", "NoDevice") else: self._SELECTED_MIC_HOST = "NoHost" self._SELECTED_MIC_DEVICE = "NoDevice" @@ -1247,7 +1249,8 @@ class Config: self._AUTO_SPEAKER_SELECT = True try: if device_manager is not None: - self._SELECTED_SPEAKER_DEVICE = device_manager.getDefaultSpeakerDevice()["device"]["name"] + sp_def = device_manager.getDefaultSpeakerDevice() + self._SELECTED_SPEAKER_DEVICE = sp_def.get("device", {}).get("name", "NoDevice") else: self._SELECTED_SPEAKER_DEVICE = "NoDevice" except Exception: diff --git 
a/src-python/device_manager.py b/src-python/device_manager.py index b41a0cc3..6c4096a7 100644 --- a/src-python/device_manager.py +++ b/src-python/device_manager.py @@ -60,7 +60,20 @@ class DeviceManager: if cls._instance is None: cls._instance = super(DeviceManager, cls).__new__(cls) # do NOT auto-init monitoring-heavy resources on import; require explicit init + # Still perform a light-weight init so that callers observing the singleton + # do not see uninitialized internal structures (which caused NoDevice to + # be seen when import order differed). cls._instance._initialized = False + try: + # Call init() to populate internal containers. This will NOT start + # the monitoring thread (startMonitoring must be called explicitly). + cls._instance.init() + except Exception: + # Avoid import-time crashes; log and continue. + try: + errorLogging() + except Exception: + pass return cls._instance def init(self) -> None: @@ -108,6 +121,22 @@ class DeviceManager: self._initialized = True + # Best-effort single update: if PyAudio is available, attempt to populate + # real device lists. Keep this short and ignore errors to avoid import-time + # failures. + try: + if PyAudio is not None: + try: + # update() is robust and will fall back to defaults if audio libs + # are missing or fail; do not let exceptions bubble up. 
+ self.update() + except Exception: + errorLogging() + except Exception: + # defensive: if errorLogging isn't available or other issues occur, + # swallow to avoid breaking initialization + pass + def update(self): buffer_mic_devices: Dict[str, List[Dict[str, Any]]] = {} buffer_default_mic_device: Dict[str, Any] = {"host": {"index": -1, "name": "NoHost"}, "device": {"index": -1, "name": "NoDevice"}} @@ -428,16 +457,52 @@ class DeviceManager: errorLogging() def getMicDevices(self): - return self.mic_devices + # Ensure initialized and return devices (safe default if still not populated) + if not getattr(self, '_initialized', False): + try: + self.init() + except Exception: + try: + errorLogging() + except Exception: + pass + return getattr(self, 'mic_devices', {"NoHost": [{"index": -1, "name": "NoDevice"}]}) def getDefaultMicDevice(self): - return self.default_mic_device + # Ensure initialized and return default mic device (safe default if still not populated) + if not getattr(self, '_initialized', False): + try: + self.init() + except Exception: + try: + errorLogging() + except Exception: + pass + return getattr(self, 'default_mic_device', {"host": {"index": -1, "name": "NoHost"}, "device": {"index": -1, "name": "NoDevice"}}) def getSpeakerDevices(self): - return self.speaker_devices + # Ensure initialized and return speaker devices (safe default if still not populated) + if not getattr(self, '_initialized', False): + try: + self.init() + except Exception: + try: + errorLogging() + except Exception: + pass + return getattr(self, 'speaker_devices', [{"index": -1, "name": "NoDevice"}]) def getDefaultSpeakerDevice(self): - return self.default_speaker_device + # Ensure initialized and return default speaker device (safe default if still not populated) + if not getattr(self, '_initialized', False): + try: + self.init() + except Exception: + try: + errorLogging() + except Exception: + pass + return getattr(self, 'default_speaker_device', {"device": {"index": -1, "name": 
"NoDevice"}}) def forceUpdateAndSetMicDevices(self): self.update() diff --git a/src-python/docs/modules/config.md b/src-python/docs/modules/config.md index d33e7b42..cdd83b8c 100644 --- a/src-python/docs/modules/config.md +++ b/src-python/docs/modules/config.md @@ -172,6 +172,15 @@ config.saveConfig('CUSTOM_SAVE', {'foo': 'bar'}, immediate_save=True) - `saveConfig()` はデバウンスされるため、高頻度の設定変更では複数の変更がまとめて書き込まれます。即時書き込みが必要な操作(重要な鍵の更新など)は `immediate_save=True` を使ってください。 - `SELECTABLE_*` 系や `*_DICT` 系は初期化時に外部モジュール(翻訳リソース、whisper_models、device_manager 等)から生成されます。これらが利用できない環境ではデフォルトが空になる可能性があります。 +### 2025-10-13 の変更(device_manager / config に関する挙動改善) + +- `DeviceManager` のシングルトン生成時に軽量 `init()` を実行するようになりました。これにより、モジュールのインポート順序に依存して `config` の `SELECTED_*` が `NoDevice` のままになる問題が軽減されます(監視スレッドは自動起動しません)。 +- `config.init_config()` はこれまで `device_manager._initialized` をチェックしていた箇所を見直し、`device_manager.getDefaultMicDevice()` / `getDefaultSpeakerDevice()` といったアクセサを利用して値を取得するように変更しました。アクセサは必要なら遅延初期化を行うため、`controller` と `config` のトップレベルインポート順に依存しません。 +- 影響: 起動時に PyAudio 等の依存が利用可能であれば、起動中に実機デバイス名が `config` に反映される確率が高くなります。依存がない場合は従来どおり `NoDevice` にフォールバックします。 + +推奨運用: +- `controller.init()` でコールバック登録後、`mainloop` の起動シーケンスで `device_manager.startMonitoring()` を明示的に呼ぶと、起動後もデバイス変更がコールバック経由で確実に届きます(この呼び出しは任意です)。 + ## 推奨改善点(将来的なドキュメント/実装) - 設定スキーマを JSON Schema で定義し、load 時の検証を明確化すると安全性が向上します。 - 設定変更イベントを発火する仕組み(observer パターン)を導入すると、Controller/Model 側の再初期化処理をより明確に実装できます。 diff --git a/src-python/docs/modules/controller.md b/src-python/docs/modules/controller.md index f2ae57a6..ccbb6cfd 100644 --- a/src-python/docs/modules/controller.md +++ b/src-python/docs/modules/controller.md @@ -5,6 +5,10 @@ - UI からのコマンドを受け取り、`model` の開始/停止、設定の変更、ダウンロードの開始、各種フラグの切り替え、進捗通知(`run` コールバック経由)を行います。 - 多くのメソッドは JSON 系の応答オブジェクトを返します: {"status": int, "result": Any}。副作用で `self.run(status, run_mapping[key], payload)` を呼び出して UI に通知します。 +### mainloop のマルチワーカー化とカノニカルロックについて (2025-10-13) + +- `mainloop.Main` はデフォルトで複数(デフォルト 
3)のハンドラワーカースレッドを動かすようになりました。これにより、モデルロードなどの重い操作で他のリクエストが待たされることが少なくなります。 +- `/set/enable/` と `/set/disable/` のように同一機能の ON/OFF を切り替えるエンドポイントは、内部的にカノニカルロックキー(例: `/lock/set/`)に正規化してロック取得されます。これにより、遅い disable の処理が後から来て最終状態を書き換えてしまうレースが防がれます。 初期化とランタイムフック - __init__() -> None - フィールド: `init_mapping: dict`, `run_mapping: dict`, `run: Callable`, `device_access_status: bool` diff --git a/src-python/docs/modules/device_manager.md b/src-python/docs/modules/device_manager.md index f681b2d9..316384c2 100644 --- a/src-python/docs/modules/device_manager.md +++ b/src-python/docs/modules/device_manager.md @@ -45,6 +45,17 @@ device_manager.forceUpdateAndSetMicDevices() - Windows 固有のモジュール(PyAudio paWASAPI, pycaw)に依存します。クロスプラットフォーム対応が必要な場合は別実装が必要です。 - 監視スレッドは永続的に動作するため、アプリケーション終了時は `stopMonitoring()` を呼んで安全に停止してください。 +変更点(2025-10-13): +- `DeviceManager` のシングルトン生成時(`__new__`)に軽量な `init()` を実行するようになりました。これによりモジュールのインポート順に依存せず、最小限の内部構造が常に確立されます(※監視スレッドは自動で起動しません)。 +- `init()` は監視スレッドを開始しませんが、PyAudio が利用可能な場合に限りベストエフォートで一度だけ `update()` を呼び、起動時に可能な限り実機デバイス情報を埋めるようになりました(例外は握り潰して安全性を維持)。 +- アクセサ (`getDefaultMicDevice()` / `getDefaultSpeakerDevice()` など) は遅延初期化を行い、呼び出し時に `init()` が動いていない場合は安全に初期化されるようになりました。これにより `controller` と `config` がトップレベルインポートで互いに依存している状況でも、`config` に正しいデバイス情報が入るようになります。 + +推奨起動シーケンス: +- `controller.init()` でコールバック登録が完了した直後に、`mainloop` の起動シーケンス中で明示的に `device_manager.startMonitoring()` を呼ぶことを推奨します。これにより以降のデバイス変更がコールバックを通じて確実に届きます。なお、`startMonitoring()` は任意で、軽量にしたい場合は呼ばなくても構いません(ただし動的変化は検出されません)。 + +ドキュメントにおける重要な注意: +- この変更は "import-time に重大な副作用を持たせない" という方針を維持しつつ、インポート順の違いによる初期化漏れを解消するために行われています。`init()` は監視スレッドを開始しないため、インポートだけでスレッドが走ることはありません。 + ## 詳細設計 目的: ローカルの入力(マイク)と出力(ループバックから抽出されたスピーカー)デバイスを列挙し、変更を監視してコールバックで通知する。Windows の WASAPI 等に依存。 diff --git a/src-python/docs/modules/mainloop.md b/src-python/docs/modules/mainloop.md index 897f9d5d..ba38c6a8 100644 --- a/src-python/docs/modules/mainloop.md +++ b/src-python/docs/modules/mainloop.md @@ -1,22 +1,28 @@ ## 
mainloop モジュール(src-python/mainloop.py) -このドキュメントは `mainloop.py` の実装と、2025-10-09 に行ったリファクタの概要をまとめます。`mainloop` は標準入力から JSON を受け取り、`controller` のメソッドにルーティングして標準出力へ JSON で応答を返す小さなメインループです。 +このドキュメントは `mainloop.py` の実装と、最近行ったリファクタの概要をまとめます。`mainloop` は標準入力から JSON を受け取り、`controller` のメソッドにルーティングして標準出力へ JSON で応答を返す小さなメインループです。 -重要な変更点(2025-10-09): -- `Main` クラスに `start()` / `stop()` を追加し、受信スレッドとハンドラスレッドのライフサイクル管理を明示化しました。 -- `queue.get(timeout=...)` を使ってポーリング負荷を下げ、`_stop_event` による安全なシャットダウンを可能にしました。 -- 標準入力の JSON パースエラーと一般例外のハンドリングを強化しました。 -- `startReceiver()` / `startHandler()` を使って個別にスレッドを起動することも可能です。 +重要な変更点: +- 2025-10-09: `Main` クラスに `start()` / `stop()` を追加し、受信スレッドとハンドラスレッドのライフサイクル管理を明示化しました。`queue.get(timeout=...)` による安全なシャットダウンを可能にしています。 +- 2025-10-13: ハンドラの振る舞いを改善しました(マルチワーカー化とロック正規化): + - マルチワーカー化: ハンドラ処理はデフォルトで複数ワーカー(例: 3 本)で並列実行されます。これにより、1 つの重い処理が他のすべてのリクエストをブロックしてしまう問題を緩和します。 + - ロック正規化: `/set/enable/` と `/set/disable/` のような on/off ペアは同一のロックキーに正規化され、同一機能の on と off が同時に別スレッドで実行されることを防ぎます。これにより、遅い方の処理結果が後から上書きして最終状態が意図しないものになる不具合を防止します。 クラス: Main -- __init__(controller_instance: Controller, mapping_data: dict) -> None +- __init__(controller_instance: Controller, mapping_data: dict, worker_count: int = 3) -> None - `controller_instance`: `Controller` のインスタンス。 - `mapping_data`: `mainloop` 内で使用する `mapping`(エンドポイント -> ハンドラ情報)辞書。 + - `worker_count`: ハンドラワーカー数(デフォルト 3)。実行環境に応じて調整可能です。 - start() -> None - - 内部で `startReceiver()` と `startHandler()` を呼び、両スレッドを起動します。 + - 内部で `startReceiver()` と `startHandler()` を呼び、受信とハンドラのスレッド群を起動します。 - stop(wait: float = 2.0) -> None - シャットダウンシグナルをセットし、スレッド終了を待ちます(デフォルト 2 秒)。 +動作の重要ポイント +- キュー運用: 受信した JSON は内部キューに入れられ、ハンドラワーカーが順次取り出して処理します。`queue.get(timeout=...)` を使っているため CPU 負荷を抑えつつ安全に停止できます。 +- 同期応答設計: 各エンドポイントは基本的に呼び出し元に同期的に結果を返します(`handler` が戻り値としてステータスと結果を返す)。今回の変更でもこの設計は維持されています。 +- 同一機能直列化: `/set/enable/X` と `/set/disable/X` のような on/off ペアは内部で同一の "ロックキー" に正規化され、同時に両方が実行されることを防ぎます。これにより、enable と disable が競合して遅い方が勝つ問題が解消されます。 
+ 使い方(例): ```python @@ -29,15 +35,16 @@ main_instance.start() main_instance.stop() ``` -既存のスクリプト互換性: -- 既存コードが `startReceiver()` や `startHandler()` を直接呼んでいる場合、そのまま動作します。`start()` / `stop()` を使うと簡潔に起動 / 停止が行えます。 +確認手順(変更の検証): +1. バックエンドを起動しておく。 +2. UI/テストスクリプトから `/set/enable/translation` と `/set/disable/translation` を高速に交互送信する(数十〜数百ミリ秒間隔で連打)。 +3. ログ(`printLog` 出力)を確認し、同一機能の複数実行が同時に走っていないこと、最終状態が遅い方に常に上書きされないことを確認する。 +4. 必要に応じて `worker_count` を増減して挙動を確認する(PC リソースに応じて 1〜6 程度を推奨)。 注意点と推奨事項: -- `stop()` を呼ばないとバックグラウンドスレッドがデーモンであってもプロセス終了前にクリーンアップが不十分になる場合があります。アプリ終了時は `stop()` を呼ぶことを推奨します。 -- `queue.get(timeout=...)` を使うことで即時性よりも CPU 使用量の低減を優先しています。非常に低レイテンシが必要なケースでは timeout を短くしてください(ただし CPU 使用量に注意)。 - -スクリプト連携: -- `mainloop.mapping` と `mainloop.run_mapping` は `scripts/print_mapping.py` などのツールから直接参照されます。mapping のキー/値を変更する場合はそれらのスクリプトも確認してください。 +- `worker_count` を増やすと他のエンドポイントの並列処理性は上がりますが、controller/model 側で共有リソース(GPU メモリやデバイスハンドルなど)への同時アクセスが許可されていない場合は、controller 側で機能単位のロック(例: translation_lock)を追加してください。 +- このドキュメントの変更は `mainloop` の外側から見える挙動(同期応答、ログ、ロックの方針)を説明するものです。controller 内の処理自体は引き続き同期的に実行されます。必要があれば、enable 系の重い処理を非同期化して完了通知をイベントで返す設計(UI 変更が必要)も検討してください。 変更履歴: - 2025-10-09: start/stop ライフサイクル、タイムアウト付きキュー取得、エラー処理強化を追加。 +- 2025-10-13: マルチワーカー化(デフォルト 3)と enable/disable のロック正規化を実装。これにより同一機能の on/off の同時実行を防止し、UI からの高速トグルで最終状態が遅い方に上書きされる問題を修正しました。 diff --git a/src-python/mainloop.py b/src-python/mainloop.py index 644037a2..13efe44c 100644 --- a/src-python/mainloop.py +++ b/src-python/mainloop.py @@ -2,7 +2,7 @@ import sys import json import time from typing import Any, Tuple -from threading import Thread, Event +from threading import Thread, Event, Lock from queue import Queue, Empty import logging from controller import Controller # noqa: E402 @@ -357,14 +357,38 @@ mapping = { init_mapping = {key:value for key, value in mapping.items() if key.startswith("/get/data/")} controller.setInitMapping(init_mapping) +DEFAULT_WORKER_COUNT = 3 # 必要なら増やす + class Main: - def 
__init__(self, controller_instance: Controller, mapping_data: dict) -> None: - # queue holds tuples of (endpoint, data) + def __init__(self, controller_instance: Controller, mapping_data: dict, worker_count: int = DEFAULT_WORKER_COUNT) -> None: self.queue: Queue[Tuple[str, Any]] = Queue() self._stop_event: Event = Event() self.controller = controller_instance self.mapping = mapping_data self._threads: list[Thread] = [] + self._worker_count = worker_count + + # エンドポイントごとの排他制御用 Lock を作成 + # enable/disable ペアは同じロックキーに正規化する + def _canonical_lock_key(endpoint: str) -> str: + if not isinstance(endpoint, str): + return str(endpoint) + if endpoint.startswith("/set/enable/"): + return "/lock/set/" + endpoint[len("/set/enable/"):] + if endpoint.startswith("/set/disable/"): + return "/lock/set/" + endpoint[len("/set/disable/"):] + return endpoint + + # mapping に含まれるすべてのエンドポイントを走査して正規化キー集合を作る + lock_keys = set() + for key in self.mapping.keys(): + lock_keys.add(_canonical_lock_key(key)) + + # 正規化キーごとに Lock を割り当てる + self._endpoint_locks: dict[str, Lock] = {k: Lock() for k in lock_keys} + + # 正規化関数をインスタンスに保存 + self._canonical_lock_key = _canonical_lock_key def receiver(self) -> None: """Read lines from stdin, parse JSON and enqueue requests. @@ -422,23 +446,51 @@ class Main: return result, status + def _call_handler(self, endpoint: str, data: Any = None) -> tuple: + result = None + status = 500 + handler = self.mapping.get(endpoint) + if handler is None: + response = "Invalid endpoint" + status = 404 + else: + try: + response = handler["variable"](data) + status = response.get("status", 500) + result = response.get("result", None) + time.sleep(0.2) + except Exception: + errorLogging() + result = "Internal error" + status = 500 + return result, status + def handler(self) -> None: - """Main handler loop. 
Uses queue.get with timeout to avoid busy polling and to allow graceful shutdown.""" while not self._stop_event.is_set(): try: endpoint, data = self.queue.get(timeout=0.5) except Empty: continue - try: - result, status = self.handleRequest(endpoint, data) - except Exception: - errorLogging() - result = "Internal error" - status = 500 + # endpoint をロック用の正規化キーに変換してロックを取得 + lock_key = self._canonical_lock_key(endpoint) + lock = self._endpoint_locks.get(lock_key) + + if lock is not None: + acquired = lock.acquire(blocking=False) + if not acquired: + # 同一機能で既に処理中 -> 少し待って再キュー + time.sleep(0.05) + self.queue.put((endpoint, data)) + continue + try: + result, status = self._call_handler(endpoint, data) + finally: + lock.release() + else: + result, status = self._call_handler(endpoint, data) if status == 423: - # Locked endpoint: requeue with a small delay to avoid tight loop time.sleep(0.1) self.queue.put((endpoint, data)) else: @@ -446,10 +498,11 @@ class Main: printResponse(status, endpoint, result) def startHandler(self) -> None: - th_handler = Thread(target=self.handler, name="main_handler") - th_handler.daemon = True - th_handler.start() - self._threads.append(th_handler) + for i in range(max(1, self._worker_count)): + th_handler = Thread(target=self.handler, name=f"main_handler_{i}") + th_handler.daemon = True + th_handler.start() + self._threads.append(th_handler) def start(self) -> None: """Start receiver and handler threads.""" diff --git a/src-python/models/osc/osc.py b/src-python/models/osc/osc.py index c97c6dfb..99f304be 100644 --- a/src-python/models/osc/osc.py +++ b/src-python/models/osc/osc.py @@ -118,7 +118,17 @@ class OSCHandler: if service is not None: osc_query_client = OSCQueryClient(service) mute_self_node = osc_query_client.query_node(address) - value = mute_self_node.value[0] + # mute_self_node may be None when the node is not present on the + # remote OSCQuery service. Also mute_self_node.value may be None + # or an empty list. 
Guard against those cases to avoid + # AttributeError: 'NoneType' object has no attribute 'value' + if mute_self_node is None: + return None + # prefer explicit checks rather than relying on exceptions + node_value = getattr(mute_self_node, 'value', None) + if not node_value: + return None + value = node_value[0] except Exception: errorLogging() # エラー発生時にbrowserをリセットして次回再初期化 diff --git a/src-python/models/transcription/transcription_recorder.py b/src-python/models/transcription/transcription_recorder.py index 7214a375..30eb946e 100644 --- a/src-python/models/transcription/transcription_recorder.py +++ b/src-python/models/transcription/transcription_recorder.py @@ -37,23 +37,47 @@ class BaseRecorder: class SelectedMicRecorder(BaseRecorder): def __init__(self, device: dict, energy_threshold: int, dynamic_energy_threshold: bool, record_timeout: int) -> None: - source = Microphone( - device_index=device['index'], - sample_rate=int(device["defaultSampleRate"]), - ) + # Safely construct Microphone source. 
If device dict is missing expected keys + # or index is out-of-range for the platform, fallback to default device (None) + try: + device_index = int(device.get('index', -1)) + sample_rate = int(device.get("defaultSampleRate", 16000)) + if device_index < 0: + # invalid index -> fallback + raise ValueError("invalid device index") + source = Microphone( + device_index=device_index, + sample_rate=sample_rate, + ) + except Exception: + # Best-effort fallback: use system default microphone + try: + source = Microphone() + except Exception: + raise super().__init__(source=source, energy_threshold=energy_threshold, dynamic_energy_threshold=dynamic_energy_threshold, record_timeout=record_timeout) # self.adjustForNoise() class SelectedSpeakerRecorder(BaseRecorder): def __init__(self, device: dict, energy_threshold: int, dynamic_energy_threshold: bool, record_timeout: int) -> None: - - source = Microphone(speaker=True, - device_index= device["index"], - sample_rate=int(device["defaultSampleRate"]), - chunk_size=get_sample_size(paInt16), - channels=device["maxInputChannels"] - ) + try: + device_index = int(device.get('index', -1)) + sample_rate = int(device.get("defaultSampleRate", 16000)) + channels = int(device.get("maxInputChannels", 1)) + if device_index < 0: + raise ValueError("invalid device index") + source = Microphone(speaker=True, + device_index=device_index, + sample_rate=sample_rate, + chunk_size=get_sample_size(paInt16), + channels=channels + ) + except Exception: + try: + source = Microphone(speaker=True) + except Exception: + raise super().__init__(source=source, energy_threshold=energy_threshold, dynamic_energy_threshold=dynamic_energy_threshold, record_timeout=record_timeout) # self.adjustForNoise() @@ -83,22 +107,42 @@ class BaseEnergyRecorder: class SelectedMicEnergyRecorder(BaseEnergyRecorder): def __init__(self, device: dict) -> None: - source = Microphone( - device_index=device['index'], - sample_rate=int(device["defaultSampleRate"]), - ) + try: + 
device_index = int(device.get('index', -1)) + sample_rate = int(device.get("defaultSampleRate", 16000)) + if device_index < 0: + raise ValueError("invalid device index") + source = Microphone( + device_index=device_index, + sample_rate=sample_rate, + ) + except Exception: + try: + source = Microphone() + except Exception: + raise super().__init__(source=source) # self.adjustForNoise() class SelectedSpeakerEnergyRecorder(BaseEnergyRecorder): def __init__(self, device: dict) -> None: - - source = Microphone(speaker=True, - device_index= device["index"], - sample_rate=int(device["defaultSampleRate"]), - channels=device["maxInputChannels"] - ) + try: + device_index = int(device.get('index', -1)) + sample_rate = int(device.get("defaultSampleRate", 16000)) + channels = int(device.get("maxInputChannels", 1)) + if device_index < 0: + raise ValueError("invalid device index") + source = Microphone(speaker=True, + device_index=device_index, + sample_rate=sample_rate, + channels=channels + ) + except Exception: + try: + source = Microphone(speaker=True) + except Exception: + raise super().__init__(source=source) # self.adjustForNoise() @@ -156,10 +200,20 @@ class SelectedMicEnergyAndAudioRecorder(BaseEnergyAndAudioRecorder): phrase_timeout: int = 1, record_timeout: int = 5, ) -> None: - source = Microphone( - device_index=device['index'], - sample_rate=int(device["defaultSampleRate"]), - ) + try: + device_index = int(device.get('index', -1)) + sample_rate = int(device.get("defaultSampleRate", 16000)) + if device_index < 0: + raise ValueError("invalid device index") + source = Microphone( + device_index=device_index, + sample_rate=sample_rate, + ) + except Exception: + try: + source = Microphone() + except Exception: + raise super().__init__( source=source, energy_threshold=energy_threshold, @@ -182,12 +236,23 @@ class SelectedSpeakerEnergyAndAudioRecorder(BaseEnergyAndAudioRecorder): record_timeout: int = 5, ) -> None: - source = Microphone(speaker=True, - device_index= 
device["index"], - sample_rate=int(device["defaultSampleRate"]), - chunk_size=get_sample_size(paInt16), - channels=device["maxInputChannels"], - ) + try: + device_index = int(device.get('index', -1)) + sample_rate = int(device.get("defaultSampleRate", 16000)) + channels = int(device.get("maxInputChannels", 1)) + if device_index < 0: + raise ValueError("invalid device index") + source = Microphone(speaker=True, + device_index=device_index, + sample_rate=sample_rate, + chunk_size=get_sample_size(paInt16), + channels=channels, + ) + except Exception: + try: + source = Microphone(speaker=True) + except Exception: + raise super().__init__( source=source, energy_threshold=energy_threshold, From d4f89a734d5873824951f0204c216f32e7b06c26 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Mon, 13 Oct 2025 16:41:34 +0900 Subject: [PATCH 90/92] =?UTF-8?q?=E3=83=87=E3=83=90=E3=82=A4=E3=82=B9?= =?UTF-8?q?=E3=83=9E=E3=83=8D=E3=83=BC=E3=82=B8=E3=83=A3=E3=83=BC=E3=81=AE?= =?UTF-8?q?=E7=9B=A3=E8=A6=96=E6=A9=9F=E8=83=BD=E3=82=92=E8=BF=BD=E5=8A=A0?= =?UTF-8?q?=E3=81=97=E3=80=81=E3=83=9E=E3=82=A4=E3=82=AF=E3=81=8A=E3=82=88?= =?UTF-8?q?=E3=81=B3=E3=82=B9=E3=83=94=E3=83=BC=E3=82=AB=E3=83=BC=E3=81=AE?= =?UTF-8?q?=E8=87=AA=E5=8B=95=E9=81=B8=E6=8A=9E=E6=A9=9F=E8=83=BD=E3=81=AE?= =?UTF-8?q?=E6=9C=89=E5=8A=B9/=E7=84=A1=E5=8A=B9=E3=81=AB=E5=BF=9C?= =?UTF-8?q?=E3=81=98=E3=81=A6=E7=9B=A3=E8=A6=96=E3=82=92=E9=96=8B=E5=A7=8B?= =?UTF-8?q?=E3=83=BB=E5=81=9C=E6=AD=A2=E3=81=99=E3=82=8B=E3=83=AD=E3=82=B8?= =?UTF-8?q?=E3=83=83=E3=82=AF=E3=82=92=E5=AE=9F=E8=A3=85=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src-python/controller.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src-python/controller.py b/src-python/controller.py index d9d917e6..20afc048 100644 --- a/src-python/controller.py +++ b/src-python/controller.py @@ -1105,6 +1105,7 @@ class Controller: 
device_manager.setCallbackDefaultMicDevice(self.updateSelectedMicDevice) device_manager.setCallbackProcessAfterUpdateMicDevices(self.restartAccessMicDevices) device_manager.forceUpdateAndSetMicDevices() + device_manager.startMonitoring() def setEnableAutoMicSelect(self, *args, **kwargs) -> dict: if config.AUTO_MIC_SELECT is False: @@ -1114,6 +1115,9 @@ class Controller: @staticmethod def setDisableAutoMicSelect(*args, **kwargs) -> dict: + if config.AUTO_SPEAKER_SELECT is False: + device_manager.stopMonitoring() + if config.AUTO_MIC_SELECT is True: device_manager.clearCallbackProcessBeforeUpdateMicDevices() device_manager.clearCallbackDefaultMicDevice() @@ -1301,6 +1305,7 @@ class Controller: device_manager.setCallbackDefaultSpeakerDevice(self.updateSelectedSpeakerDevice) device_manager.setCallbackProcessAfterUpdateSpeakerDevices(self.restartAccessSpeakerDevices) device_manager.forceUpdateAndSetSpeakerDevices() + device_manager.startMonitoring() def setEnableAutoSpeakerSelect(self, *args, **kwargs) -> dict: if config.AUTO_SPEAKER_SELECT is False: @@ -1310,6 +1315,9 @@ class Controller: @staticmethod def setDisableAutoSpeakerSelect(*args, **kwargs) -> dict: + if config.AUTO_MIC_SELECT is False: + device_manager.stopMonitoring() + if config.AUTO_SPEAKER_SELECT is True: device_manager.clearCallbackProcessBeforeUpdateSpeakerDevices() device_manager.clearCallbackDefaultSpeakerDevice() From fcb12953026b854241809be75682e4b3afdea1dd Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Mon, 13 Oct 2025 22:55:48 +0900 Subject: [PATCH 91/92] Add documentation and coding guidelines for VRCT backend - Introduced a comprehensive coding rules document outlining naming conventions, module structure, import order, type annotations, error handling, and testing practices. - Created a specification document detailing project goals, target users, and functional/non-functional requirements for the VRCT project. 
- Added a design document describing the application's architecture, initialization policies, concurrency models, and error handling strategies. - Included a detailed design document specifying major classes, functions, data structures, and exception handling. - Removed outdated mypy configuration and several unused scripts related to documentation verification and cleanup. - Deleted test files for OSC and overlay imports as part of the cleanup process. --- src-python/docs/CHANGELOG.md | 27 - src-python/docs/README.md | 18 - src-python/docs/api.md | 701 -------- src-python/docs/architecture.md | 21 - src-python/docs/config.md | 433 +++++ src-python/docs/controller.md | 1225 ++++++++++++++ src-python/docs/device_manager.md | 1427 +++++++++++++++++ src-python/docs/diagrams.md | 51 - src-python/docs/mainloop.md | 346 ++++ src-python/docs/model.md | 1277 +++++++++++++++ src-python/docs/modules/config.md | 212 --- src-python/docs/modules/config_ref.md | 39 - src-python/docs/modules/controller.md | 162 -- src-python/docs/modules/controller_ref.md | 25 - src-python/docs/modules/device_manager.md | 84 - src-python/docs/modules/device_manager_ref.md | 93 -- src-python/docs/modules/mainloop.md | 50 - src-python/docs/modules/model.md | 118 -- src-python/docs/modules/model_extra.md | 60 - src-python/docs/modules/osc.md | 47 - src-python/docs/modules/overlay.md | 59 - src-python/docs/modules/overlay_image.md | 115 -- src-python/docs/modules/transcription.md | 126 -- src-python/docs/modules/translation.md | 113 -- src-python/docs/modules/transliteration.md | 17 - src-python/docs/modules/utils.md | 132 -- src-python/docs/modules/watchdog.md | 80 - src-python/docs/modules/websocket.md | 18 - src-python/docs/run_events_payloads.md | 125 -- src-python/docs/runtime.md | 43 - src-python/docs/utils.md | 940 +++++++++++ ...{CODING_RULES.md => コーディングルール.md} | 0 src-python/docs/仕様書.md | 58 + src-python/docs/設計書.md | 57 + src-python/docs/詳細設計書.md | 66 + src-python/mypy.ini | 32 - 
.../scripts/cleanup_docs_placeholders.py | 16 - src-python/scripts/find_doc_tokens.py | 21 - src-python/scripts/print_mapping.py | 28 - src-python/scripts/verify_docs_vs_code.py | 161 -- .../scripts/verify_docs_vs_code_runtime.py | 126 -- src-python/tests/test_osc_imports.py | 6 - src-python/tests/test_overlay_imports.py | 30 - 43 files changed, 5829 insertions(+), 2956 deletions(-) delete mode 100644 src-python/docs/CHANGELOG.md delete mode 100644 src-python/docs/README.md delete mode 100644 src-python/docs/api.md delete mode 100644 src-python/docs/architecture.md create mode 100644 src-python/docs/config.md create mode 100644 src-python/docs/controller.md create mode 100644 src-python/docs/device_manager.md delete mode 100644 src-python/docs/diagrams.md create mode 100644 src-python/docs/mainloop.md create mode 100644 src-python/docs/model.md delete mode 100644 src-python/docs/modules/config.md delete mode 100644 src-python/docs/modules/config_ref.md delete mode 100644 src-python/docs/modules/controller.md delete mode 100644 src-python/docs/modules/controller_ref.md delete mode 100644 src-python/docs/modules/device_manager.md delete mode 100644 src-python/docs/modules/device_manager_ref.md delete mode 100644 src-python/docs/modules/mainloop.md delete mode 100644 src-python/docs/modules/model.md delete mode 100644 src-python/docs/modules/model_extra.md delete mode 100644 src-python/docs/modules/osc.md delete mode 100644 src-python/docs/modules/overlay.md delete mode 100644 src-python/docs/modules/overlay_image.md delete mode 100644 src-python/docs/modules/transcription.md delete mode 100644 src-python/docs/modules/translation.md delete mode 100644 src-python/docs/modules/transliteration.md delete mode 100644 src-python/docs/modules/utils.md delete mode 100644 src-python/docs/modules/watchdog.md delete mode 100644 src-python/docs/modules/websocket.md delete mode 100644 src-python/docs/run_events_payloads.md delete mode 100644 src-python/docs/runtime.md create mode 
100644 src-python/docs/utils.md rename src-python/docs/{CODING_RULES.md => コーディングルール.md} (100%) create mode 100644 src-python/docs/仕様書.md create mode 100644 src-python/docs/設計書.md create mode 100644 src-python/docs/詳細設計書.md delete mode 100644 src-python/mypy.ini delete mode 100644 src-python/scripts/cleanup_docs_placeholders.py delete mode 100644 src-python/scripts/find_doc_tokens.py delete mode 100644 src-python/scripts/print_mapping.py delete mode 100644 src-python/scripts/verify_docs_vs_code.py delete mode 100644 src-python/scripts/verify_docs_vs_code_runtime.py delete mode 100644 src-python/tests/test_osc_imports.py delete mode 100644 src-python/tests/test_overlay_imports.py diff --git a/src-python/docs/CHANGELOG.md b/src-python/docs/CHANGELOG.md deleted file mode 100644 index 723f3942..00000000 --- a/src-python/docs/CHANGELOG.md +++ /dev/null @@ -1,27 +0,0 @@ -# CHANGELOG - -## 2025-10-09 — 型チェック整備と安全性向上 - -- 修正: `controller.py` - - `Controller.chatMessage` の戻り値注釈を `dict` に明示(関数は JSON 系の応答オブジェクトを返します)。 - - `Controller.checkSoftwareUpdated` が実際に応答を返すように `return` を追加。 - -- 修正: `model.py` - - `startCheckMicEnergy` / `startCheckSpeakerEnergy` のコールバック引数を Optional に変更し、呼び出し前に `callable` チェックを追加。これにより None を渡しても安全に扱えるようになりました。 - - `convertMessageToTransliteration` の返り値を常に list に統一。hiragana/romaji が False の場合は空リストを返します。 - - `createOverlayImageLargeLog` 等の Overlay 作成関数で `target_language` を dict で受けた場合に内部で言語リストへ正規化する挙動を明確化。 - -- 目的: mypy の型チェックの警告/エラーを削減し、ランタイムでの None 呼び出しによるクラッシュを防止するための低リスクな変更です。 - -- 注記: - - 追加で `types-requests` をプロジェクト仮想環境にインストールし、mypy の外部型スタブ不足を解消しました。 - - 本チェンジは内部の型注釈とガードを中心としており、動作ロジックの大きな変更は行っていません。動作確認は mypy(型チェック)と ruff(lint)を通過したことをもって行っています。 - -## 1.0.0 (initial) -- 初回ドキュメント作成: ソースコードに基づく仕様書 / 詳細設計書を docs 配下に追加。 -- 対象: utils, model, controller, device_manager, config, translation, transcription, overlay, websocket, osc, transliteration, watchdog - -今後の作業候補: -- requirements.txt の自動生成とテストスイート追加 -- ドキュメントの API サンプル(リクエスト/レスポンス)追加 -- UML 
図/シーケンス図の画像化 diff --git a/src-python/docs/README.md b/src-python/docs/README.md deleted file mode 100644 index be03a5d0..00000000 --- a/src-python/docs/README.md +++ /dev/null @@ -1,18 +0,0 @@ -# VRCT — ドキュメント - -このドキュメントセットは、VRCT プロジェクト(`src-python`)に含まれる実装の仕様書 / 設計書 / 詳細設計書です。 - -目的 -- ソースコード構造、モジュール間データフロー、API エンドポイント、設定、実行手順、トラブルシュートを網羅して開発・運用の参照を容易にする。 - -対象 -- `utils.py`, `model.py`, `controller.py`, `mainloop.py`, `device_manager.py`, `config.py` および `models/` 以下の全モジュール。 - -ドキュメント構成(主要ファイル) -- `architecture.md` — アーキテクチャ概観 -- `modules/` — 各モジュールごとの詳細設計(個別ファイル) -- `api.md` — 外部/内部向け API エンドポイント マッピング(`mainloop.py` の `mapping` / `run_mapping` に準拠) -- `runtime.md` — 実行/セットアップ手順、依存関係 -- `diagrams.md` — システム図(Mermaid とテキスト両方) -- `CODING_RULES.md` — プロジェクト固有のコーディング規約(命名・型方針・lint/mypy 方針 等) -- `CHANGELOG.md` — 変更履歴 \ No newline at end of file diff --git a/src-python/docs/api.md b/src-python/docs/api.md deleted file mode 100644 index f512a191..00000000 --- a/src-python/docs/api.md +++ /dev/null @@ -1,701 +0,0 @@ ---- - - - -## API エンドポイント仕様 - -概要 -- このドキュメントは `mainloop.py` の `mapping` と `run_mapping` に定義された全エンドポイントを列挙します。 -- すべてのリクエストは標準入力経由で JSON を一行送る形で受信され、標準出力へ JSON 応答を出力します。 - -共通リクエスト形式 -- JSON オブジェクトを 1 行で標準入力に流します。 -- フィールド: - - `endpoint`: エンドポイント文字列 (例: `/get/data/version`) - - `data`: 任意(多くの GET 系は null、SET 系は新しい値やオブジェクト) - -例 -```json -{"endpoint":"/get/data/version","data":null} -``` - -共通レスポンス形式 -- mainloop は各リクエストの処理結果を次の形式で標準出力に出します(内部 util の `printResponse` を経由): - -成功例: -```json -{"status":200,"endpoint":"/get/data/version","result":"3.2.2"} -``` - -エラー例: -```json -{"status":400,"endpoint":"/set/data/osc_ip_address","result":{"message":"Invalid IP address","data":"127.0.0.1"}} -``` - -ロック状態と再試行 -- `mapping` にある各ハンドラは `"status": True|False` を持ちます。 - - False の場合、`handleRequest` は 423 (Locked endpoint) を返し、メインのハンドラはその要求をキューに戻して待機します(遅延再実行のため)。 - -run イベント -- `controller` は UI 更新などの非同期通知を行うために `run(status, endpoint, payload)` を呼び出します。これらは `run_mapping` 
にマップされ、外部 UI には `/run/...` 形式のエンドポイントで配信されます。 - -以下は `controller.py` から抽出した run イベントと、実際に送られるペイロードの具体例です。UI 側はこれらの JSON 形状を期待することで正しく動作します。 - -`/run/connected_network` (200) - - payload: true | false - -`/run/enable_ai_models` (200) - - payload: true | false - -`/run/mic_host_list` (200) - - payload: ["Host 1", "Host 2"] - -`/run/mic_device_list` (200) - - payload: ["Microphone (Realtek)", "Headset Microphone"] - -`/run/speaker_device_list` (200) - - payload: ["Speakers (Realtek)", "Headset"] - -`/run/initialization_complete` (200) - - payload: dict mapping endpoint -> current value (constructed from init_mapping) - - 例: {"/get/data/version":"3.2.2","/get/data/selected_tab_no":0} - -`/run/selected_mic_device` (200) - - payload: {"host": , "device": } - -`/run/selected_speaker_device` (200) - - payload: string (device name) - -`/run/error_device` (400) - - payload: {"message":"No mic device detected","data": null} - -`/run/check_mic_volume` (200) - - payload: numeric energy value (float) - -`/run/check_speaker_volume` (200) - - payload: numeric energy value (float) - -`/run/download_progress_ctranslate2_weight` (200) - - payload: {"weight_type":"m2m100_418m","progress":0.42} - -`/run/downloaded_ctranslate2_weight` (200) - - payload: "m2m100_418m" - -`/run/error_ctranslate2_weight` (400) - - payload: {"message":"CTranslate2 weight download error","data": null} - -`/run/download_progress_whisper_weight` (200) - - payload: {"weight_type":"base","progress":0.78} - -`/run/downloaded_whisper_weight` (200) - - payload: "base" - -`/run/error_whisper_weight` (400) - - payload: {"message":"Whisper weight download error","data": null} - -`/run/word_filter` (200) - - payload: {"message":"Detected by word filter: "} - -`/run/error_translation_engine` (400) - - payload: {"message":"Translation engine limit error","data": null} - -`/run/error_translation_mic_vram_overflow` (400) - - payload: {"message":"VRAM out of memory during translation of mic","data":""} - 
-`/run/error_translation_speaker_vram_overflow` (400) - - payload: {"message":"VRAM out of memory during translation of speaker","data":""} - -`/run/error_translation_chat_vram_overflow` (400) - - payload: {"message":"VRAM out of memory during translation of chat","data":""} - -`/run/enable_translation` (200/400) - - payload: on OOM: {"message":"Translation disabled due to VRAM overflow","data": false} - -`/run/transcription_send_mic_message` (200) - - payload: - { - "original": {"message": "Hello", "transliteration": []}, - "translations": [ {"message":"こんにちは","transliteration":[]}, ... ] - } - -`/run/transcription_receive_speaker_message` (200) - - payload: same shape as `/run/transcription_send_mic_message` - -`/run/software_update_info` (200) - - payload: e.g. {"has_update": true, "latest_version": "3.3.0"} - -`/run/selected_translation_compute_type` (200) - - payload: string ("auto"|"cpu"|"cuda:0") - -`/run/selected_transcription_compute_type` (200) - - payload: string - -`/run/selected_translation_engines` (200) - - payload: config.SELECTED_TRANSLATION_ENGINES (list/dict per tab) - -`/run/translation_engines` (200) - - payload: ["CTranslate2"] - -`/run/initialization_progress` (200) - - payload: integer (1..4) - -`/run/enable_osc_query` (200) - - payload: {"data": true|false, "disabled_functions": ["vrc_mic_mute_sync"]} - - -エンドポイント一覧(mapping にある全エンドポイント) - -注: 各行の説明では、`method` 的な概念はありません。すべてのエンドポイントは JSON リクエストで同様に呼び出します。`data` の期待値は説明に記載しています。 - -1) メイン操作 - -- /set/enable/translation — data: null — 翻訳を有効にします。 - - 成功応答例: - ```json - {"status":200, "endpoint":"/set/enable/translation", "result": true} - ``` - - 失敗例(VRAM OOM を検出して無効化されたケースは run イベントで通知されます): - ```json - {"status":400, "endpoint":"/set/enable/translation", "result":{"message":"Translation disabled due to VRAM overflow","data":false}} - ``` - -- /set/disable/translation — data: null — 翻訳を無効にします。 - - 成功応答例: - ```json - {"status":200, "endpoint":"/set/disable/translation", "result": false} - ``` - 
-- /set/enable/transcription_send — data: null — マイク転写(送信)を有効化します。 - - 実行はスレッドで開始される場合がある。成功例: - ```json - {"status":200, "endpoint":"/set/enable/transcription_send", "result": true} - ``` - -- /set/disable/transcription_send — data: null — 停止要求。成功例: - ```json - {"status":200, "endpoint":"/set/disable/transcription_send", "result": false} - ``` - -- /set/enable/transcription_receive — data: null — スピーカー側の転写を有効化します。 -- /set/disable/transcription_receive — data: null — 無効化します。 - -- /set/enable/foreground — data: null — フォアグラウンド表示を有効化します。 - - 成功例: {"status":200, "endpoint":"/set/enable/foreground", "result": true} - -- /get/data/selected_tab_no — data: null — 現在のタブ番号を返します。 - - 例: {"status":200, "endpoint":"/get/data/selected_tab_no", "result": 0} - -- /get/data/main_window_sidebar_compact_mode — data: null — サイドバーのコンパクト表示の現在値を返します。 - - 例: {"status":200, "endpoint":"/get/data/main_window_sidebar_compact_mode","result": false} - - -- /set/data/selected_tab_no — data: int — タブ番号を設定します。 - - リクエスト例: {"endpoint":"/set/data/selected_tab_no","data":1} - - 成功応答例: {"status":200, "endpoint":"/set/data/selected_tab_no","result":1} - -- /get/data/translation_engines — data: null — 利用可能な翻訳エンジン一覧を返します。 - - 例: {"status":200, "endpoint":"/get/data/translation_engines","result":["CTranslate2"]} - -- /get/data/selectable_language_list — data: null — 選択可能な言語一覧(言語コード, country 等を含むデータ構造) - - 例: {"status":200, "endpoint":"/get/data/selectable_language_list","result":[{"language":"English","country":"US"},{"language":"Japanese","country":"JP"}]} - -- /get/data/transcription_engines — data: null — 利用可能な転写エンジン一覧 - - 例: {"status":200, "endpoint":"/get/data/transcription_engines","result":["Google","Whisper"]} - - -- /run/send_message_box — data: {"id": <任意>, "message": "..."} - - 内部で `Controller.chatMessage` を呼び出します。戻りは変換済メッセージ構造体。 - - リクエスト例: - ```json - {"endpoint":"/run/send_message_box","data":{"id":123,"message":"Hello"}} - ``` - - 成功応答例: - ```json - 
{"status":200,"endpoint":"/run/send_message_box","result":{"id":123,"original":{"message":"Hello","transliteration":[]},"translations":[{"message":"","transliteration":[]}]}} - ``` - -- /run/typing_message_box — data: null — OSC でタイピング状態を伝える場合に使用。成功例: {"status":200,...} -- /run/stop_typing_message_box — data: null — 停止。 - -- /run/send_text_overlay — data: object — オーバーレイに表示するテキストを更新します。例: {"text":"Hello","lang":"English"} - - 成功応答は送信した data をそのまま返すことが多い。 - -- /run/swap_your_language_and_target_language — data: null — 選択中の入出力言語を入れ替えます。成功例: {"status":200, ...} - - -/run/update_software — data: null — 非同期でアップデート処理を開始します。成功応答: {"status":200, "result": true} -/run/update_cuda_software — data: null — CUDA アップデートを開始します。 - - -/set/enable/transcription_receive — data: null — スピーカー側の転写(受信)を有効化 -/set/disable/transcription_receive — data: null — 無効化 - - -/set/enable/foreground — data: null — フォアグラウンド表示を有効化 -/set/disable/foreground — data: null — 無効化 - -- /get/data/selected_tab_no — data: null — 現在のタブ番号を返す -- /set/data/selected_tab_no — data: int — タブ番号を設定 - -- /get/data/translation_engines — data: null — 使える翻訳エンジン一覧を返す - -- /get/data/selected_translation_engines — data: null — 各タブで選択されている翻訳エンジン(タブ別辞書) - - 例: {"status":200, "endpoint":"/get/data/selected_translation_engines","result":{"0":["CTranslate2"],"1":["CTranslate2"]}} - -- /get/data/selected_your_languages — data: null — 各タブの入力言語設定 - - 例: {"status":200, "endpoint":"/get/data/selected_your_languages","result":{"0":{"language":"English","enable":true}}} - -- /get/data/selected_target_languages — data: null — 各タブの出力言語設定 - - 例: {"status":200, "endpoint":"/get/data/selected_target_languages","result":{"0":{"1":{"language":"Japanese","enable":true}}}} - -- /get/data/selected_transcription_engine — data: null — 現在選択されている転写エンジン - - 例: {"status":200, "endpoint":"/get/data/selected_transcription_engine","result":"Whisper"} - -- /run/send_message_box — data: {"id":..., "message": "..."} — チャット送信を実行(chatMessage を内部呼び出し) -- 
/run/typing_message_box — data: null — タイピング開始通知(OSC 経由で送信される場合あり) -- /run/stop_typing_message_box — data: null — タイピング停止 - -- /run/send_text_overlay — data: {text settings...} — オーバーレイ用のテキスト表示を更新 - -- /run/swap_your_language_and_target_language — data: null — 入出力言語を入れ替え - -- /run/update_software — data: null — ソフト更新処理をスレッドで開始 -- /run/update_cuda_software — data: null — CUDA 関連更新を開始 - -2) 表示・外観設定 -- /get/data/version — data: null — アプリ版を返す -- /get/data/transparency — data: null — 透過率 -- /set/data/transparency — data: int — 透過率を設定 -- /get/data/ui_scaling — data: null — UI スケール -- /set/data/ui_scaling — data: int -- /get/data/textbox_ui_scaling, /set/data/textbox_ui_scaling -- /get/data/message_box_ratio, /set/data/message_box_ratio -- /get/data/send_message_button_type, /set/data/send_message_button_type -- /get/data/show_resend_button, /set/enable/show_resend_button, /set/disable/show_resend_button -- /get/data/font_family, /set/data/font_family -- /get/data/ui_language, /set/data/ui_language -- /get/data/main_window_geometry, /set/data/main_window_geometry - -3) 計算デバイス関連 -- /get/data/compute_mode — data: null — compute mode -- /get/data/translation_compute_device_list — data: null — 選択可能な翻訳デバイス一覧 -- /get/data/selected_translation_compute_device — data: null -- /set/data/selected_translation_compute_device — data: device descriptor — 選択 -- /get/data/transcription_compute_device_list — same as translation -- /get/data/selected_transcription_compute_device, /set/data/selected_transcription_compute_device - -4) 翻訳設定 -- /get/data/selectable_ctranslate2_weight_type_dict — data: null — 利用可能な ctranslate2 重みの辞書 -- /get/data/ctranslate2_weight_type, /set/data/ctranslate2_weight_type -- /get/data/selected_translation_compute_type, /set/data/selected_translation_compute_type -- /run/download_ctranslate2_weight — data: "weight_type" — 指定した重みをダウンロード(非同期可) -- /get/data/deepl_auth_key — data: null — DeepL API キー(存在すれば返却、セキュリティ上の注意あり) -- /set/data/deepl_auth_key — data: "" — DeepL 
キーを設定(キー検証あり) -- /delete/data/deepl_auth_key — data: null — DeepL キーを削除 - -- /set/data/selected_translation_engines — data: dict/list — 各タブの翻訳エンジン選択を設定します。 - - 例: {"endpoint":"/set/data/selected_translation_engines","data":{"0":["CTranslate2"]}} - -- /set/data/selected_transcription_engine — data: string — 現在の転写エンジンを設定します。 - - 例: {"endpoint":"/set/data/selected_transcription_engine","data":"Whisper"} - -- /set/enable/main_window_sidebar_compact_mode — data: null — サイドバーをコンパクト表示に設定 - - 例: {"status":200,"endpoint":"/set/enable/main_window_sidebar_compact_mode","result": true} - -- /set/disable/main_window_sidebar_compact_mode — data: null — サイドバーのコンパクト表示を解除 - - 例: {"status":200,"endpoint":"/set/disable/main_window_sidebar_compact_mode","result": false} -- /get/data/convert_message_to_romaji, /set/enable/convert_message_to_romaji, /set/disable/convert_message_to_romaji -- /get/data/convert_message_to_hiragana, /set/enable/convert_message_to_hiragana, /set/disable/convert_message_to_hiragana - -5) トランスクリプション / デバイス -- /get/data/mic_host_list, /get/data/mic_device_list, /get/data/speaker_device_list -- /get/data/auto_mic_select, /set/enable/auto_mic_select, /set/disable/auto_mic_select -- /get/data/selected_mic_host, /set/data/selected_mic_host -- /get/data/selected_mic_device, /set/data/selected_mic_device -- /get/data/mic_threshold, /set/data/mic_threshold -- /get/data/mic_automatic_threshold, /set/enable/mic_automatic_threshold, /set/disable/mic_automatic_threshold -- /get/data/mic_record_timeout, /set/data/mic_record_timeout -- /get/data/mic_phrase_timeout, /set/data/mic_phrase_timeout -- /get/data/mic_max_phrases, /set/data/mic_max_phrases -- /get/data/hotkeys, /set/data/hotkeys -- /get/data/plugins_status, /set/data/plugins_status -- /get/data/mic_avg_logprob, /set/data/mic_avg_logprob -- /get/data/mic_no_speech_prob, /set/data/mic_no_speech_prob -- /set/enable/check_mic_threshold, /set/disable/check_mic_threshold -- /get/data/mic_word_filter, 
/set/data/mic_word_filter - -6) スピーカー側設定 -- /get/data/auto_speaker_select, /set/enable/auto_speaker_select, /set/disable/auto_speaker_select -- /get/data/selected_speaker_device, /set/data/selected_speaker_device -- /get/data/speaker_threshold, /set/data/speaker_threshold -- /get/data/speaker_automatic_threshold, /set/enable/speaker_automatic_threshold, /set/disable/speaker_automatic_threshold -- /get/data/speaker_record_timeout, /set/data/speaker_record_timeout -- /get/data/speaker_phrase_timeout, /set/data/speaker_phrase_timeout -- /get/data/speaker_max_phrases, /set/data/speaker_max_phrases -- /get/data/speaker_avg_logprob, /set/data/speaker_avg_logprob -- /get/data/speaker_no_speech_prob, /set/data/speaker_no_speech_prob -- /set/enable/check_speaker_threshold, /set/disable/check_speaker_threshold - -7) Whisper / トランスクリプション重み -- /get/data/selectable_whisper_weight_type_dict -- /get/data/whisper_weight_type, /set/data/whisper_weight_type -- /get/data/selected_transcription_compute_type, /set/data/selected_transcription_compute_type -- /run/download_whisper_weight — data: "weight_type" - -8) VR / オーバーレイ -- /get/data/overlay_small_log, /set/enable/overlay_small_log, /set/disable/overlay_small_log -- /get/data/overlay_small_log_settings, /set/data/overlay_small_log_settings -- /get/data/overlay_large_log, /set/enable/overlay_large_log, /set/disable/overlay_large_log -- /get/data/overlay_large_log_settings, /set/data/overlay_large_log_settings -- /get/data/overlay_show_only_translated_messages, /set/enable/overlay_show_only_translated_messages, /set/disable/overlay_show_only_translated_messages - -9) その他設定 -- /get/data/send_message_format_parts, /set/data/send_message_format_parts -- /get/data/received_message_format_parts, /set/data/received_message_format_parts -- /get/data/auto_clear_message_box, /set/enable/auto_clear_message_box, /set/disable/auto_clear_message_box -- /get/data/send_only_translated_messages, /set/enable/send_only_translated_messages, 
/set/disable/send_only_translated_messages -- /get/data/logger_feature, /set/enable/logger_feature, /set/disable/logger_feature -- /run/open_filepath_logs -- /get/data/vrc_mic_mute_sync, /set/enable/vrc_mic_mute_sync, /set/disable/vrc_mic_mute_sync -- /get/data/send_message_to_vrc, /set/enable/send_message_to_vrc, /set/disable/send_message_to_vrc -- /get/data/send_received_message_to_vrc, /set/enable/send_received_message_to_vrc, /set/disable/send_received_message_to_vrc - -10) WebSocket -- /get/data/websocket_host, /set/data/websocket_host -- /get/data/websocket_port, /set/data/websocket_port -- /get/data/websocket_server, /set/enable/websocket_server, /set/disable/websocket_server - -11) OSC / 高度設定 -- /get/data/osc_ip_address, /set/data/osc_ip_address -- /get/data/osc_port, /set/data/osc_port -- /get/data/notification_vrc_sfx, /set/enable/notification_vrc_sfx, /set/disable/notification_vrc_sfx -- /run/open_filepath_config_file -- /run/feed_watchdog - -挙動メモ / 注意点 -- `data` は受信時に `encodeBase64` が適用される場合があります(バイナリや特殊文字対策)。 -- いくつかのエンドポイントは内部的にバックグラウンドスレッドを立ち上げます(ダウンロード・更新処理・transliteration 等)。 -- 翻訳・転写関連は VRAM OOM を検知すると自動的に関連機能を無効化し、UI に 400 系の run イベントを送信します。API 消費者はこれらの run イベントを監視する必要があります。 - -次の作業 -- `docs/modules/controller.md` に記載した Controller のメソッド詳細と紐付けて、各エンドポイントごとに具体的な request/response のサンプル(body の構造)を追加します。 -### API / メッセージマッピング(詳細) - -このアプリは stdin/stdout を通じた 1 行 JSON メッセージで制御します。内部では `mainloop.py` の `mapping` が受信 endpoint を Controller のメソッドに結び付け、`run_mapping` が非同期通知のエンドポイントを定義します。 - -受信メッセージ(stdin) -```json -{ "endpoint": "/set/data/selected_tab_no", "data": 0 } -``` - -送信メッセージ(stdout) -- 成功: printResponse が次を出力します。 -```json -{ "status": 200, "endpoint": "/get/data/version", "result": "3.2.2" } -``` -- エラー: -```json -{ "status": 400, "endpoint": "/set/data/osc_ip_address", "result": {"message":"Invalid IP address","data":"127.0.0.1"} } -``` - -動作原則 -- `/get/data/*` : Controller の getter を呼び、設定やリストを返す。 -- `/set/data/*` : Controller の setter 
を呼び、設定を変更して新値を返す。 -- `/run/*` : 非同期アクションや UI ボタンが実行する処理(ダウンロード、更新、送信など)。 -- `mapping` の `"status": False` はロック(423 を返し、要求はキューに戻され再試行される)。 - -表記ルール -- Controller メソッドは `Controller.` の形式で明記。 -- `run events` は Controller が UI に通知する `run_mapping` の `/run/...` エンドポイント名を列挙します。 - -以下は `mainloop.py` の `mapping` に基づいた、主要エンドポイントの詳細(カテゴリ順)。 - -1) メイン操作(チャット/翻訳/転写) - -- Endpoint: `/set/enable/translation` - - Controller: `Controller.setEnableTranslation` - - data: null - - success: {status:200, result: true} - - error example: {status:400, result:{message:"Translation disabled due to VRAM overflow", data: False}} - - run events: `/run/enable_translation` を発行して UI に状態を通知する。 - -- Endpoint: `/set/disable/translation` - - Controller: `Controller.setDisableTranslation` - - data: null - - success: {status:200, result: false} - - run events: `/run/enable_translation` - -- Endpoint: `/set/enable/transcription_send` - - Controller: `Controller.setEnableTranscriptionSend` - - data: null - - success: {status:200, result: true} - - side-effect: `Controller.startThreadingTranscriptionSendMessage` を呼びバックグラウンドで音声転写を開始する。 - - run events: `/run/enable_transcription_send` - -- Endpoint: `/set/disable/transcription_send` - - Controller: `Controller.setDisableTranscriptionSend` - - data: null - - success: {status:200, result: false} - -- Endpoint: `/run/send_message_box` - - Controller: `Controller.sendMessageBox` -> 内部で `Controller.chatMessage` - - data: {"id": <任意>, "message": "..."} - - success example: {status:200, result: {"id":123, "original":{...}, "translations":[...]}} - - run events: 転送先言語や翻訳結果があれば `/run/transcription_send_mic_message` などが発行される。 - -- Endpoint: `/run/send_text_overlay` - - Controller: `Controller.sendTextOverlay` - - data: object (例: {"text":"Hello","lang":"English"}) - - success: echo back the data - - side-effect: オーバーレイ更新(small/large に応じた出力) - -2) 表示 / 外観設定 -- Endpoint: `/get/data/version` - - Controller: `Controller.getVersion` - - data: null - - success: 
{status:200, result: config.VERSION} - -- Endpoint: `/get/data/transparency` / `/set/data/transparency` - - Controller: `Controller.getTransparency` / `Controller.setTransparency` - - data for set: integer (0-255 等、設定側で検証) - - success example: {status:200, result: } - -(UI スケーリング、textbox スケーリング、font_family, ui_language 等の /get と /set は同様のパターン: Controller の getXXX / setXXX を呼ぶ) - -3) 計算デバイス関連 -- Endpoint: `/get/data/translation_compute_device_list` -> `Controller.getComputeDeviceList` - - data: null - - result: list of device descriptors (構造は `config.SELECTABLE_COMPUTE_DEVICE_LIST` に従う) - -- Endpoint: `/set/data/selected_translation_compute_device` - - Controller: `Controller.setSelectedTranslationComputeDevice` - - data: device descriptor (例: {"name":"cuda:0","type":"gpu"}) - - side-effects: `model.setChangedTranslatorParameters(True)` が呼ばれ、実行時にモデル再ロードが必要な場合がある。 - - success: {status:200, result: selected_device} - -4) 翻訳/重み管理 -- Endpoint: `/get/data/selectable_ctranslate2_weight_type_dict` - - Controller: `Controller.getSelectableCtranslate2WeightTypeDict` - - result: dict mapping weight_type -> bool - -- Endpoint: `/run/download_ctranslate2_weight` - - Controller: `Controller.downloadCtranslate2Weight` - - data: "weight_type" (例: "m2m100_418m") - - behavior: 非同期フラグでスレッド起動可能。進捗は run events `/run/download_progress_ctranslate2_weight` を発行。完了時に `/run/downloaded_ctranslate2_weight`。 - -- Endpoint: `/set/data/deepl_auth_key` - - Controller: `Controller.setDeeplAuthKey` - - data: string (API key) - - behavior: 内部で `model.authenticationTranslatorDeepLAuthKey` を実行して検証。失敗時は 400 を返す。 - -5) トランスクリプション / デバイス -- Endpoint: `/get/data/mic_host_list` -> `Controller.getMicHostList` - - data: null - - result: dict/list of hosts - -- Endpoint: `/set/data/selected_mic_host` -> `Controller.setSelectedMicHost` - - data: host identifier (string) - - side-effects: デフォルトデバイスを `model.getMicDefaultDevice()` で選択し、エネルギーチェックや転写スレッドの再起動が発生する場合がある。 - -- Endpoint: `/set/data/mic_threshold` -> 
`Controller.setMicThreshold` - - data: integer - - validation: 0 <= value <= config.MAX_MIC_THRESHOLD - - success: {status:200, result: new_value} error: 400 with message and old value - -6) スピーカー関連(受信) -- Endpoint: `/set/data/selected_speaker_device` -> `Controller.setSelectedSpeakerDevice` - - data: device descriptor - - side-effects: スピーカー転写スレッド(ENABLE_CHECK_ENERGY_RECEIVE)を再起動する可能性あり - -7) Whisper / トランスクリプション重み -- Endpoint: `/run/download_whisper_weight` - - Controller: `Controller.downloadWhisperWeight` - - data: "weight_type" - - run events: `/run/download_progress_whisper_weight`, `/run/downloaded_whisper_weight` - -8) オーバーレイ / VR -- Endpoint: `/set/enable/overlay_small_log` -> `Controller.setEnableOverlaySmallLog` - - side-effect: `model.startOverlay()` を呼び、`model.updateOverlaySmallLog` で描画が更新される - -9) WebSocket / OSC / Watchdog -- Endpoint: `/set/data/websocket_host` -> `Controller.setWebSocketHost` - - validation: IP 形式チェック (`isValidIpAddress`) - - if WebSocket server running: attempts to restart server on new host/port (checks availability via `isAvailableWebSocketServer`) - -- Endpoint: `/set/data/osc_ip_address` -> `Controller.setOscIpAddress` - - validation: IP 形式。失敗時は 400 を返す。 - -- Endpoint: `/run/feed_watchdog` -> `Controller.feedWatchdog` - - Controller: `Controller.feedWatchdog` ➜ `model.feedWatchdog()` - -共通的な失敗モード(クライアント実装者向けメモ) -- 無効なパラメータ: 400 と {message,data} を返す。 -- ロック: 423 (Locked endpoint) — UI 側はリトライまたはキュー内での再試行を待つ。 -- 内部エラー: 500 とエラーメッセージ(詳細はログ)を返す。 -- VRAM OOM / モデルエラー: Controller は `model.detectVRAMError` を使い、必要に応じて機能無効化と run イベントで通知する。 - -付録: すぐ使える呼び出し例 -- バージョン取得 -```json -{ "endpoint": "/get/data/version", "data": null } -``` - -- タブ切替 -```json -{ "endpoint": "/set/data/selected_tab_no", "data": 1 } -``` - -- メッセージ送信(チャット) -```json -{ "endpoint": "/run/send_message_box", "data": {"id": 555, "message": "Hello world"} } -``` - -次の作業 -- ① `docs/modules/controller.md` の各メソッドとこの `docs/api.md` を突き合わせ、未記載の `run_mapping` 
イベントのペイロード例を追加します。 -- ② 軽い品質ゲート(README と runtime 注意の草案作成)を実行します。 - -## エンドポイント別 JSON スキーマ(補完) - -このセクションでは `mainloop.py` の `mapping` に定義された全エンドポイントをパターンごとに整理し、クライアントが送信すべき `request` と期待される `response` の JSON スキーマを明示します。多数のエンドポイントは共通パターンに従うため、パターン定義と代表例でほとんどのケースをカバーしています。 - -共通ルール -- リクエストは必ず 1 行 JSON: {"endpoint": "", "data": }。 -- レスポンスは {"status": , "endpoint": "", "result": } の形式(内部の `printResponse` により出力)。 - -1) /get/data/* パターン(読み取り) -- request.data: null -- response.result: 直ちに返せる JSON 値(数値/文字列/配列/辞書) -- schema(JSON Schema 風の簡易表記): - - request: - { - "endpoint": "/get/data/", - "data": null - } - - response: - { - "status": 200, - "endpoint": "/get/data/", - "result": - } - - 代表例: - - `/get/data/version` → result: string - {"status":200,"endpoint":"/get/data/version","result":"3.2.2"} - - `/get/data/mic_device_list` → result: ["Device 1", "Device 2"] - -2) /set/data/* パターン(書き込み) -- request.data: セッタが期待する型(下に代表的な型を列挙) -- response.result: 新しい値または検証済の値(成功時) -- error: バリデーション失敗時は status 400 と {message,data} - - 共通 request/response: - - request: - { - "endpoint": "/set/data/", - "data": - } - - response (success): - { - "status":200, - "endpoint":"/set/data/", - "result": - } - - response (validation error): - { - "status":400, - "endpoint":"/set/data/", - "result": {"message": "", "data": } - } - - 代表的リクエスト型一覧(多くはこの型いずれか): - - int: `/set/data/selected_tab_no`, `/set/data/transparency`, `/set/data/mic_threshold` など - - string: `/set/data/selected_mic_host`, `/set/data/selected_speaker_device`, `/set/data/deepl_auth_key` など - - dict/object: `/set/data/selected_your_languages`, `/set/data/selected_target_languages`, `/set/data/send_message_format_parts` など - - list: `/set/data/mic_word_filter` など - -3) フラグ切替(enable / disable) - -- 概要: 機能の有効化/無効化を行うエンドポイント群は、実装で定義された具体的なエンドポイント名(例: `/set/enable/translation`, `/set/disable/translation`, `/set/enable/foreground` など)で提供されています。本ドキュメントでは umbrella 的な汎用トークン(`/set/enable` や `/set/disable` 単体)は記載せず、実際に実装で定義されている concrete 
エンドポイントのみを列挙しています。 - -- 振る舞いの要点: - - リクエストの `data` は通常 `null` です。 - - 成功応答は多くの場合 boolean を返します(例: `{ "status":200, "endpoint":"/set/enable/foreground", "result": true }`)。 - - 条件により有効化/無効化ができない場合は 400 を返し、`{ "message": "...", "data": }` の形で詳細が返されます。 - -具体的なフラグ切替エンドポイントはドキュメント本文の各該当箇所で個別に列挙しています(例: `/set/enable/translation`, `/set/disable/translation`, `/set/enable/transcription_send`, `/set/disable/transcription_send`, `/set/enable/main_window_sidebar_compact_mode`, など)。 - -4) /run/*(アクション・実行系) -- request.data: アクションに依存(例: `/run/send_message_box` は {id, message}) -- response.result: 多くは action の結果(True/False, object)を返す -- 非同期で UI 更新を行う場合は `Controller.run(...)` により `/run/...` 形式の通知が stdout に出力される - - 代表例: - - `/run/send_message_box` - request.data: {"id": , "message": ""} - response.result: { - "id": , - "original": {"message": "", "transliteration": [] }, - "translations": [ {"message":"", "transliteration":[...]}, ... ] - } - - - `/run/download_ctranslate2_weight` - request.data: "" (string) - response.result: true - progress: `/run/download_progress_ctranslate2_weight` -> {"weight_type":"...","progress":0.0..1.0} - complete: `/run/downloaded_ctranslate2_weight` -> "" - -5) WebSocket / OSC / Watchdog 関連 -- `/set/data/websocket_host` : request.data:string(host) → response: {status:200, result: host} または 400 (not available) -- `/set/data/osc_ip_address` : request.data:string(ip) → validation via `isValidIpAddress` → 400 on invalid -- `/run/feed_watchdog`: request.data:null → response: {status:200,result:true} - -6) エラー応答の標準形 -- Validation / domain error : status 400, result: {"message": "<説明>", "data": } -- Locked endpoint: status 423, result: "Locked endpoint"(mainloop が再試行のためキューに戻す) -- Internal error: status 500, result: "" - -7) run events(UI 更新通知)- 参考(主要イベントのみ再掲) -- `/run/connected_network` : bool -- `/run/enable_ai_models` : bool -- `/run/initialization_progress` : int (1..4) - - `/run/transcription_send_mic_message` / 
`/run/transcription_receive_speaker_message` : オブジェクト(original/translations, see above) - -追加の run イベント(ランタイム検証で未記載と判定されたため追記): - -- `/run/enable_transcription_receive` : bool - - 説明: スピーカー側転写(transcription receive)の有効/無効を UI に通知します。 - -- `/run/transcription_send_mic_message` : object - - payload: 同 `/run/transcription_send_mic_message` の構造(original + translations) - - 説明: マイク側で転写結果が生成され、UI に送信するための通知です。 - -- `/run/transcription_receive_speaker_message` : object - - payload: 同 `/run/transcription_receive_speaker_message` の構造 - - 説明: スピーカー側で転写結果が生成されたときに発行されます。 - -- `/run/error_transcription_mic_vram_overflow` : object (400) - - payload: {"message": "VRAM out of memory during mic transcription", "data": ""} - - 説明: マイク転写中に VRAM OOM が発生した際に通知します。 - -- `/run/error_transcription_speaker_vram_overflow` : object (400) - - payload: {"message": "VRAM out of memory during speaker transcription", "data": ""} - - 説明: スピーカー転写中に VRAM OOM が発生した際に通知します。 - -補遺: 全エンドポイント一覧と期待型の速見表 -- `/get/data/*` : data=null -> result: primitive|array|object -- `/set/data/*` : data: 型指定 (int|string|dict|list) -> result: new value or validation error -- `/set/enable/*` `/set/disable/*` : data=null -> result: bool -- `/run/*` : data: action-specific -> result: action result object / bool - -ファイルの更新履歴 -- このドキュメントは `mainloop.py` の `mapping` と `controller.py` の `run_mapping` を参照して作成しました。将来的にエンドポイントを追加した場合は同じ箇所を参照して本ドキュメントを更新してください。 - ----- - -完了: エンドポイント別スキーマの補完を行いました。次は軽い品質ゲート(lint/typecheck)の実行を提案します。 - diff --git a/src-python/docs/architecture.md b/src-python/docs/architecture.md deleted file mode 100644 index 8fcf20e9..00000000 --- a/src-python/docs/architecture.md +++ /dev/null @@ -1,21 +0,0 @@ -# アーキテクチャ概観 - -VRCT(src-python)は、ローカル音声キャプチャ・音声認識・翻訳・VR 表示・OSC/ WebSocket 連携を統合するアプリケーションです。主な責務は次の通り。 - -- device_manager: オーディオ入出力デバイスの発見、監視、コールバック通知。 -- transcription (models/transcription/*): マイク/スピーカーからの音声取得、認識(Google/Whisper)、議事録管理。 -- translation (models/translation/*): 
翻訳エンジン(DeepL/API、CTranslate2、Google など)管理と実行。 -- overlay (models/overlay/*): VR オーバーレイの画像生成と OpenVR を使った描画管理。 -- osc (models/osc/osc.py): VRChat 等との OSC(および OSCQuery)でのやり取り。 -- websocket (models/websocket/*): 外部クライアント向け WebSocket ブロードキャスト。 -- model.py: 高レベルなファサード。各機能のインスタンス化とランタイム操作。 -- controller.py: UI/外部メッセージを受け、config を更新・機能を起動するコマンド実行層。 -- mainloop.py: stdin 経由のコマンド受付ループとマッピング定義。GUI からの操作を受ける想定。 -- utils.py: ロギング、ネットワークチェック、デバイス/計算デバイスタイプ判定などのユーティリティ。 -- config.py: シングルトン設定ストア。アプリ起動中に共有して使うすべての設定値。 - -設計上のポイント: -- シングルトン/ファサード: `model` と `config` はシングルトンでグローバルに参照される。これにより UI 層(Controller)と低レイヤ(models/*)の橋渡しを行う。 -- 非同期処理: デバイス監視、音声録音・認識、WebSocket サーバー、Overlay のループはそれぞれ別スレッド/非同期ループで実行される。 -- フォールバック: 翻訳はまず選択されたエンジンを使い、失敗時に CTranslate2 にフォールバックする仕組みがある。 -- VRAM エラー検出: Whisper / CTranslate2 等で VRAM 不足が起きた場合、特殊なエラー検出を行い翻訳/音声機能を無効化して回復を試みる。 diff --git a/src-python/docs/config.md b/src-python/docs/config.md new file mode 100644 index 00000000..7d7d3f6e --- /dev/null +++ b/src-python/docs/config.md @@ -0,0 +1,433 @@ +# config.py ドキュメント + +## 概要 +`config.py` は、アプリケーションの全設定を一元管理するシングルトンクラス `Config` を提供するモジュール。設定値の読み込み・保存・検証を行い、JSON ファイルへの永続化をデバウンス機能付きで実現する。 + +## 主要機能 +- シングルトンパターンによる設定の一元管理 +- JSONファイル (`config.json`) からの設定読み込みと自動保存 +- デバウンス機能による書き込み最適化(デフォルト2秒) +- 読み取り専用プロパティと読み書き可能プロパティの明確な分離 +- オプショナルモジュールのセーフガードインポート(環境依存の依存関係を安全に処理) +- プロパティセッター内での型チェックとバリデーション +- `@json_serializable` デコレータによる永続化対象プロパティの管理 + +## アーキテクチャ + +### デザインパターン +- **シングルトンパターン**: `__new__` メソッドで単一インスタンスを保証 +- **プロパティパターン**: getter/setter による型安全なアクセス制御 + +### 設定の分類 +1. **読み取り専用設定** (Read Only) + - アプリケーションバージョン、パス、URL、定数など + - プロパティのみ(setter なし) + +2. **ランタイム設定** (Read Write) + - 機能の有効/無効フラグ + - 実行時の状態管理 + - JSON保存されない一時的な設定 + +3. 
**永続化設定** (Save Json Data) + - ユーザー設定、デバイス選択、UI設定など + - `@json_serializable` デコレータでマーク + - `saveConfig()` 経由で自動保存 + +## 使用方法 + +### 基本的な使い方 + +```python +from config import config + +# 設定値の取得(読み取り専用) +version = config.VERSION +app_path = config.PATH_LOCAL + +# 設定値の取得(読み書き可能) +current_tab = config.SELECTED_TAB_NO +mic_threshold = config.MIC_THRESHOLD + +# 設定値の変更(自動保存される) +config.SELECTED_TAB_NO = "2" +config.MIC_THRESHOLD = 500 +config.TRANSPARENCY = 80 + +# 即座に保存する場合 +config.MAIN_WINDOW_GEOMETRY = {"x_pos": 100, "y_pos": 200, "width": 900, "height": 700} +# MESSAGE_BOX_RATIO と MAIN_WINDOW_GEOMETRY は immediate_save=True で即座に保存 +``` + +### デバウンス保存の仕組み + +```python +# 通常の設定変更: 2秒後に保存 +config.UI_LANGUAGE = "ja" +config.FONT_FAMILY = "Arial" # 前のタイマーがキャンセルされ、新たに2秒のタイマー開始 + +# 即座保存が必要な設定: デバウンスなし +config.MESSAGE_BOX_RATIO = 15 # 即座にファイル書き込み +``` + +## 動作環境・依存関係 + +### 必須依存 +- Python 3.10以上(match-case 構文使用) +- `torch`: CUDA利用可否の判定に使用 +- `threading`: デバウンスタイマー用 + +### オプション依存(セーフガード付き) +以下のモジュールはインポートに失敗しても動作する: +- `device_manager`: デバイス管理(マイク/スピーカー) +- `models.translation.translation_languages`: 翻訳言語リスト +- `models.translation.translation_utils`: CTranslate2 重みリスト +- `models.transcription.transcription_languages`: 音声認識言語リスト +- `models.transcription.transcription_whisper`: Whisper モデルリスト + +### プロジェクト内依存 +- `utils`: エラーロギング、辞書構造検証、計算デバイスリスト取得 + +## ファイル構成 + +### 主要クラス: `Config` + +#### クラス属性 +```python +_instance: Config | None # シングルトンインスタンス +_config_data: Dict[str, Any] # JSON保存用データ +_timer: Optional[threading.Timer] # デバウンスタイマー +_debounce_time: int = 2 # デバウンス時間(秒) +``` + +#### 主要メソッド + +**初期化・保存** +- `__new__(cls)`: シングルトンインスタンス生成・初期化 +- `init_config()`: デフォルト値の設定 +- `load_config()`: JSONファイルから設定読み込み +- `saveConfig(key, value, immediate_save=False)`: 設定の保存(デバウンス付き) +- `saveConfigToFile()`: JSONファイルへの即座書き込み + +**デコレータ** +- `@json_serializable(var_name)`: 永続化対象プロパティのマーク + +### 設定プロパティ一覧 + +#### 読み取り専用設定(23項目) + +| プロパティ名 | 型 | 説明 | デフォルト値 | 
+|------------|----|----|------------| +| `VERSION` | str | アプリケーションバージョン | "3.3.0" | +| `PATH_LOCAL` | str | アプリケーションローカルパス | 実行時決定 | +| `PATH_CONFIG` | str | 設定ファイルパス | `{PATH_LOCAL}/config.json` | +| `PATH_LOGS` | str | ログディレクトリパス | `{PATH_LOCAL}/logs` | +| `GITHUB_URL` | str | GitHub API URL | リポジトリURL | +| `UPDATER_URL` | str | アップデーターAPIの URL | アップデーターURL | +| `BOOTH_URL` | str | Booth 販売ページURL | Booth URL | +| `DOCUMENTS_URL` | str | ドキュメントURL | Notion URL | +| `DEEPL_AUTH_KEY_PAGE_URL` | str | DeepL認証キー取得ページ | DeepL URL | +| `MAX_MIC_THRESHOLD` | int | マイクしきい値の最大値 | 2000 | +| `MAX_SPEAKER_THRESHOLD` | int | スピーカーしきい値の最大値 | 4000 | +| `WATCHDOG_TIMEOUT` | int | Watchdog タイムアウト(秒) | 60 | +| `WATCHDOG_INTERVAL` | int | Watchdog チェック間隔(秒) | 20 | +| `SELECTABLE_TAB_NO_LIST` | List[str] | 選択可能タブ番号 | ["1", "2", "3"] | +| `SELECTED_TAB_TARGET_LANGUAGES_NO_LIST` | List[str] | ターゲット言語タブ番号 | ["1", "2", "3"] | +| `SELECTABLE_CTRANSLATE2_WEIGHT_TYPE_LIST` | List[str] | CTranslate2重みタイプリスト | 動的取得 | +| `SELECTABLE_WHISPER_WEIGHT_TYPE_LIST` | List[str] | Whisper重みタイプリスト | 動的取得 | +| `SELECTABLE_TRANSLATION_ENGINE_LIST` | List[str] | 翻訳エンジンリスト | 動的取得 | +| `SELECTABLE_TRANSCRIPTION_ENGINE_LIST` | List[str] | 音声認識エンジンリスト | 動的取得 | +| `SELECTABLE_UI_LANGUAGE_LIST` | List[str] | UI言語リスト | ["en", "ja", "ko", "zh-Hant", "zh-Hans"] | +| `COMPUTE_MODE` | str | 計算モード | "cuda" or "cpu" | +| `SELECTABLE_COMPUTE_DEVICE_LIST` | List[Dict] | 選択可能な計算デバイスリスト | 動的取得 | +| `SEND_MESSAGE_BUTTON_TYPE_LIST` | List[str] | 送信ボタンタイプリスト | ["show", "hide", "show_and_disable_enter_key"] | + +#### ランタイム設定(10項目) + +| プロパティ名 | 型 | 説明 | デフォルト値 | JSON保存 | +|------------|----|----|-----------|---------| +| `ENABLE_TRANSLATION` | bool | 翻訳機能有効フラグ | False | なし | +| `ENABLE_TRANSCRIPTION_SEND` | bool | 送信音声認識有効フラグ | False | なし | +| `ENABLE_TRANSCRIPTION_RECEIVE` | bool | 受信音声認識有効フラグ | False | なし | +| `ENABLE_FOREGROUND` | bool | フォアグラウンド有効フラグ | False | なし | +| `ENABLE_CHECK_ENERGY_SEND` | bool | 送信エネルギーチェック有効 | 
False | なし | +| `ENABLE_CHECK_ENERGY_RECEIVE` | bool | 受信エネルギーチェック有効 | False | なし | +| `SELECTABLE_CTRANSLATE2_WEIGHT_TYPE_DICT` | Dict[str, bool] | CTranslate2重み状態辞書 | {} | なし | +| `SELECTABLE_WHISPER_WEIGHT_TYPE_DICT` | Dict[str, bool] | Whisper重み状態辞書 | {} | なし | +| `SELECTABLE_TRANSLATION_ENGINE_STATUS` | Dict[str, bool] | 翻訳エンジン状態辞書 | {} | なし | +| `SELECTABLE_TRANSCRIPTION_ENGINE_STATUS` | Dict[str, bool] | 音声認識エンジン状態辞書 | {} | なし | + +#### 永続化設定(60項目以上) + +**メインウィンドウ設定** +- `SELECTED_TAB_NO`: 選択中のタブ番号 +- `SELECTED_TRANSLATION_ENGINES`: タブごとの翻訳エンジン選択 +- `SELECTED_YOUR_LANGUAGES`: タブごとの入力言語設定 +- `SELECTED_TARGET_LANGUAGES`: タブごとのターゲット言語設定 +- `SELECTED_TRANSCRIPTION_ENGINE`: 音声認識エンジン +- `CONVERT_MESSAGE_TO_ROMAJI`: ローマ字変換有効フラグ +- `CONVERT_MESSAGE_TO_HIRAGANA`: ひらがな変換有効フラグ +- `MAIN_WINDOW_SIDEBAR_COMPACT_MODE`: サイドバーコンパクトモード +- `SEND_MESSAGE_FORMAT_PARTS`: 送信メッセージフォーマット +- `RECEIVED_MESSAGE_FORMAT_PARTS`: 受信メッセージフォーマット + +**UIウィンドウ設定** +- `TRANSPARENCY`: ウィンドウ透明度(0-100) +- `UI_SCALING`: UIスケーリング(%) +- `TEXTBOX_UI_SCALING`: テキストボックススケーリング(%) +- `MESSAGE_BOX_RATIO`: メッセージボックス比率(即座保存) +- `SEND_MESSAGE_BUTTON_TYPE`: 送信ボタンタイプ +- `SHOW_RESEND_BUTTON`: 再送信ボタン表示フラグ +- `FONT_FAMILY`: フォントファミリー +- `UI_LANGUAGE`: UI言語 +- `MAIN_WINDOW_GEOMETRY`: ウィンドウ位置・サイズ(即座保存) + +**マイク設定** +- `AUTO_MIC_SELECT`: 自動マイク選択 +- `SELECTED_MIC_HOST`: 選択されたマイクホスト +- `SELECTED_MIC_DEVICE`: 選択されたマイクデバイス +- `MIC_THRESHOLD`: マイクしきい値 +- `MIC_AUTOMATIC_THRESHOLD`: 自動しきい値調整 +- `MIC_RECORD_TIMEOUT`: 録音タイムアウト(秒) +- `MIC_PHRASE_TIMEOUT`: フレーズタイムアウト(秒) +- `MIC_MAX_PHRASES`: 最大フレーズ数 +- `MIC_WORD_FILTER`: ワードフィルターリスト +- `MIC_AVG_LOGPROB`: 平均対数確率しきい値 +- `MIC_NO_SPEECH_PROB`: 無音確率しきい値 + +**スピーカー設定** +- `AUTO_SPEAKER_SELECT`: 自動スピーカー選択 +- `SELECTED_SPEAKER_DEVICE`: 選択されたスピーカーデバイス +- `SPEAKER_THRESHOLD`: スピーカーしきい値 +- `SPEAKER_AUTOMATIC_THRESHOLD`: 自動しきい値調整 +- `SPEAKER_RECORD_TIMEOUT`: 録音タイムアウト(秒) +- `SPEAKER_PHRASE_TIMEOUT`: フレーズタイムアウト(秒) +- `SPEAKER_MAX_PHRASES`: 最大フレーズ数 +- `SPEAKER_AVG_LOGPROB`: 平均対数確率しきい値 +- 
`SPEAKER_NO_SPEECH_PROB`: 無音確率しきい値 + +**モデル設定** +- `SELECTED_TRANSLATION_COMPUTE_DEVICE`: 翻訳計算デバイス +- `SELECTED_TRANSCRIPTION_COMPUTE_DEVICE`: 音声認識計算デバイス +- `CTRANSLATE2_WEIGHT_TYPE`: CTranslate2重みタイプ +- `SELECTED_TRANSLATION_COMPUTE_TYPE`: 翻訳計算タイプ +- `WHISPER_WEIGHT_TYPE`: Whisper重みタイプ +- `SELECTED_TRANSCRIPTION_COMPUTE_TYPE`: 音声認識計算タイプ + +**通信設定** +- `OSC_IP_ADDRESS`: OSC IPアドレス(デフォルト: "127.0.0.1") +- `OSC_PORT`: OSCポート(デフォルト: 9000) +- `AUTH_KEYS`: 認証キー辞書(DeepL API等) +- `WEBSOCKET_HOST`: WebSocketホスト +- `WEBSOCKET_PORT`: WebSocketポート +- `WEBSOCKET_SERVER`: WebSocketサーバー有効フラグ(非永続化) + +**オーバーレイ設定** +- `OVERLAY_SMALL_LOG`: 小ログオーバーレイ有効 +- `OVERLAY_SMALL_LOG_SETTINGS`: 小ログオーバーレイ設定(位置、回転、表示時間等) +- `OVERLAY_LARGE_LOG`: 大ログオーバーレイ有効 +- `OVERLAY_LARGE_LOG_SETTINGS`: 大ログオーバーレイ設定 +- `OVERLAY_SHOW_ONLY_TRANSLATED_MESSAGES`: 翻訳メッセージのみ表示 + +**その他設定** +- `HOTKEYS`: ホットキー設定辞書(即座保存) +- `PLUGINS_STATUS`: プラグイン状態リスト(即座保存) +- `USE_EXCLUDE_WORDS`: 除外ワード機能使用フラグ +- `AUTO_CLEAR_MESSAGE_BOX`: メッセージボックス自動クリア +- `SEND_ONLY_TRANSLATED_MESSAGES`: 翻訳メッセージのみ送信 +- `SEND_MESSAGE_TO_VRC`: VRChatへメッセージ送信 +- `SEND_RECEIVED_MESSAGE_TO_VRC`: 受信メッセージをVRChatへ送信 +- `LOGGER_FEATURE`: ロガー機能有効 +- `VRC_MIC_MUTE_SYNC`: VRChatマイクミュート同期 +- `NOTIFICATION_VRC_SFX`: VRChat通知効果音 + +## 内部実装の詳細 + +### デバウンス保存の実装 + +```python +def saveConfig(self, key: str, value: Any, immediate_save: bool = False) -> None: + self._config_data[key] = value + + # 既存のタイマーをキャンセル + if isinstance(self._timer, threading.Timer) and self._timer.is_alive(): + self._timer.cancel() + + if immediate_save: + self.saveConfigToFile() + else: + # 2秒後に保存するタイマーをセット + self._timer = threading.Timer(self._debounce_time, self.saveConfigToFile) + self._timer.daemon = True + self._timer.start() +``` + +### プロパティのバリデーション例 + +```python +@SELECTED_TAB_NO.setter +def SELECTED_TAB_NO(self, value): + if isinstance(value, str): + if value in self.SELECTABLE_TAB_NO_LIST: + self._SELECTED_TAB_NO = value + self.saveConfig(inspect.currentframe().f_code.co_name, value) 
+``` + +各setterは以下のパターンを実装: +1. 型チェック (`isinstance`) +2. 値の範囲・有効性チェック +3. 内部変数への代入 +4. `saveConfig` 呼び出し(永続化対象の場合) + +### メッセージフォーマット構造 + +```python +{ + "message": { + "prefix": "", # メッセージ前置文字列 + "suffix": "" # メッセージ後置文字列 + }, + "separator": "\n", # メッセージと翻訳の区切り + "translation": { + "prefix": "", # 翻訳前置文字列 + "separator": "\n", # 複数翻訳の区切り + "suffix": "" # 翻訳後置文字列 + }, + "translation_first": False # 翻訳を先に表示するか +} +``` + +### オーバーレイ設定構造 + +```python +{ + "x_pos": 0.0, # X座標 + "y_pos": 0.0, # Y座標 + "z_pos": 0.0, # Z座標 + "x_rotation": 0.0, # X軸回転 + "y_rotation": 0.0, # Y軸回転 + "z_rotation": 0.0, # Z軸回転 + "display_duration": 5, # 表示時間(秒) + "fadeout_duration": 2, # フェードアウト時間(秒) + "opacity": 1.0, # 不透明度(0.0-1.0) + "ui_scaling": 1.0, # UIスケーリング + "tracker": "HMD" # トラッカー ("HMD", "LeftHand", "RightHand") +} +``` + +## エラーハンドリング + +### セーフガードインポート +```python +try: + from device_manager import device_manager +except Exception: + device_manager = None # フォールバック値 +``` + +全ての外部モジュールインポートはtry-exceptでラップされており、インポート失敗時でも `Config` クラスは正常に動作する。 + +### 初期化エラー +```python +def __new__(cls): + if cls._instance is None: + cls._instance = super(Config, cls).__new__(cls) + try: + cls._instance.init_config() + except Exception: + errorLogging() # エラーをログに記録 + try: + cls._instance.load_config() + except Exception: + errorLogging() + return cls._instance +``` + +初期化とロード処理はそれぞれ独立してエラーハンドリングされる。 + +### 設定ロード時のエラー +```python +for key, value in self._config_data.items(): + try: + setattr(self, key, value) + except Exception: + errorLogging() # 個別設定の読み込み失敗は継続 +``` + +JSONから読み込んだ設定のうち、不正な値があっても他の設定の読み込みは継続される。 + +## パフォーマンス考慮事項 + +1. **デバウンス保存**: 頻繁な設定変更時にI/Oを削減 +2. **遅延初期化**: オプションモジュールは必要時のみロード +3. **シングルトン**: 設定オブジェクトの複製を防止 +4. **デーモンスレッド**: タイマースレッドはメインスレッド終了時に自動終了 + +## セキュリティ考慮事項 + +1. **認証キー**: `AUTH_KEYS` に格納される外部APIキーは平文でJSON保存される +2. **入力検証**: IP アドレスは `isValidIpAddress` でバリデーション +3. 
**型安全性**: 全てのセッターで型チェック実施 + +## テスト推奨事項 + +### 単体テスト +```python +def test_config_singleton(): + config1 = Config() + config2 = Config() + assert config1 is config2 + +def test_debounce_save(): + config.UI_LANGUAGE = "ja" + time.sleep(1) + config.UI_LANGUAGE = "en" + # 2秒以内の変更は1回のみ保存される + time.sleep(2.5) + # ここで保存完了 +``` + +### バリデーションテスト +```python +def test_invalid_tab_no(): + config.SELECTED_TAB_NO = "invalid" # 無視される + assert config.SELECTED_TAB_NO != "invalid" +``` + +### オプション依存のテスト +```python +def test_missing_device_manager(): + # device_manager が None でも動作すること + assert config.SELECTABLE_COMPUTE_DEVICE_LIST is not None +``` + +## マイグレーション + +### 設定ファイルのバージョンアップ +`load_config()` は存在しないキーを無視し、`init_config()` のデフォルト値を使用する。新しいバージョンでキーが追加された場合: + +1. 既存キーはJSONから読み込まれる +2. 新規キーは `init_config()` のデフォルト値が使用される +3. 次回保存時に全てのキーがJSON に書き込まれる + +## 制限事項 + +1. **マルチプロセス**: シングルトンはプロセス単位。マルチプロセス環境では各プロセスが独立したインスタンスを持つ +2. **スレッドセーフティ**: プロパティアクセス自体はスレッドセーフではない(保存タイマーのみスレッド対応) +3. **循環参照**: `device_manager` と `config` 間の循環参照に注意 +4. 
**JSON制限**: JSON にシリアライズ可能な型のみ保存可能 + +## ライセンス +プロジェクトのルートディレクトリの `LICENSE` ファイルを参照 + +## 関連ドキュメント +- `controller.md`: Controller クラスの設定使用方法 +- `mainloop.md`: メインループでの設定参照 +- `仕様書.md`: 全体仕様 +- `設計書.md`: システム設計 + +## 変更履歴 + +### v3.3.0 +- 現行バージョン +- WebSocket サーバー設定追加 +- オーバーレイ設定の拡張 diff --git a/src-python/docs/controller.md b/src-python/docs/controller.md new file mode 100644 index 00000000..d27ebb08 --- /dev/null +++ b/src-python/docs/controller.md @@ -0,0 +1,1225 @@ +# controller.py 設計書 + +## 概要 + +`controller.py` は VRCT アプリケーションのビジネスロジック層であり、フロントエンド(UI)とバックエンド(Model)の間の制御フローを担当する。音声認識、翻訳、OSC通信、オーバーレイ表示など、VRCT の全機能の調整役として動作し、各種設定の取得・更新、デバイス管理、エラーハンドリングを提供する。 + +## アーキテクチャ上の位置づけ + +``` +┌─────────────┐ +│ Frontend │ (Tauri/React) +│ (UI Layer) │ +└──────┬──────┘ + │ JSON-RPC (stdin/stdout) +┌──────▼──────┐ +│ mainloop.py │ (Communication Layer) +└──────┬──────┘ + │ Function Calls +┌──────▼──────┐ +│controller.py│ ◄── このファイル +└──────┬──────┘ + │ Facade Pattern +┌──────▼──────┐ +│ model.py │ (Business Logic Facade) +└──────┬──────┘ + │ +┌──────▼──────┐ +│ Subsystems │ (transcription, translation, osc, overlay, etc.) +└─────────────┘ +``` + +## 主要コンポーネント + +### 1. Controllerクラス + +#### コンストラクタ `__init__()` + +**責務:** Controller インスタンスの初期化と依存関係のセットアップ + +**初期化処理:** +1. **マッピング辞書の初期化:** + - `init_mapping`: 初期化時に実行するエンドポイント群 + - `run_mapping`: フロントエンドへの通知用エンドポイント +2. **コールバック関数の設定:** + - `run`: フロントエンドへの通知を送信する関数(デフォルトは no-op) +3. **Model の初期化:** + - `model.init()` を呼び出し、サブシステムを準備 + - 失敗時は `errorLogging()` でログ記録して継続 +4. 
**デバイスアクセス状態:** + - `device_access_status`: デバイスへの排他アクセス制御用フラグ + +**型ヒント:** +```python +self.init_mapping: dict +self.run_mapping: dict +self.run: Callable[[int, str, Any], None] +self.device_access_status: bool +``` + +#### セットアップメソッド + +##### `setInitMapping(init_mapping: dict) -> None` +初期化時に実行するエンドポイントマッピングを設定。`mainloop.py` から呼び出される。 + +##### `setRunMapping(run_mapping: dict) -> None` +フロントエンド通知用のエンドポイントマッピングを設定。 + +##### `setRun(run: Callable[[int, str, Any], None]) -> None` +フロントエンドへの通知関数を設定。`mainloop.py` の `printResponse()` ラッパーが渡される。 + +#### ヘルパーメソッド + +##### `_is_overlay_available() -> bool` +オーバーレイ機能が利用可能かを安全にチェック。Model が未初期化の場合の `AttributeError` を回避。 + +**実装:** +```python +try: + overlay = getattr(model, "overlay", None) + return overlay is not None and getattr(overlay, "initialized", False) +except Exception: + errorLogging() + return False +``` + +--- + +### 2. 通知メソッド(Response Functions) + +フロントエンドに状態変化を通知するメソッド群。すべて `self.run()` を介して JSON を stdout に送信。 + +#### ネットワーク関連 + +##### `connectedNetwork() -> None` +ネットワーク接続を検出したことを通知。 + +##### `disconnectedNetwork() -> None` +ネットワーク切断を検出したことを通知。 + +#### AI モデル関連 + +##### `enableAiModels() -> None` +AI モデル(CTranslate2/Whisper)が利用可能であることを通知。 + +##### `disableAiModels() -> None` +AI モデルが利用不可(ダウンロード失敗等)であることを通知。 + +#### デバイス管理関連 + +##### `updateMicHostList() -> None` +マイクホスト一覧(MME/WASAPI等)を更新。 + +##### `updateMicDeviceList() -> None` +マイクデバイス一覧を更新。 + +##### `updateSpeakerDeviceList() -> None` +スピーカーデバイス一覧を更新。 + +##### `updateSelectedMicDevice(host: str, device: str) -> None` +選択されたマイクデバイスを通知。自動デバイス選択時に使用。 + +##### `updateSelectedSpeakerDevice(device: str) -> None` +選択されたスピーカーデバイスを通知。 + +#### エネルギーレベル通知 + +##### `progressBarMicEnergy(energy: Union[bool, int]) -> None` +マイクの音量レベルを通知。`False` の場合はデバイスエラーを送信。 + +##### `progressBarSpeakerEnergy(energy: Union[bool, int]) -> None` +スピーカーの音量レベルを通知。 + +#### 設定同期 + +##### `updateConfigSettings() -> None` +初期化完了時に全設定値をフロントエンドに送信。`init_mapping` の全エンドポイントを実行。 + +--- + +### 3. 
デバイス制御メソッド + +#### 再起動系 + +##### `restartAccessMicDevices() -> None` +マイクアクセスを再起動。以下の条件で各機能を開始: +- `config.ENABLE_TRANSCRIPTION_SEND` が True: 音声認識開始 +- `config.ENABLE_CHECK_ENERGY_SEND` が True: 音量監視開始 + +##### `restartAccessSpeakerDevices() -> None` +スピーカーアクセスを再起動。 + +#### 停止系 + +##### `stopAccessMicDevices() -> None` +マイク関連機能を停止。 + +##### `stopAccessSpeakerDevices() -> None` +スピーカー関連機能を停止。 + +**使用場面:** +- デバイス変更時 +- 自動デバイス選択によるデバイス切り替え時 +- アプリケーション終了時 + +--- + +### 4. メッセージ処理メソッド + +#### `micMessage(result: dict) -> None` + +**責務:** マイク音声認識結果の処理と配信 + +**処理フロー:** +1. **結果の検証:** + - `result["text"]` と `result["language"]` を取得 + - `False` の場合はデバイスエラーを通知して終了 +2. **フィルタリング:** + - `model.checkKeywords()`: 禁止ワードチェック + - `model.detectRepeatSendMessage()`: 重複メッセージチェック +3. **翻訳処理:** + - `config.ENABLE_TRANSLATION` が True の場合: + - `model.getInputTranslate()` で翻訳実行 + - 翻訳エンジンエラー時は CTranslate2 に切り替え + - VRAM不足エラー時は翻訳機能を無効化 +4. **音訳処理:** + - `config.CONVERT_MESSAGE_TO_HIRAGANA/ROMAJI` が True の場合: + - `model.convertMessageToTransliteration()` で変換 +5. 
**配信処理:** + - **VRChat OSC:** `config.SEND_MESSAGE_TO_VRC` が True の場合 + - `messageFormatter()` でフォーマット + - `model.oscSendMessage()` で送信 + - **UI通知:** `self.run()` で transcription_mic エンドポイントに通知 + - **オーバーレイ:** `config.OVERLAY_LARGE_LOG` が True の場合 + - `model.createOverlayImageLargeLog()` で画像生成 + - `model.updateOverlayLargeLog()` で表示更新 + - **WebSocket:** サーバーが起動中の場合 + - `model.websocketSendMessage()` でブロードキャスト + - **ログファイル:** `config.LOGGER_FEATURE` が True の場合 + +**VRAM エラーハンドリング:** +```python +try: + translation, success = model.getInputTranslate(message, source_language=language) +except Exception as e: + is_vram_error, error_message = model.detectVRAMError(e) + if is_vram_error: + # 翻訳機能を無効化 + self.setDisableTranslation() + self.run(400, self.run_mapping["error_translation_mic_vram_overflow"], {...}) + return +``` + +#### `speakerMessage(result: dict) -> None` + +**責務:** スピーカー音声認識結果の処理と配信 + +**処理フロー:** `micMessage()` と同様だが、以下の違いがある: +- **オーバーレイ:** + - Small Log: 受信メッセージ用の小さなログウィンドウ + - Large Log: 送受信両方を表示するログウィンドウ +- **OSC送信:** `config.SEND_RECEIVED_MESSAGE_TO_VRC` の設定に依存 +- **翻訳:** `model.getOutputTranslate()` を使用(受信メッセージ用) + +#### `chatMessage(data: dict) -> dict` + +**責務:** UI のチャットボックスからのメッセージ処理 + +**パラメータ:** +- `data["id"]`: メッセージ ID(UI でのレスポンスマッピング用) +- `data["message"]`: 送信メッセージ + +**特殊処理:** +- **除外ワード処理:** + - `config.USE_EXCLUDE_WORDS` が True の場合 + - `replaceExclamationsWithRandom()`: `![word]` を一時的なトークンに置換 + - 翻訳後に `restoreText()` で復元 + - 最終メッセージから `![...]` を削除 +- **同期レスポンス:** + - 他のメッセージ処理と異なり、結果を `dict` で返却 + - UI が翻訳結果を待機する必要があるため + +**レスポンス形式:** +```python +{ + "status": 200, + "result": { + "id": "msg-123", + "original": { + "message": "Hello", + "transliteration": ["he", "ro"] + }, + "translations": [ + { + "message": "こんにちは", + "transliteration": ["ko", "n", "ni", "chi", "wa"] + } + ] + } +} +``` + +--- + +### 5. 
メッセージフォーマット + +#### `messageFormatter(format_type: str, translation: list, message: str) -> str` + +**責務:** OSC 送信用メッセージの整形 + +**パラメータ:** +- `format_type`: "SEND" または "RECEIVED" +- `translation`: 翻訳結果のリスト +- `message`: 元のメッセージ + +**処理ロジック:** +1. フォーマット設定を取得: + - `config.SEND_MESSAGE_FORMAT_PARTS` または `config.RECEIVED_MESSAGE_FORMAT_PARTS` +2. 各部分を構築: + - `message_part`: prefix + message + suffix + - `translation_part`: prefix + separator.join(translation) + suffix +3. 組み合わせ: + - 両方存在: `translation_first` の設定に応じて順序決定 + - 翻訳のみ: translation_part のみ + - メッセージのみ: message_part のみ + +**設定例:** +```python +config.SEND_MESSAGE_FORMAT_PARTS = { + "message": {"prefix": "[", "suffix": "] "}, + "translation": {"prefix": "", "suffix": "", "separator": " / "}, + "translation_first": False, + "separator": "" +} +# 出力例: [Hello] こんにちは / 你好 +``` + +--- + +### 6. 除外ワード処理 + +#### `replaceExclamationsWithRandom(text: str) -> Tuple[str, dict]` + +**責務:** 翻訳対象外の単語を保護 + +**処理:** +1. `![word]` パターンを検出 +2. 各マッチを `$` で始まる一時トークン(4096 から始まる連番付き)に置換 +3. 置換マップを辞書で返却 + +**用途:** 固有名詞や翻訳不要な単語を保護 + +#### `restoreText(escaped_text: str, escape_dict: dict) -> str` + +**責務:** 翻訳後のテキストに元の単語を復元 + +**処理:** 正規表現で `$` で始まる一時トークンを検出し、元の単語に置換(大文字小文字を無視) + +#### `removeExclamations(text: str) -> str` + +**責務:** 最終メッセージから `![...]` マーカーを削除 + +**処理:** `![word]` を `word` に置換 + +--- + +### 7. 
設定取得・更新メソッド(GET/SET) + +Controller には約200個の設定項目に対する getter/setter が定義されている。以下、代表的なパターンを示す。 + +#### パターン1: 単純な設定値 + +```python +@staticmethod +def getTransparency(*args, **kwargs) -> dict: + return {"status": 200, "result": config.TRANSPARENCY} + +@staticmethod +def setTransparency(data, *args, **kwargs) -> dict: + config.TRANSPARENCY = int(data) + return {"status": 200, "result": config.TRANSPARENCY} +``` + +#### パターン2: 有効/無効の切り替え + +```python +@staticmethod +def getOverlaySmallLog(*args, **kwargs) -> dict: + return {"status": 200, "result": config.OVERLAY_SMALL_LOG} + +@staticmethod +def setEnableOverlaySmallLog(*args, **kwargs) -> dict: + if config.OVERLAY_SMALL_LOG is False: + if config.OVERLAY_LARGE_LOG is False: + model.startOverlay() # 副作用: オーバーレイシステムを起動 + config.OVERLAY_SMALL_LOG = True + return {"status": 200, "result": config.OVERLAY_SMALL_LOG} + +@staticmethod +def setDisableOverlaySmallLog(*args, **kwargs) -> dict: + if config.OVERLAY_SMALL_LOG is True: + model.clearOverlayImageSmallLog() + if config.OVERLAY_LARGE_LOG is False: + model.shutdownOverlay() # 副作用: オーバーレイシステムを停止 + config.OVERLAY_SMALL_LOG = False + return {"status": 200, "result": config.OVERLAY_SMALL_LOG} +``` + +#### パターン3: バリデーション付き設定 + +```python +@staticmethod +def setMicThreshold(data, *args, **kwargs) -> dict: + try: + data = int(data) + if 0 <= data <= config.MAX_MIC_THRESHOLD: + config.MIC_THRESHOLD = data + status = 200 + else: + raise ValueError() + except Exception: + response = { + "status": 400, + "result": { + "message": "Mic energy threshold value is out of range", + "data": config.MIC_THRESHOLD + } + } + else: + response = {"status": status, "result": config.MIC_THRESHOLD} + return response +``` + +#### パターン4: 依存関係のある設定 + +```python +def setSelectedTranslationComputeDevice(self, device: str, *args, **kwargs) -> dict: + config.SELECTED_TRANSLATION_COMPUTE_DEVICE = device + config.SELECTED_TRANSLATION_COMPUTE_TYPE = "auto" + # 依存する設定を自動更新 + self.run(200, 
self.run_mapping["selected_translation_compute_type"], + config.SELECTED_TRANSLATION_COMPUTE_TYPE) + # モデルの再読み込みフラグを設定 + model.setChangedTranslatorParameters(True) + return {"status": 200, "result": config.SELECTED_TRANSLATION_COMPUTE_DEVICE} +``` + +--- + +### 8. 翻訳機能制御 + +#### `setEnableTranslation(*args, **kwargs) -> dict` + +**責務:** 翻訳機能の有効化とモデルのロード + +**処理フロー:** +1. 既に有効な場合は何もしない +2. モデル未ロードまたはパラメータ変更時: + - `model.changeTranslatorCTranslate2Model()` でモデルをロード + - VRAM不足エラーの場合: + - デフォルト設定に戻す + - エラー通知を送信 + - 翻訳を無効化 +3. `config.ENABLE_TRANSLATION = True` に設定 + +**エラーハンドリング:** +```python +try: + model.changeTranslatorCTranslate2Model() +except Exception as e: + is_vram_error, error_message = model.detectVRAMError(e) + if is_vram_error: + self.run(400, self.run_mapping["error_translation_enable_vram_overflow"], {...}) + self.setDisableTranslation() +``` + +#### `setDisableTranslation(*args, **kwargs) -> dict` + +**責務:** 翻訳機能の無効化(メモリ解放) + +#### `changeToCTranslate2Process() -> None` + +**責務:** 外部翻訳APIエラー時に CTranslate2 へ切り替え + +**処理:** +1. 現在の翻訳エンジンを無効化 +2. CTranslate2 に切り替え +3. フロントエンドに通知 + +--- + +### 9. 
音声認識制御 + +#### スレッド管理メソッド + +##### `startTranscriptionSendMessage() -> None` +マイク音声認識を開始。デバイスアクセスの排他制御を行う。 + +**排他制御:** +```python +while self.device_access_status is False: + sleep(1) # 他の処理がデバイスを使用中なら待機 +self.device_access_status = False # ロック取得 +try: + model.startMicTranscript(self.micMessage) +finally: + self.device_access_status = True # ロック解放 +``` + +**VRAMエラーハンドリング:** +- `model.detectVRAMError()` でエラーを検出 +- 音声認識を停止 +- フロントエンドに通知 + +##### `stopTranscriptionSendMessage() -> None` +マイク音声認識を停止。 + +##### `startThreadingTranscriptionSendMessage() -> None` +別スレッドで音声認識を開始。 + +##### `stopThreadingTranscriptionSendMessage() -> None` +別スレッドで音声認識を停止し、完了を待機(`join()`)。 + +**対応するスピーカー用メソッド:** +- `startTranscriptionReceiveMessage()` +- `stopTranscriptionReceiveMessage()` +- `startThreadingTranscriptionReceiveMessage()` +- `stopThreadingTranscriptionReceiveMessage()` + +--- + +### 10. エネルギー監視 + +#### `startCheckMicEnergy() -> None` +マイクの音量レベル監視を開始。`progressBarMicEnergy()` をコールバックとして渡す。 + +#### `stopCheckMicEnergy() -> None` +マイクの音量レベル監視を停止。 + +#### `startThreadingCheckMicEnergy() -> None` +別スレッドでエネルギー監視を開始。 + +#### `stopThreadingCheckMicEnergy() -> None` +別スレッドでエネルギー監視を停止し、完了を待機。 + +**対応するスピーカー用メソッド:** +- `startCheckSpeakerEnergy()` +- `stopCheckSpeakerEnergy()` +- `startThreadingCheckSpeakerEnergy()` +- `stopThreadingCheckSpeakerEnergy()` + +--- + +### 11. モデルウェイト管理 + +#### DownloadCTranslate2 クラス + +**責務:** CTranslate2 モデルのダウンロード進捗管理 + +**メソッド:** +- `progressBar(progress: float)`: 進捗率をフロントエンドに通知 +- `downloaded()`: ダウンロード完了時の処理 + - モデルの存在確認 + - 選択可能モデルリストに追加 + - フロントエンドに通知 + +#### DownloadWhisper クラス + +**責務:** Whisper モデルのダウンロード進捗管理(CTranslate2 と同様の構造) + +#### `downloadCtranslate2Weight(data: str, asynchronous: bool = True, *args, **kwargs) -> dict` + +**責務:** CTranslate2 モデルのダウンロード開始 + +**パラメータ:** +- `data`: モデルタイプ("tiny", "small", "medium" 等) +- `asynchronous`: 非同期ダウンロードの有効化 + +**処理:** +1. `DownloadCTranslate2` インスタンスを作成 +2. 
`asynchronous` が True の場合: + - `startThreadingDownloadCtranslate2Weight()` で別スレッド実行 +3. `asynchronous` が False の場合: + - `model.downloadCTranslate2ModelWeight()` で同期実行(初期化時に使用) +4. トークナイザーのダウンロード + +#### `downloadWhisperWeight(data: str, asynchronous: bool = True, *args, **kwargs) -> dict` + +**責務:** Whisper モデルのダウンロード開始(CTranslate2 と同様の構造) + +--- + +### 12. 自動デバイス選択 + +#### `applyAutoMicSelect() -> None` + +**責務:** マイクの自動選択機能を適用 + +**処理:** +1. コールバック設定: + - `device_manager.setCallbackProcessBeforeUpdateMicDevices(self.stopAccessMicDevices)` + - `device_manager.setCallbackDefaultMicDevice(self.updateSelectedMicDevice)` + - `device_manager.setCallbackProcessAfterUpdateMicDevices(self.restartAccessMicDevices)` +2. デバイス更新を強制実行: `device_manager.forceUpdateAndSetMicDevices()` +3. 監視開始: `device_manager.startMonitoring()` + +**動作フロー:** +``` +デバイス変更検出 + ↓ +stopAccessMicDevices() ← デバイス使用中の処理を停止 + ↓ +updateSelectedMicDevice() ← 新しいデフォルトデバイスを選択 + ↓ +restartAccessMicDevices() ← 新しいデバイスで処理を再開 +``` + +#### `setEnableAutoMicSelect(*args, **kwargs) -> dict` +自動マイク選択を有効化。 + +#### `setDisableAutoMicSelect(*args, **kwargs) -> dict` +自動マイク選択を無効化。両方の自動選択が無効になった場合のみ監視を停止。 + +**対応するスピーカー用メソッド:** +- `applyAutoSpeakerSelect()` +- `setEnableAutoSpeakerSelect()` +- `setDisableAutoSpeakerSelect()` + +--- + +### 13. 言語・翻訳エンジン管理 + +#### `updateTranslationEngineAndEngineList() -> None` + +**責務:** 選択された言語に応じて利用可能な翻訳エンジンを更新 + +**処理:** +1. 現在のタブの選択エンジンを取得 +2. `getTranslationEngines()` で利用可能なエンジンリストを取得 +3. 選択中のエンジンが利用不可の場合、CTranslate2 にフォールバック +4. **特殊ケース:** 入力言語と出力言語が同一の場合: + - CTranslate2 のみ利用可能(音訳のみ) +5. フロントエンドに通知 + +#### `getTranslationEngines(*args, **kwargs) -> dict` + +**責務:** 現在の言語設定で利用可能な翻訳エンジンを返却 + +**ロジック:** +1. `model.findTranslationEngines()` で言語ペアをサポートするエンジンを検索 +2. 
入力言語と出力言語が同一の場合: + - CTranslate2 が有効なら ["CTranslate2"] + - それ以外は [] + +#### `setSelectedYourLanguages(select: dict, *args, **kwargs) -> dict` +入力言語を設定し、`updateTranslationEngineAndEngineList()` を呼び出す。 + +#### `setSelectedTargetLanguages(select: dict, *args, **kwargs) -> dict` +出力言語を設定し、`updateTranslationEngineAndEngineList()` を呼び出す。 + +#### `swapYourLanguageAndTargetLanguage(*args, **kwargs) -> dict` + +**責務:** 入力言語と出力言語を入れ替え + +**処理:** +1. 現在のタブの入力言語と出力言語(最初の1つ)を取得 +2. 相互に入れ替え +3. `setSelectedYourLanguages()` と `setSelectedTargetLanguages()` を呼び出し +4. 両方の結果を返却 + +--- + +### 14. 音声認識エンジン管理 + +#### `updateTranscriptionEngine() -> None` + +**責務:** Whisper モデルの利用可能状況に応じて音声認識エンジンを更新 + +**処理:** +1. 現在選択されている Whisper モデルの存在確認 +2. 利用可能なエンジンリストを取得 +3. 現在のエンジンが利用不可の場合: + - Whisper ⇔ Google で切り替え + - どちらも利用不可なら Whisper にフォールバック + +#### `updateDownloadedWhisperModelWeight() -> None` + +**責務:** ダウンロード済み Whisper モデルの一覧を更新 + +**処理:** +全てのモデルタイプについて `model.checkTranscriptionWhisperModelWeight()` で存在確認。 + +--- + +### 15. OSC 通信制御 + +#### `setOscIpAddress(data, *args, **kwargs) -> dict` + +**責務:** VRChat への送信先 IP アドレスを設定 + +**処理:** +1. `isValidIpAddress()` でバリデーション +2. `model.setOscIpAddress()` で設定を適用 +3. OSC Query の状態に応じて再初期化: + - 有効な場合: `enableOscQuery()` を呼び出し + - 無効な場合: `disableOscQuery()` を呼び出し + - マイクミュート同期が有効だった場合は無効化して通知 + +**エラーハンドリング:** +- IP アドレスが無効: status 400 +- 設定適用失敗: 元の IP に戻して status 400 + +#### `setOscPort(data, *args, **kwargs) -> dict` +OSC ポート番号を設定。 + +#### `enableOscQuery() -> None` +OSC Query 機能が有効になったことをフロントエンドに通知。 + +#### `disableOscQuery(mute_sync_info: bool = False) -> None` +OSC Query 機能が無効になったことを通知。無効化された機能リストも送信。 + +--- + +### 16. DeepL API 認証 + +#### `setDeeplAuthKey(data, *args, **kwargs) -> dict` + +**責務:** DeepL API キーを設定し、認証を実行 + +**処理:** +1. キー長のバリデーション(36 または 39 文字) +2. `model.authenticationTranslatorDeepLAuthKey()` で認証 +3. 
認証成功時: + - `config.AUTH_KEYS["DeepL_API"]` に保存 + - `config.SELECTABLE_TRANSLATION_ENGINE_STATUS["DeepL_API"]` を True に + - `updateTranslationEngineAndEngineList()` を呼び出し +4. 認証失敗時: status 400 を返却 + +#### `delDeeplAuthKey(*args, **kwargs) -> dict` + +**責務:** DeepL API キーを削除 + +**処理:** +1. `config.AUTH_KEYS["DeepL_API"]` を None に +2. `config.SELECTABLE_TRANSLATION_ENGINE_STATUS["DeepL_API"]` を False に +3. `updateTranslationEngineAndEngineList()` を呼び出し + +--- + +### 17. WebSocket サーバー制御 + +#### `setWebSocketHost(data, *args, **kwargs) -> dict` + +**責務:** WebSocket サーバーのホストアドレスを変更 + +**処理:** +1. `isValidIpAddress()` でバリデーション +2. サーバーが停止中の場合: + - 設定のみ変更 +3. サーバーが起動中の場合: + - 新しいホストが利用可能か確認(`isAvailableWebSocketServer()`) + - サーバーを停止 → 再起動 + - 利用不可の場合は status 400 + +#### `setWebSocketPort(data, *args, **kwargs) -> dict` +WebSocket サーバーのポート番号を変更(ロジックは `setWebSocketHost()` と同様)。 + +#### `setEnableWebSocketServer(*args, **kwargs) -> dict` + +**責務:** WebSocket サーバーを起動 + +**処理:** +1. 既に起動中なら何もしない +2. ホストとポートが利用可能か確認 +3. `model.startWebSocketServer()` で起動 +4. 利用不可の場合は status 400 + +#### `setDisableWebSocketServer(*args, **kwargs) -> dict` +WebSocket サーバーを停止。 + +--- + +### 18. VRChat マイクミュート同期 + +#### `setEnableVrcMicMuteSync(*args, **kwargs) -> dict` + +**責務:** VRChat のマイクミュート状態と音声認識の連動を有効化 + +**前提条件:** OSC Query が有効であること + +**処理:** +1. OSC Query が無効の場合は status 400 を返却 +2. `model.setMuteSelfStatus()`: 現在のミュート状態を取得 +3. `model.changeMicTranscriptStatus()`: ミュート状態に応じて音声認識を制御 +4. `config.VRC_MIC_MUTE_SYNC = True` + +#### `setDisableVrcMicMuteSync(*args, **kwargs) -> dict` +マイクミュート同期を無効化し、`model.changeMicTranscriptStatus()` を呼び出す。 + +--- + +### 19. 
Watchdog 管理 + +Watchdog は UI とバックエンド間の通信監視機能。UI からの定期的な "feed" 信号がない場合、バックエンドを強制終了する。 + +#### `startWatchdog(*args, **kwargs) -> dict` +Watchdog を起動。 + +#### `feedWatchdog(*args, **kwargs) -> dict` +Watchdog にハートビート信号を送信(UI が定期的に呼び出す)。 + +#### `setWatchdogCallback(callback) -> dict` +Watchdog タイムアウト時に呼び出すコールバック関数を設定。`mainloop.stop()` が渡される。 + +#### `stopWatchdog(*args, **kwargs) -> dict` +Watchdog を停止。 + +--- + +### 20. ソフトウェアアップデート + +#### `checkSoftwareUpdated() -> dict` + +**責務:** 最新バージョンの確認 + +**処理:** +1. `model.checkSoftwareUpdated()` でバージョン情報を取得 +2. フロントエンドに通知(`software_update_info` エンドポイント) +3. 結果を返却 + +**バージョン情報形式:** +```python +{ + "current_version": "1.2.3", + "latest_version": "1.2.4", + "update_available": True, + "download_url": "https://..." +} +``` + +#### `updateSoftware(*args, **kwargs) -> dict` + +**責務:** 通常版のアップデートを実行 + +**処理:** +1. 別スレッドで `model.updateSoftware()` を起動(ブロッキングを避けるため) +2. 即座に status 200 を返却 + +#### `updateCudaSoftware(*args, **kwargs) -> dict` + +**責務:** CUDA版のアップデートを実行 + +**処理:** `updateSoftware()` と同様だが、`model.updateCudaSoftware()` を呼び出す。 + +--- + +### 21. 初期化処理 + +#### `init(*args, **kwargs) -> None` + +**責務:** アプリケーションの完全な初期化 + +**処理フロー:** + +**1. ログのクリア** +```python +removeLog() +printLog("Start Initialization") +``` + +**2. ネットワーク接続確認** +```python +connected_network = isConnectedNetwork() +if connected_network: + self.connectedNetwork() +else: + self.disconnectedNetwork() +``` + +**3. モデルウェイトのダウンロード(進捗1/4)** +```python +self.initializationProgress(1) +if connected_network: + # CTranslate2 と Whisper を並列ダウンロード + th_download_ctranslate2 = Thread(target=self.downloadCtranslate2Weight, args=(weight_type, False)) + th_download_whisper = Thread(target=self.downloadWhisperWeight, args=(weight_type, False)) + th_download_ctranslate2.start() + th_download_whisper.start() + th_download_ctranslate2.join() + th_download_whisper.join() +``` + +**4. AI モデル状態の確認** +```python +if (model.checkTranslatorCTranslate2ModelWeight(...) 
is False or + model.checkTranscriptionWhisperModelWeight(...) is False): + self.disableAiModels() +else: + self.enableAiModels() +``` + +**5. 翻訳・音声認識エンジンの初期化(進捗2/4)** +```python +self.initializationProgress(2) +# 翻訳エンジン +for engine in config.SELECTABLE_TRANSLATION_ENGINE_LIST: + match engine: + case "CTranslate2": + # モデルウェイトの存在確認 + case "DeepL_API": + # API キーの認証 + case _: + # ネットワーク接続が必要なエンジン + +# 音声認識エンジン +for engine in config.SELECTABLE_TRANSCRIPTION_ENGINE_LIST: + # 同様のロジック +``` + +**6. エンジンと音訳の設定(進捗3/4)** +```python +self.updateDownloadedCTranslate2ModelWeight() +self.updateTranslationEngineAndEngineList() +self.updateDownloadedWhisperModelWeight() +self.updateTranscriptionEngine() + +if config.CONVERT_MESSAGE_TO_ROMAJI or config.CONVERT_MESSAGE_TO_HIRAGANA: + model.startTransliteration() +``` + +**7. 周辺機能の初期化(進捗4/4)** +```python +self.initializationProgress(4) +model.addKeywords() # ワードフィルター +self.checkSoftwareUpdated() # バージョンチェック +if config.LOGGER_FEATURE: + model.startLogger() # ログ記録 +model.startReceiveOSC() # OSC 受信 + +# OSC Query +osc_query_enabled = model.getIsOscQueryEnabled() +if osc_query_enabled: + self.enableOscQuery() + if config.VRC_MIC_MUTE_SYNC: + self.setEnableVrcMicMuteSync() +else: + # マイクミュート同期を無効化 + self.disableOscQuery(...) +``` + +**8. デバイス管理の初期化** +```python +device_manager.setCallbackHostList(self.updateMicHostList) +device_manager.setCallbackMicDeviceList(self.updateMicDeviceList) +device_manager.setCallbackSpeakerDeviceList(self.updateSpeakerDeviceList) + +if config.AUTO_MIC_SELECT: + self.applyAutoMicSelect() +if config.AUTO_SPEAKER_SELECT: + self.applyAutoSpeakerSelect() +``` + +**9. オーバーレイと WebSocket の起動** +```python +if config.OVERLAY_SMALL_LOG or config.OVERLAY_LARGE_LOG: + model.startOverlay() + +if config.WEBSOCKET_SERVER: + if isAvailableWebSocketServer(...): + model.startWebSocketServer(...) +``` + +**10. 
設定の同期と完了** +```python +self.updateConfigSettings() # 全設定をフロントエンドに送信 +printLog("End Initialization") +self.startWatchdog() # 監視開始 +``` + +--- + +## エラーハンドリング戦略 + +### 1. VRAM不足エラー + +**検出箇所:** +- 翻訳実行時(`micMessage()`, `speakerMessage()`, `chatMessage()`) +- 翻訳機能有効化時(`setEnableTranslation()`) +- 音声認識開始時(`startTranscriptionSendMessage()`, `startTranscriptionReceiveMessage()`) + +**処理:** +1. `model.detectVRAMError(e)` で VRAM エラーを検出 +2. 該当機能を無効化 +3. フロントエンドにエラー通知 +4. ログファイルに記録 + +**自動リカバリ:** +- 翻訳機能: 無効化して継続 +- 音声認識: 停止して継続 + +### 2. デバイスアクセスエラー + +**検出箇所:** +- マイク・スピーカーのアクセス時 + +**処理:** +1. `energy` が `False` の場合 +2. `error_device` エンドポイントにエラー通知 +3. 処理を継続(他の機能は影響を受けない) + +### 3. ネットワークエラー + +**検出箇所:** +- 翻訳APIの呼び出し時 +- モデルウェイトのダウンロード時 + +**処理:** +1. 外部API エラー: `changeToCTranslate2Process()` で CTranslate2 に切り替え +2. ダウンロードエラー: エラー通知を送信、AI機能を無効化 + +### 4. 設定バリデーションエラー + +**処理:** +- status 400 とエラーメッセージを返却 +- 現在の有効な設定値を `data` フィールドに含める + +**例:** +```python +{ + "status": 400, + "result": { + "message": "Mic energy threshold value is out of range", + "data": 1000 # 現在の有効な値 + } +} +``` + +--- + +## スレッド安全性 + +### 排他制御 + +#### デバイスアクセス制御 + +**問題:** 複数の機能が同時にデバイスにアクセスすると衝突 + +**解決策:** `device_access_status` フラグによる排他制御 +```python +while self.device_access_status is False: + sleep(1) # 待機 +self.device_access_status = False # ロック取得 +try: + # デバイスアクセス処理 +finally: + self.device_access_status = True # ロック解放 +``` + +**使用箇所:** +- `startTranscriptionSendMessage()` +- `startTranscriptionReceiveMessage()` +- `startCheckMicEnergy()` +- `startCheckSpeakerEnergy()` + +### デーモンスレッド + +**すべてのワーカースレッドは `daemon = True`:** +- メインスレッド終了時に自動的に終了 +- 明示的な join は必要に応じて実行(停止処理等) + +**例:** +```python +th_startTranscriptionSendMessage = Thread(target=self.startTranscriptionSendMessage) +th_startTranscriptionSendMessage.daemon = True +th_startTranscriptionSendMessage.start() +``` + +--- + +## パフォーマンス考慮事項 + +### 1. 
非同期ダウンロード + +**初期化時:** 同期ダウンロード(`asynchronous=False`) +- UI をブロックして確実にダウンロード完了を待つ + +**ユーザー操作時:** 非同期ダウンロード(`asynchronous=True`) +- 別スレッドで実行し、進捗バーで通知 + +### 2. 並列初期化 + +CTranslate2 と Whisper のダウンロードを並列実行: +```python +th_download_ctranslate2.start() +th_download_whisper.start() +th_download_ctranslate2.join() +th_download_whisper.join() +``` + +### 3. モデルの遅延ロード + +翻訳モデルは `setEnableTranslation()` が呼ばれるまでロードされない。 + +--- + +## 依存関係 + +### 外部モジュール + +```python +from typing import Callable, Any, List, Optional +from time import sleep +from subprocess import Popen +from threading import Thread +import re +``` + +### 内部モジュール + +```python +from device_manager import device_manager +from config import config +from model import model +from utils import removeLog, printLog, errorLogging, isConnectedNetwork, isValidIpAddress, isAvailableWebSocketServer +``` + +--- + +## 設定項目の分類 + +### UI関連(約20項目) +- 透明度、スケーリング、フォント、言語、ウィンドウ位置等 + +### 音声認識関連(約30項目) +- デバイス選択、閾値、タイムアウト、フィルター等 + +### 翻訳関連(約25項目) +- エンジン選択、言語ペア、モデルタイプ、計算デバイス等 + +### OSC通信関連(約15項目) +- IP アドレス、ポート、メッセージフォーマット、送信設定等 + +### オーバーレイ関連(約10項目) +- 表示設定、位置、サイズ、透明度等 + +### その他(約20項目) +- WebSocket、ログ、ホットキー、プラグイン等 + +**合計:** 約120の設定項目(getter/setter で約240メソッド) + +--- + +## 制限事項 + +### 1. グローバル状態依存 + +すべての設定が `config` モジュールのグローバル変数として管理されている。 +- **利点:** シンプルなアクセス +- **欠点:** テスタビリティの低下、並列実行時の競合リスク + +### 2. 同期レスポンスの制限 + +ほとんどのメソッドが同期的にレスポンスを返すため、重い処理(モデルロード等)は UI をブロックする可能性がある。 + +**対策:** 重い処理は別スレッドで実行し、完了通知は `self.run()` で送信 + +### 3. エラー回復の限界 + +一部のエラー(VRAM不足等)は自動回復するが、設定ファイル破損やモデルファイル破損等は手動対処が必要。 + +--- + +## テストシナリオ + +### 1. 初期化テスト + +**ケース:** +- ネットワーク接続あり・なし +- モデルウェイトあり・なし +- 不正な設定値 + +**確認項目:** +- 全エンジンの状態が正しく設定されているか +- エラーがログに記録されているか +- フロントエンドに正しい初期設定が送信されているか + +### 2. 音声認識テスト + +**ケース:** +- デバイス切り替え中に音声認識 +- VRAM不足エラーの発生 +- 重複メッセージのフィルタリング + +**確認項目:** +- 排他制御が正しく動作しているか +- エラー発生時に適切にリカバリしているか + +### 3. 
翻訳テスト + +**ケース:** +- 複数の翻訳エンジンの切り替え +- API制限エラー +- 除外ワードの処理 + +**確認項目:** +- エンジン切り替えが正しく動作するか +- 除外ワードが正しく復元されるか + +### 4. 設定変更テスト + +**ケース:** +- 無効な値の設定 +- 依存関係のある設定の変更 +- 有効/無効の切り替え + +**確認項目:** +- バリデーションが正しく動作するか +- 依存する設定が自動更新されるか + +--- + +## 今後の拡張性 + +### 1. 非同期化の推進 + +`asyncio` への移行で UI ブロッキングを完全に排除。 + +### 2. 依存性注入 + +`config` と `model` を DI コンテナで管理し、テスタビリティを向上。 + +### 3. イベント駆動アーキテクチャ + +設定変更時のイベントを発火し、各サブシステムが独立して反応。 + +### 4. エラーリカバリの強化 + +- 自動再試行メカニズム +- フォールバック設定の自動適用 +- エラー発生時の部分的な機能継続 + +--- + +## 関連ファイル + +- **mainloop.py** - 通信レイヤー、リクエストルーティング +- **model.py** - ビジネスロジックのファサード +- **config.py** - 設定管理 +- **device_manager.py** - デバイス監視・自動選択 +- **utils.py** - ログとユーティリティ関数 + +--- + +## コーディング規約 + +- **PEP 8 スタイルガイド** +- **型ヒント:** `typing` モジュールを使用 +- **Docstring:** Google スタイル(一部未実装) +- **静的メソッド:** 状態を持たないメソッドは `@staticmethod` +- **エラーハンドリング:** 防御的プログラミングを徹底 + +--- + +## まとめ + +`controller.py` は VRCT の中核となるビジネスロジック制御レイヤーであり、約120の設定項目と約200のエンドポイントを管理する。フロントエンドとバックエンドの橋渡しとして、設定の取得・更新、機能の有効化・無効化、エラーハンドリング、デバイス管理など、アプリケーション全体の動作を制御する。排他制御とスレッド管理により、複数の機能が同時に動作する環境でも安定性を保っている。VRAM不足エラーや外部APIエラーに対する自動リカバリ機能により、ユーザーエクスペリエンスの向上を実現している。 diff --git a/src-python/docs/device_manager.md b/src-python/docs/device_manager.md new file mode 100644 index 00000000..858acdc9 --- /dev/null +++ b/src-python/docs/device_manager.md @@ -0,0 +1,1427 @@ +# device_manager.py 設計書 + +## 概要 + +`device_manager.py` は VRCT アプリケーションの音声デバイス管理を担当するモジュールであり、マイクとスピーカーデバイスの検出、監視、自動選択機能を提供する。Windows の WASAPI や pycaw ライブラリを使用してリアルタイムなデバイス変更を検知し、登録されたコールバック関数を通じてアプリケーションに通知する。シングルトンパターンで実装され、遅延初期化により import 時のパフォーマンス低下を回避している。 + +## アーキテクチャ上の位置づけ + +``` +┌─────────────┐ +│controller.py│ (Business Logic Control Layer) +└──────┬──────┘ + │ Callback Registration & Query +┌──────▼──────────┐ +│device_manager.py│ ◄── このファイル +└──────┬──────────┘ + │ Device Monitoring & Enumeration +┌──────▼─────────────────────────────┐ +│ OS Audio Subsystems │ +│ - PyAudio (PortAudio wrapper) │ +│ - pyaudiowpatch 
(WASAPI loopback) │ +│ - pycaw (COM notifications) │ +│ - comtypes (COM initialization) │ +└────────────────────────────────────┘ +``` + +## 主要コンポーネント + +### 1. Client クラス + +**責務:** Windows の COM イベントコールバックを受け取り、デバイス変更を検知 + +**継承:** `pycaw.callbacks.MMNotificationClient` + +**設計パターン:** Observer パターンのコールバック実装 + +#### コンストラクタ `__init__()` + +**処理:** +```python +try: + super().__init__() +except Exception: + pass # 非 Windows 環境ではプレースホルダーオブジェクトのため例外を無視 +self.loop: bool = True +``` + +**`self.loop` フラグ:** +- True: デバイス変更なし、監視継続 +- False: デバイス変更検知、監視ループを中断 + +#### イベントハンドラー + +##### `on_default_device_changed(*args, **kwargs) -> None` +デフォルトデバイスが変更された時に Windows から呼び出される。 + +##### `on_device_added(*args, **kwargs) -> None` +新しいデバイスが接続された時に呼び出される。 + +##### `on_device_removed(*args, **kwargs) -> None` +デバイスが取り外された時に呼び出される。 + +##### `on_device_state_changed(*args, **kwargs) -> None` +デバイスの状態(有効/無効/存在しない等)が変更された時に呼び出される。 + +**すべてのハンドラーの動作:** +```python +self.loop = False # 監視ループに変更を通知 +``` + +**コメントアウトされたメソッド:** +```python +# def on_property_value_changed(self, device_id, key): +# self.loop = False +``` +デバイスプロパティの変更イベント。使用しない理由は不明だが、頻繁なイベント発火によるパフォーマンス低下を避けるためと推測される。 + +--- + +### 2. DeviceManager クラス + +**責務:** アプリケーション全体のデバイス管理機能を提供 + +**パターン:** シングルトン(`__new__` で制御) + +**プラットフォーム対応:** +- Windows: 完全な機能サポート(COM イベント監視、WASAPI loopback) +- 非 Windows: グレースフルデグレード(デフォルト値を返却、監視機能は制限的) + +--- + +### 3. 初期化メソッド + +#### `__new__(cls) -> DeviceManager` + +**責務:** シングルトンインスタンスの生成と軽量な初期化 + +**処理フロー:** +1. **インスタンスチェック:** + ```python + if cls._instance is None: + cls._instance = super(DeviceManager, cls).__new__(cls) + ``` +2. **軽量な初期化:** + ```python + cls._instance._initialized = False + try: + cls._instance.init() + except Exception: + try: + errorLogging() + except Exception: + pass # import 時のクラッシュを絶対に避ける + ``` +3. 
**既存インスタンスの返却:** + ```python + return cls._instance + ``` + +**設計思想:** +- `__new__` では重い初期化を避ける(スレッド起動や COM 初期化は行わない。ただし `init()` 経由でデバイス情報の初回取得はベストエフォートで試みる) +- `init()` を呼び出すが、監視スレッドは起動しない +- エラー時も必ずインスタンスを返却(防御的プログラミング) + +#### `init() -> None` + +**責務:** 内部状態の初期化とデバイス情報の初回取得 + +**処理フロー:** + +**1. 初期化済みチェック:** +```python +if getattr(self, "_initialized", False): + return # 既に初期化済みなら何もしない +``` + +**2. デバイス情報の初期化(デフォルト値):** +```python +self.mic_devices: Dict[str, List[Dict[str, Any]]] = { + "NoHost": [{"index": -1, "name": "NoDevice"}] +} +self.default_mic_device: Dict[str, Any] = { + "host": {"index": -1, "name": "NoHost"}, + "device": {"index": -1, "name": "NoDevice"} +} +self.speaker_devices: List[Dict[str, Any]] = [ + {"index": -1, "name": "NoDevice"} +] +self.default_speaker_device: Dict[str, Any] = { + "device": {"index": -1, "name": "NoDevice"} +} +``` + +**3. 前回状態のトラッカー:** +```python +self.prev_mic_host: List[str] = [host for host in self.mic_devices] +self.prev_mic_devices: Dict[str, List[Dict[str, Any]]] = self.mic_devices +self.prev_default_mic_device: Dict[str, Any] = self.default_mic_device +self.prev_speaker_devices: List[Dict[str, Any]] = self.speaker_devices +self.prev_default_speaker_device: Dict[str, Any] = self.default_speaker_device +``` + +**4. 更新フラグ:** +```python +self.update_flag_default_mic_device: bool = False +self.update_flag_default_speaker_device: bool = False +self.update_flag_host_list: bool = False +self.update_flag_mic_device_list: bool = False +self.update_flag_speaker_device_list: bool = False +``` + +**5. 
コールバック関数:** +```python +self.callback_default_mic_device: Optional[Callable[..., None]] = None +self.callback_default_speaker_device: Optional[Callable[..., None]] = None +self.callback_host_list: Optional[Callable[..., None]] = None +self.callback_mic_device_list: Optional[Callable[..., None]] = None +self.callback_speaker_device_list: Optional[Callable[..., None]] = None +self.callback_process_before_update_mic_devices: Optional[Callable[..., None]] = None +self.callback_process_after_update_mic_devices: Optional[Callable[..., None]] = None +self.callback_process_before_update_speaker_devices: Optional[Callable[..., None]] = None +self.callback_process_after_update_speaker_devices: Optional[Callable[..., None]] = None +``` + +**6. 監視制御:** +```python +self.monitoring_flag: bool = False +self.th_monitoring: Optional[Thread] = None +``` + +**7. 初期化完了フラグ:** +```python +self._initialized = True +``` + +**8. ベストエフォートのデバイス情報取得:** +```python +try: + if PyAudio is not None: + try: + self.update() # 実デバイス情報を取得 + except Exception: + errorLogging() +except Exception: + pass # 初期化失敗でもクラッシュしない +``` + +**設計思想:** +- すべての属性をデフォルト値で初期化(未初期化エラーを回避) +- `update()` の失敗は許容(デバイスがない環境でも動作) +- エラーは記録するが、例外を外部に投げない + +--- + +### 4. デバイス情報更新メソッド + +#### `update() -> None` + +**責務:** 現在の音声デバイス一覧とデフォルトデバイスを取得 + +**処理フロー:** + +**1. バッファの初期化:** +```python +buffer_mic_devices: Dict[str, List[Dict[str, Any]]] = {} +buffer_default_mic_device: Dict[str, Any] = { + "host": {"index": -1, "name": "NoHost"}, + "device": {"index": -1, "name": "NoDevice"} +} +buffer_speaker_devices: List[Dict[str, Any]] = [] +buffer_default_speaker_device: Dict[str, Any] = { + "device": {"index": -1, "name": "NoDevice"} +} +``` + +**2. PyAudio 可用性チェック:** +```python +if PyAudio is None: + # デフォルト値のまま終了 + self.mic_devices = buffer_mic_devices or {"NoHost": [{"index": -1, "name": "NoDevice"}]} + # ... 他のデバイス情報も設定 + return +``` + +**3. 
マイクデバイスの収集:** +```python +with PyAudio() as p: + for host_index in range(p.get_host_api_count()): + host = p.get_host_api_info_by_index(host_index) + device_count = host.get('deviceCount', 0) + for device_index in range(device_count): + device = p.get_device_info_by_host_api_device_index(host_index, device_index) + # 入力チャンネルがあり、ループバックではないデバイス + if device.get("maxInputChannels", 0) > 0 and not device.get("isLoopbackDevice", True): + buffer_mic_devices.setdefault(host["name"], []).append(device) +``` + +**ホスト API の例:** +- Windows: "MME", "Windows DirectSound", "Windows WASAPI" +- Linux: "ALSA", "PulseAudio" +- macOS: "Core Audio" + +**4. デフォルトマイクデバイスの取得:** +```python +api_info = p.get_default_host_api_info() +default_mic_device = api_info.get("defaultInputDevice", -1) + +for host_index in range(p.get_host_api_count()): + host = p.get_host_api_info_by_index(host_index) + device_count = host.get('deviceCount', 0) + for device_index in range(device_count): + device = p.get_device_info_by_host_api_device_index(host_index, device_index) + if device.get("index") == default_mic_device: + buffer_default_mic_device = {"host": host, "device": device} + break + else: + continue + break +``` + +**5. 
スピーカーループバックデバイスの収集:** +```python +speaker_devices: List[Dict[str, Any]] = [] +if paWASAPI is not None: + try: + wasapi_info = p.get_host_api_info_by_type(paWASAPI) + wasapi_name = wasapi_info.get("name") + for host_index in range(p.get_host_api_count()): + host = p.get_host_api_info_by_index(host_index) + if host.get("name") == wasapi_name: + device_count = host.get('deviceCount', 0) + for device_index in range(device_count): + device = p.get_device_info_by_host_api_device_index(host_index, device_index) + if not device.get("isLoopbackDevice", True): + # ループバックデバイスを検索 + for loopback in p.get_loopback_device_info_generator(): + if device.get("name") in loopback.get("name", ""): + speaker_devices.append(loopback) + except Exception: + pass # WASAPI が利用できない場合は無視 +``` + +**ループバックデバイスとは:** +- スピーカーから出力される音声を「録音」できる仮想デバイス +- "Stereo Mix" や "What U Hear" のような名前 +- VRChat の相手の音声を認識するために使用 + +**6. 重複排除とソート:** +```python +speaker_devices = [dict(t) for t in {tuple(d.items()) for d in speaker_devices}] or [{"index": -1, "name": "NoDevice"}] +buffer_speaker_devices = sorted(speaker_devices, key=lambda d: d.get('index', -1)) +``` + +**7. 
デフォルトスピーカーデバイスの取得:** +```python +if paWASAPI is not None: + try: + wasapi_info = p.get_host_api_info_by_type(paWASAPI) + default_speaker_device_index = wasapi_info.get("defaultOutputDevice", -1) + for host_index in range(p.get_host_api_count()): + host_info = p.get_host_api_info_by_index(host_index) + device_count = host_info.get('deviceCount', 0) + for device_index in range(0, device_count): + device = p.get_device_info_by_host_api_device_index(host_index, device_index) + if device.get("index") == default_speaker_device_index: + default_speakers = device + if not default_speakers.get("isLoopbackDevice", True): + for loopback in p.get_loopback_device_info_generator(): + if default_speakers.get("name") in loopback.get("name", ""): + buffer_default_speaker_device = {"device": loopback} + break + break + if buffer_default_speaker_device["device"].get("name") != "NoDevice": + break + except Exception: + pass +``` + +**8. エラーハンドリングと最終設定:** +```python +except Exception: + errorLogging() + +self.mic_devices = buffer_mic_devices +self.default_mic_device = buffer_default_mic_device +self.speaker_devices = buffer_speaker_devices +self.default_speaker_device = buffer_default_speaker_device +``` + +**デバイス情報の構造例:** +```python +# マイクデバイス +self.mic_devices = { + "Windows WASAPI": [ + {"index": 0, "name": "Microphone (Realtek)", "maxInputChannels": 2, ...}, + {"index": 3, "name": "Line In (USB Audio)", "maxInputChannels": 2, ...} + ], + "MME": [ + {"index": 10, "name": "マイク (Realtek)", "maxInputChannels": 2, ...} + ] +} + +# デフォルトマイクデバイス +self.default_mic_device = { + "host": {"index": 0, "name": "Windows WASAPI", ...}, + "device": {"index": 0, "name": "Microphone (Realtek)", ...} +} +``` + +--- + +### 5. 変更検出メソッド + +#### `checkUpdate() -> bool` + +**責務:** 前回取得したデバイス情報との差分を検出し、更新フラグを設定 + +**処理:** + +**1. 
デフォルトマイクデバイスの変更チェック:** +```python +if self.prev_default_mic_device["device"]["name"] != self.default_mic_device["device"]["name"]: + self.update_flag_default_mic_device = True + self.prev_default_mic_device = self.default_mic_device +``` + +**2. デフォルトスピーカーデバイスの変更チェック:** +```python +if self.prev_default_speaker_device["device"]["name"] != self.default_speaker_device["device"]["name"]: + self.update_flag_default_speaker_device = True + self.prev_default_speaker_device = self.default_speaker_device +``` + +**3. マイクホストリストの変更チェック:** +```python +if self.prev_mic_host != [host for host in self.mic_devices]: + self.update_flag_host_list = True + self.prev_mic_host = [host for host in self.mic_devices] +``` + +**4. マイクデバイスリストの変更チェック:** +```python +if ({key: [device['name'] for device in devices] for key, devices in self.prev_mic_devices.items()} != + {key: [device['name'] for device in devices] for key, devices in self.mic_devices.items()}): + self.update_flag_mic_device_list = True + self.prev_mic_devices = self.mic_devices +``` + +**比較方法:** +- デバイス名のリストのみを比較(`index` の変化は無視) +- ホストごとにグループ化して比較 + +**5. スピーカーデバイスリストの変更チェック:** +```python +if [device['name'] for device in self.prev_speaker_devices] != [device['name'] for device in self.speaker_devices]: + self.update_flag_speaker_device_list = True + self.prev_speaker_devices = self.speaker_devices +``` + +**6. 総合的な更新フラグの判定:** +```python +update_flag = ( + self.update_flag_default_mic_device or + self.update_flag_default_speaker_device or + self.update_flag_host_list or + self.update_flag_mic_device_list or + self.update_flag_speaker_device_list +) +return update_flag +``` + +**戻り値:** +- `True`: いずれかのデバイス情報が変更された +- `False`: すべてのデバイス情報が前回と同一 + +--- + +### 6. 監視メソッド + +#### `monitoring() -> None` + +**責務:** バックグラウンドでデバイス変更を監視し、変更時にコールバックを実行 + +**実行環境:** 別スレッド(`startMonitoring()` で起動) + +**処理フロー:** + +**1. 
監視ループ:** +```python +try: + while self.monitoring_flag is True: + try: + # 監視処理 + except Exception: + errorLogging() +except Exception: + errorLogging() +``` + +**2. COM イベント監視(Windows のみ):** +```python +if comtypes is not None and AudioUtilities is not None: + try: + comtypes.CoInitialize() # COM の初期化 + cb = Client() + enumerator = AudioUtilities.GetDeviceEnumerator() + enumerator.RegisterEndpointNotificationCallback(cb) + + while cb.loop is True and self.monitoring_flag is True: + sleep(1) # イベント待機 + + try: + enumerator.UnregisterEndpointNotificationCallback(cb) + except Exception: + pass # ベストエフォート + comtypes.CoUninitialize() + except Exception: + errorLogging() +``` + +**COM 監視の動作:** +- `Client` クラスのイベントハンドラーがデバイス変更を検知 +- `cb.loop` が `False` になるとループを抜ける +- COM が利用できない場合はポーリングにフォールバック + +**3. ポーリングと更新サイクル:** +```python +# 更新前の処理 +self.runProcessBeforeUpdateMicDevices() +self.runProcessBeforeUpdateSpeakerDevices() + +sleep(2) # デバイス状態の安定を待つ + +# 最大10回(20秒間)ポーリング +for _ in range(10): + self.update() + if self.checkUpdate(): + break # 変更を検知したら終了 + sleep(2) + +# コールバック通知 +self.noticeUpdateDevices() + +# 更新後の処理 +self.runProcessAfterUpdateMicDevices() +self.runProcessAfterUpdateSpeakerDevices() +``` + +**ポーリング戦略:** +- 初回 2 秒待機: デバイスの接続/切断後の不安定期間を回避 +- 最大 10 回ポーリング: デバイス変更を見逃さない +- 変更検知後は即座に次の処理へ + +**4. 
監視サイクルの繰り返し:** +```python +# while self.monitoring_flag is True の先頭に戻る +``` + +#### `startMonitoring() -> None` + +**責務:** 監視スレッドの起動 + +**処理:** +```python +if self.monitoring_flag: + return # 既に起動中 +self.monitoring_flag = True +self.th_monitoring = Thread(target=self.monitoring) +self.th_monitoring.daemon = True +self.th_monitoring.start() +``` + +**デーモンスレッド:** +- メインスレッド終了時に自動的に終了 +- アプリケーション終了を妨げない + +#### `stopMonitoring() -> None` + +**責務:** 監視スレッドの停止 + +**処理:** +```python +self.monitoring_flag = False +if getattr(self, "th_monitoring", None) is not None: + try: + self.th_monitoring.join(timeout=5) # 最大5秒待機 + except Exception: + pass # ベストエフォート +``` + +**タイムアウト設定:** +- 5 秒以内に終了しない場合は待機を諦める +- スレッドの join に失敗してもエラーを無視(防御的) + +--- + +### 7. コールバック管理メソッド + +#### デフォルトデバイス変更コールバック + +##### `setCallbackDefaultMicDevice(callback: Callable[..., None]) -> None` +デフォルトマイクデバイス変更時のコールバックを登録。 + +**コールバックシグネチャ:** +```python +def callback(host_name: str, device_name: str) -> None: + pass +``` + +##### `clearCallbackDefaultMicDevice() -> None` +コールバックをクリア。 + +##### `setCallbackDefaultSpeakerDevice(callback: Callable[..., None]) -> None` +デフォルトスピーカーデバイス変更時のコールバックを登録。 + +**コールバックシグネチャ:** +```python +def callback(device_name: str) -> None: + pass +``` + +##### `clearCallbackDefaultSpeakerDevice() -> None` +コールバックをクリア。 + +#### デバイスリスト変更コールバック + +##### `setCallbackHostList(callback: Callable[..., None]) -> None` +マイクホストリスト変更時のコールバックを登録。 + +##### `clearCallbackHostList() -> None` +コールバックをクリア。 + +##### `setCallbackMicDeviceList(callback: Callable[..., None]) -> None` +マイクデバイスリスト変更時のコールバックを登録。 + +##### `clearCallbackMicDeviceList() -> None` +コールバックをクリア。 + +##### `setCallbackSpeakerDeviceList(callback: Callable[..., None]) -> None` +スピーカーデバイスリスト変更時のコールバックを登録。 + +##### `clearCallbackSpeakerDeviceList() -> None` +コールバックをクリア。 + +#### 処理フックコールバック + +##### `setCallbackProcessBeforeUpdateMicDevices(callback: Callable[..., None]) -> None` +マイクデバイス更新前の処理を登録。 + +**使用例:** 音声認識を停止してデバイスを解放 + 
+##### `clearCallbackProcessBeforeUpdateMicDevices() -> None` +コールバックをクリア。 + +##### `setCallbackProcessAfterUpdateMicDevices(callback: Callable[..., None]) -> None` +マイクデバイス更新後の処理を登録。 + +**使用例:** 新しいデバイスで音声認識を再開 + +##### `clearCallbackProcessAfterUpdateMicDevices() -> None` +コールバックをクリア。 + +##### `setCallbackProcessBeforeUpdateSpeakerDevices(callback: Callable[..., None]) -> None` +スピーカーデバイス更新前の処理を登録。 + +##### `clearCallbackProcessBeforeUpdateSpeakerDevices() -> None` +コールバックをクリア。 + +##### `setCallbackProcessAfterUpdateSpeakerDevices(callback: Callable[..., None]) -> None` +スピーカーデバイス更新後の処理を登録。 + +##### `clearCallbackProcessAfterUpdateSpeakerDevices() -> None` +コールバックをクリア。 + +--- + +### 8. コールバック実行メソッド + +#### `runProcessBeforeUpdateMicDevices() -> None` + +**責務:** マイクデバイス更新前の処理コールバックを実行 + +**処理:** +```python +if isinstance(self.callback_process_before_update_mic_devices, Callable): + try: + self.callback_process_before_update_mic_devices() + except Exception: + errorLogging() +``` + +**型チェック:** +- `isinstance(callback, Callable)` で呼び出し可能性を確認 +- `None` の場合は何もしない + +#### `runProcessAfterUpdateMicDevices() -> None` +マイクデバイス更新後の処理コールバックを実行(同様の実装)。 + +#### `runProcessBeforeUpdateSpeakerDevices() -> None` +スピーカーデバイス更新前の処理コールバックを実行(同様の実装)。 + +#### `runProcessAfterUpdateSpeakerDevices() -> None` +スピーカーデバイス更新後の処理コールバックを実行(同様の実装)。 + +--- + +### 9. 
通知メソッド + +#### `noticeUpdateDevices() -> None` + +**責務:** 更新フラグに応じて対応するコールバックを呼び出し、フラグをリセット + +**処理:** +```python +if self.update_flag_default_mic_device is True: + self.setMicDefaultDevice() +if self.update_flag_default_speaker_device is True: + self.setSpeakerDefaultDevice() +if self.update_flag_host_list is True: + self.setMicHostList() +if self.update_flag_mic_device_list is True: + self.setMicDeviceList() +if self.update_flag_speaker_device_list is True: + self.setSpeakerDeviceList() + +# すべてのフラグをリセット +self.update_flag_default_mic_device = False +self.update_flag_default_speaker_device = False +self.update_flag_host_list = False +self.update_flag_mic_device_list = False +self.update_flag_speaker_device_list = False +``` + +#### `setMicDefaultDevice() -> None` + +**責務:** デフォルトマイクデバイス変更コールバックの実行 + +**処理:** +```python +if isinstance(self.callback_default_mic_device, Callable): + try: + self.callback_default_mic_device( + self.default_mic_device["host"]["name"], + self.default_mic_device["device"]["name"] + ) + except Exception: + errorLogging() +``` + +#### `setSpeakerDefaultDevice() -> None` + +**責務:** デフォルトスピーカーデバイス変更コールバックの実行 + +**処理:** +```python +if isinstance(self.callback_default_speaker_device, Callable): + try: + self.callback_default_speaker_device( + self.default_speaker_device["device"]["name"] + ) + except Exception: + errorLogging() +``` + +#### `setMicHostList() -> None` +マイクホストリスト変更コールバックの実行(引数なし)。 + +#### `setMicDeviceList() -> None` +マイクデバイスリスト変更コールバックの実行(引数なし)。 + +#### `setSpeakerDeviceList() -> None` +スピーカーデバイスリスト変更コールバックの実行(引数なし)。 + +--- + +### 10. 
デバイス情報取得メソッド + +#### `getMicDevices() -> Dict[str, List[Dict[str, Any]]]` + +**責務:** マイクデバイス一覧を取得 + +**処理:** +```python +if not getattr(self, '_initialized', False): + try: + self.init() + except Exception: + try: + errorLogging() + except Exception: + pass +return getattr(self, 'mic_devices', {"NoHost": [{"index": -1, "name": "NoDevice"}]}) +``` + +**安全性:** +- 未初期化の場合は `init()` を呼び出す +- 失敗時はデフォルト値を返却 + +**戻り値の例:** +```python +{ + "Windows WASAPI": [ + {"index": 0, "name": "Microphone (Realtek)", ...}, + {"index": 3, "name": "Line In (USB Audio)", ...} + ], + "MME": [ + {"index": 10, "name": "マイク (Realtek)", ...} + ] +} +``` + +#### `getDefaultMicDevice() -> Dict[str, Any]` + +**責務:** デフォルトマイクデバイスを取得 + +**戻り値の例:** +```python +{ + "host": {"index": 0, "name": "Windows WASAPI", ...}, + "device": {"index": 0, "name": "Microphone (Realtek)", ...} +} +``` + +#### `getSpeakerDevices() -> List[Dict[str, Any]]` + +**責務:** スピーカーデバイス一覧を取得 + +**戻り値の例:** +```python +[ + {"index": 5, "name": "Stereo Mix (Realtek)", "isLoopbackDevice": True, ...}, + {"index": 7, "name": "Speakers (USB Audio) [Loopback]", ...} +] +``` + +#### `getDefaultSpeakerDevice() -> Dict[str, Any]` + +**責務:** デフォルトスピーカーデバイスを取得 + +**戻り値の例:** +```python +{ + "device": {"index": 5, "name": "Stereo Mix (Realtek)", ...} +} +``` + +--- + +### 11. 強制更新メソッド + +#### `forceUpdateAndSetMicDevices() -> None` + +**責務:** マイクデバイス情報を強制的に更新し、すべてのコールバックを実行 + +**処理:** +```python +self.update() +self.setMicHostList() +self.setMicDeviceList() +self.setMicDefaultDevice() +``` + +**使用場面:** +- 自動デバイス選択機能の初回適用時 +- ユーザーが手動で更新を要求した時 + +#### `forceUpdateAndSetSpeakerDevices() -> None` + +**責務:** スピーカーデバイス情報を強制的に更新 + +**処理:** +```python +self.update() +self.setSpeakerDeviceList() +self.setSpeakerDefaultDevice() +``` + +--- + +### 12. 
モジュールレベルの使用方法 + +#### シングルトンインスタンス + +```python +device_manager = DeviceManager() +``` + +**モジュールをインポートするだけで使用可能:** +```python +from device_manager import device_manager + +# デバイス情報取得 +mic_devices = device_manager.getMicDevices() +``` + +#### デモスクリプト + +```python +if __name__ == "__main__": + print("DeviceManager demo. Call device_manager.init() and device_manager.startMonitoring() to run live monitoring.") + try: + while True: + sleep(1) + except KeyboardInterrupt: + print("exiting") +``` + +**実行方法:** +```powershell +python device_manager.py +``` + +--- + +## 依存関係 + +### 外部ライブラリ + +```python +from typing import Callable, Dict, List, Optional, Any +from time import sleep +from threading import Thread +``` + +### オプショナル依存(Windows 専用) + +```python +import comtypes # COM 初期化・終了 +from pyaudiowpatch import PyAudio, paWASAPI # WASAPI loopback サポート +from pycaw.callbacks import MMNotificationClient # デバイス変更イベント +from pycaw.utils import AudioUtilities # デバイス列挙 +``` + +**非 Windows 環境での動作:** +- すべてのオプショナル依存は `try-except` でガード +- インポート失敗時は `None` または placeholder を設定 +- デフォルト値(`NoDevice`)を返す機能は維持 + +### 内部モジュール + +```python +from utils import errorLogging +``` + +--- + +## 自動デバイス選択の動作フロー + +### Controller 側の設定(例) + +```python +# controller.py の applyAutoMicSelect() メソッド + +def applyAutoMicSelect(self) -> None: + # 1. 更新前の処理: デバイス使用中の機能を停止 + device_manager.setCallbackProcessBeforeUpdateMicDevices( + self.stopAccessMicDevices + ) + + # 2. デフォルトデバイス変更時: 新しいデバイスを選択 + device_manager.setCallbackDefaultMicDevice( + self.updateSelectedMicDevice + ) + + # 3. 更新後の処理: 新しいデバイスで機能を再開 + device_manager.setCallbackProcessAfterUpdateMicDevices( + self.restartAccessMicDevices + ) + + # 4. 初回実行 + device_manager.forceUpdateAndSetMicDevices() + + # 5. 
監視開始 + device_manager.startMonitoring() +``` + +### デバイス変更時のシーケンス図 + +``` +[ユーザーがヘッドセットを接続] + ↓ +[Windows がデフォルトデバイスを変更] + ↓ +[pycaw の Client.on_device_added() が呼ばれる] + ↓ +[client.loop = False に設定] + ↓ +[monitoring() の COM 監視ループが終了] + ↓ +[runProcessBeforeUpdateMicDevices() 実行] + ↓ +[controller.stopAccessMicDevices()] + - 音声認識を停止 + - デバイスを解放 + ↓ +[update() でデバイス情報を更新] + ↓ +[checkUpdate() で変更を検出] + ↓ +[noticeUpdateDevices() でコールバック呼び出し] + ↓ +[setMicDefaultDevice() 実行] + ↓ +[controller.updateSelectedMicDevice(host, device)] + - 設定を更新 + - フロントエンドに通知 + ↓ +[runProcessAfterUpdateMicDevices() 実行] + ↓ +[controller.restartAccessMicDevices()] + - 新しいデバイスで音声認識を開始 + ↓ +[COM 監視ループが再開] +``` + +--- + +## エラーハンドリング戦略 + +### 1. import 時のエラー + +**問題:** Windows 専用ライブラリが非 Windows 環境でインポートされる + +**対策:** +```python +try: + import comtypes +except Exception: + comtypes = None # type: ignore +``` + +**結果:** +- インポートエラーは発生しない +- `comtypes is None` で可用性を判定 +- 機能は制限されるがアプリケーションは動作 + +### 2. 初期化時のエラー + +**問題:** デバイス情報の取得に失敗 + +**対策:** +```python +try: + if PyAudio is not None: + try: + self.update() + except Exception: + errorLogging() +except Exception: + pass # デフォルト値のまま継続 +``` + +**結果:** +- 初期化は完了(`_initialized = True`) +- デバイス情報はデフォルト値(`NoDevice`) +- ログにエラーを記録 + +### 3. 監視スレッド内のエラー + +**問題:** デバイス更新中の予期しない例外 + +**対策:** +```python +try: + while self.monitoring_flag is True: + try: + # 監視処理 + except Exception: + errorLogging() # ログに記録して継続 +except Exception: + errorLogging() # 外側のループでもキャッチ +``` + +**結果:** +- エラーが発生しても監視は継続 +- ログにエラーを記録 +- スレッドはクラッシュしない + +### 4. 
コールバック実行時のエラー + +**問題:** 登録されたコールバック関数内で例外が発生 + +**対策:** +```python +if isinstance(self.callback_default_mic_device, Callable): + try: + self.callback_default_mic_device(host_name, device_name) + except Exception: + errorLogging() # ログに記録して継続 +``` + +**結果:** +- コールバックのエラーは分離される +- 他のコールバックには影響しない +- デバイス監視は継続 + +--- + +## スレッド構成 + +### メインスレッド +- アプリケーションのメインループ + +### 監視スレッド(`th_monitoring`) +- `monitoring()` メソッドを実行 +- デーモンスレッド(メインスレッド終了時に自動終了) +- `startMonitoring()` で起動 +- `stopMonitoring()` で停止 + +### スレッド同期 + +**監視フラグ:** +```python +self.monitoring_flag: bool = False +``` + +**動作:** +- `True`: 監視継続 +- `False`: 監視停止(次回ループで終了) + +**停止時の安全性:** +```python +self.monitoring_flag = False # フラグを False に +if self.th_monitoring is not None: + self.th_monitoring.join(timeout=5) # 最大5秒待機 +``` + +--- + +## パフォーマンス考慮事項 + +### 1. 遅延初期化 + +**戦略:** +- `__new__`: 軽量(インスタンス生成のみ) +- `init()`: 中程度(デバイス情報の初回取得) +- `startMonitoring()`: 重い(スレッド起動、COM 初期化) + +**利点:** +- `import device_manager` は高速 +- アプリケーション起動時のレスポンス向上 +- 使用しない機能のリソースを消費しない + +### 2. COM イベント vs ポーリング + +**COM イベント:** +- リアルタイム検知(即座に反応) +- CPU 使用率が低い(イベント待機) +- Windows 専用 + +**ポーリング:** +- 最大 20 秒の遅延(10 回 × 2 秒) +- CPU 使用率がやや高い(定期的な `update()` 呼び出し) +- クロスプラットフォーム + +**ハイブリッド方式:** +- COM が利用可能ならイベント駆動 +- COM が失敗またはポーリングにフォールバック + +### 3. デバイス情報のキャッシング + +**戦略:** +```python +self.mic_devices # キャッシュ +self.prev_mic_devices # 前回の状態 +``` + +**利点:** +- `getMicDevices()` は `update()` を呼ばない(高速) +- 変更検出が効率的(差分のみ処理) + +### 4. ポーリングの最適化 + +**初回待機(2 秒):** +```python +sleep(2) +``` +- デバイス接続後の不安定期間を回避 +- デバイスドライバーの初期化を待つ + +**最大 10 回ポーリング:** +```python +for _ in range(10): + self.update() + if self.checkUpdate(): + break # 変更検出後は即座に終了 + sleep(2) +``` +- 不要なポーリングを削減 +- 変更検出後は即座に次の処理へ + +--- + +## テストシナリオ + +### 1. 初期化テスト + +**ケース:** +- PyAudio が利用可能 +- PyAudio が利用不可(非 Windows 環境) +- デバイスが1つもない環境 + +**確認項目:** +- `_initialized` フラグが `True` になるか +- デバイス情報がデフォルト値または実デバイスで設定されているか +- エラーが適切にログされているか + +### 2. 
デバイス検出テスト + +**ケース:** +- 複数のホスト API(MME、WASAPI 等) +- 複数のマイクデバイス +- WASAPI ループバックデバイス + +**確認項目:** +- すべてのデバイスが検出されるか +- デフォルトデバイスが正しく識別されるか +- ループバックデバイスが正しく識別されるか + +### 3. 変更検出テスト + +**ケース:** +- デフォルトデバイスの変更 +- デバイスの接続・切断 +- ホスト API の変更 + +**確認項目:** +- 変更が正しく検出されるか +- 適切なフラグが設定されるか +- コールバックが呼び出されるか + +### 4. 監視スレッドテスト + +**ケース:** +- 監視の起動・停止 +- デバイス変更時の動作 +- エラー発生時の継続性 + +**確認項目:** +- スレッドが正しく起動・停止するか +- デバイス変更が検知されるか +- エラー発生時もスレッドが継続するか + +### 5. 自動デバイス選択テスト + +**ケース:** +- デフォルトデバイスの変更 +- デバイスの接続中に音声認識が動作中 +- コールバック内でエラーが発生 + +**確認項目:** +- デバイス変更前に処理が停止されるか +- デバイス変更後に処理が再開されるか +- エラーが分離されるか + +--- + +## 制限事項 + +### 1. Windows 依存機能 + +**問題:** COM イベント監視と WASAPI ループバックは Windows 専用 + +**影響:** +- 非 Windows 環境ではポーリングのみ +- リアルタイム性が低下 +- ループバックデバイスが利用不可 + +**緩和策:** +- グレースフルデグレード(デフォルト値を返却) +- プラットフォーム固有のコードを分離 + +### 2. デバイス名の曖昧性 + +**問題:** デバイス名に特殊文字やロケール依存の名前が含まれる + +**影響:** +- 名前による比較が不正確になる可能性 +- ループバックデバイスのマッチングが失敗する可能性 + +**緩和策:** +- `index` による識別も併用 +- 部分一致でループバックデバイスを検索 + +### 3. ポーリング遅延 + +**問題:** 最大 20 秒の遅延が発生する可能性 + +**影響:** +- デバイス変更の検知が遅れる +- ユーザー体験の低下 + +**緩和策:** +- COM イベント監視を優先使用 +- ポーリング間隔を短縮(2 秒) + +### 4. エラーの握りつぶし + +**問題:** 多くのエラーがログに記録されるのみで例外が投げられない + +**影響:** +- デバッグが困難 +- エラーの発生に気づきにくい + +**緩和策:** +- 詳細なエラーログ(`errorLogging()`) +- 重要なエラーは status を返却(future work) + +--- + +## 今後の改善案 + +### 1. クロスプラットフォーム対応の強化 + +**Linux (PulseAudio / ALSA):** +```python +# PulseAudio の D-Bus API でデバイス監視 +# ALSA の udev イベントでデバイス変更を検知 +``` + +**macOS (Core Audio):** +```python +# Core Audio の kAudioDevicePropertyDataSource 監視 +# IOKit でデバイスイベントを検知 +``` + +### 2. デバイス識別の改善 + +**問題:** 名前のみによる識別は不安定 + +**解決策:** +```python +device_id = { + "index": device["index"], + "name": device["name"], + "host": host["name"], + "unique_id": device.get("uniqueDeviceID", "") # WASAPI 固有 ID +} +``` + +### 3. 
非同期化(asyncio) + +**問題:** スレッド管理の複雑性 + +**解決策:** +```python +async def monitoring_async(self): + while self.monitoring_flag: + await asyncio.sleep(2) + await self.update_async() + if self.checkUpdate(): + await self.noticeUpdateDevices_async() +``` + +**利点:** +- スレッド管理が不要 +- エラーハンドリングが統一 +- パフォーマンスの向上 + +### 4. イベントログの記録 + +**問題:** デバイス変更の履歴が残らない + +**解決策:** +```python +device_change_history = [] + +def log_device_change(event_type, device_info): + device_change_history.append({ + "timestamp": datetime.now(), + "event": event_type, + "device": device_info + }) +``` + +**利点:** +- デバッグが容易 +- ユーザーサポートの向上 + +### 5. 設定の永続化 + +**問題:** 選択されたデバイスが再起動後に失われる + +**解決策:** +```python +# config.py に保存 +config.SELECTED_MIC_DEVICE_ID = { + "host": "Windows WASAPI", + "name": "Microphone (Realtek)", + "unique_id": "{0.0.0.00000000}.{...}" +} + +# 起動時に復元 +def restore_selected_device(): + saved_id = config.SELECTED_MIC_DEVICE_ID + current_devices = device_manager.getMicDevices() + # unique_id でマッチング +``` + +--- + +## 関連ファイル + +- **controller.py** - デバイス管理のコールバックを登録 +- **model.py** - デバイス情報を使用して音声認識を開始 +- **config.py** - デバイス選択の設定を保存 +- **utils.py** - エラーロギング関数 + +--- + +## コーディング規約への準拠 + +### 命名規則 + +- クラス名: `DeviceManager`, `Client` (PascalCase) +- メソッド名: `startMonitoring`, `getMicDevices` (snake_case) +- 変数名: `mic_devices`, `default_mic_device` (snake_case) +- 定数: 使用していない(`config.py` で管理) + +### 型注釈 + +**現状:** +```python +def init(self) -> None: + self.mic_devices: Dict[str, List[Dict[str, Any]]] = {...} +``` + +**改善案:** +```python +DeviceInfo = Dict[str, Any] +DeviceList = List[DeviceInfo] +HostDeviceMap = Dict[str, DeviceList] + +def init(self) -> None: + self.mic_devices: HostDeviceMap = {...} +``` + +### Docstring + +**現状:** 一部のメソッドのみ docstring あり + +**改善案:** +```python +def getMicDevices(self) -> Dict[str, List[Dict[str, Any]]]: + """Get the list of microphone devices grouped by host API. 
+ + Returns: + A dict mapping host names (e.g., "Windows WASAPI") to lists of device info dicts. + Each device dict contains keys like "index", "name", "maxInputChannels", etc. + If no devices are available, returns {"NoHost": [{"index": -1, "name": "NoDevice"}]}. + """ +``` + +--- + +## まとめ + +`device_manager.py` は VRCT のデバイス管理機能を提供する重要なモジュールであり、以下の特徴を持つ: + +1. **シングルトンパターン:** アプリケーション全体で1つのインスタンスのみ +2. **遅延初期化:** import 時のパフォーマンス低下を回避 +3. **プラットフォーム対応:** Windows で完全な機能、非 Windows でもグレースフルデグレード +4. **リアルタイム監視:** COM イベントとポーリングのハイブリッド方式 +5. **コールバックパターン:** 柔軟なイベント通知機構 +6. **防御的プログラミング:** エラーが発生してもクラッシュしない + +このモジュールは自動デバイス選択機能の中核として動作し、ユーザーがデバイスを切り替えた際に音声認識を自動的に再開することで、シームレスな体験を提供する。 diff --git a/src-python/docs/diagrams.md b/src-python/docs/diagrams.md deleted file mode 100644 index 71b42975..00000000 --- a/src-python/docs/diagrams.md +++ /dev/null @@ -1,51 +0,0 @@ -# システム図 - -以下はシステム構成の概要(Mermaid シーケンス図とテキスト版の両方)です。Mermaid がサポートされているビューアでは下のシーケンス図が描画されます。 - -```mermaid -sequenceDiagram - participant GUI as GUI (stdin/stdout) - participant Main as mainloop - participant Controller as Controller - participant Model as Model - participant Recorder as Recorder - participant Transcriber as Transcriber - participant Translator as Translator - participant Overlay as Overlay - participant OSC as OSC - participant WS as WebSocket - - GUI->>Main: send JSON endpoint - Main->>Controller: dispatch - Controller->>Model: startMicTranscript(callback) - Recorder->>Transcriber: audio data - Transcriber->>Controller: result (text, language) - Controller->>Translator: getInputTranslate(text) - Translator-->>Controller: translations - Controller->>Overlay: updateOverlay(translation) - Controller->>OSC: sendMessage(osc_message) - Controller->>WS: websocketSendMessage(event) - Controller-->>GUI: run(status, endpoint, result) -``` - -## テキスト版(簡易) - -Main process (`mainloop.py`) - - stdin -> JSON コマンド -> Main.receiver -> queue - - Main.handler -> Controller (コマンド実行) - - run(status, 
endpoint, result) -> stdout (GUI に通知) - -Controller - - config (読み書き) - - model (起動/停止/アクション) - -Model サブシステム - - device_manager (デバイス列挙/監視) - - transcription (recorder -> transcriber) - - translation (Translator) - - overlay (OverlayImage -> Overlay) - - osc (OSCHandler) - - websocket (WebSocketServer) - -データフロー(代表): 録音 -> audio_queue -> AudioTranscriber -> Controller.micMessage -> Translator -> (OSC / Overlay / WebSocket / ログ) - diff --git a/src-python/docs/mainloop.md b/src-python/docs/mainloop.md new file mode 100644 index 00000000..4536d49e --- /dev/null +++ b/src-python/docs/mainloop.md @@ -0,0 +1,346 @@ +# mainloop.py 設計書 + +## 概要 + +`mainloop.py` は VRCT アプリケーションのバックエンドエントリーポイントであり、stdin/stdout を介したフロントエンド(Tauri/React UI)との通信を担当する。JSON ベースのリクエスト/レスポンスプロトコルを実装し、複数のワーカースレッドによる並列処理と排他制御を提供する。 + +## 主要コンポーネント + +### 1. グローバル変数 + +#### `run_mapping` (dict) +フロントエンドへの通知用エンドポイントマッピング。Controllerが `run()` コールバックを通じてフロントエンドに状態変化を通知する際に使用。 + +**主要なエンドポイント:** +- `/run/enable_translation` - 翻訳機能の有効/無効状態 +- `/run/transcription_mic_message` - マイク音声認識結果 +- `/run/transcription_speaker_message` - スピーカー音声認識結果 +- `/run/error_*` - 各種エラー通知 +- `/run/initialization_complete` - 初期化完了通知 + +#### `mapping` (dict) +フロントエンドからのリクエストを処理する関数マッピング。各エンドポイントに対して: +- `status`: ロック状態(True: 処理可能, False: ロック中) +- `variable`: 実行する Controller メソッド + +**エンドポイント分類:** +- `/get/data/*` - 設定値の取得(初期化時に使用) +- `/set/data/*` - 設定値の更新 +- `/set/enable/*` - 機能の有効化 +- `/set/disable/*` - 機能の無効化 +- `/run/*` - アクション実行(メッセージ送信、ダウンロード等) + +#### `init_mapping` (dict) +初期化時に実行される `/get/data/*` エンドポイントのサブセット。アプリケーション起動時に全設定値をフロントエンドに送信するために使用。 + +### 2. Mainクラス + +#### コンストラクタ `__init__(controller_instance, mapping_data, worker_count)` + +**パラメータ:** +- `controller_instance`: Controller インスタンス +- `mapping_data`: エンドポイントマッピング辞書 +- `worker_count`: ハンドラワーカースレッド数(デフォルト: 3) + +**初期化処理:** +1. リクエストキュー (`Queue[Tuple[str, Any]]`) の作成 +2. 停止イベント (`Event`) の作成 +3. 
エンドポイント別 Lock の生成: + - `/set/enable/xxx` と `/set/disable/xxx` を `/lock/set/xxx` に正規化 + - 同一機能の有効化/無効化リクエストが競合しないよう排他制御 + +**正規化ロジックの例:** +```python +"/set/enable/translation" → "/lock/set/translation" +"/set/disable/translation" → "/lock/set/translation" +# 両方が同じロックを共有 → 排他的に実行される +``` + +#### `receiver()` メソッド + +**責務:** stdin から JSON リクエストを読み取り、キューに投入 + +**処理フロー:** +1. `sys.stdin.readline()` でブロッキング読み取り +2. JSON パース (`json.loads()`) +3. エンドポイントとデータを抽出 +4. データが存在する場合は Base64 デコード (`encodeBase64()`) +5. ログ出力 (`printLog()`) +6. キューに投入 `self.queue.put((endpoint, data))` + +**エラー処理:** +- JSON パースエラー: ログ出力して継続 +- EOF 到達: 0.1秒待機して再試行 +- その他の例外: `errorLogging()` でトレースバック記録 + +**スレッド:** デーモンスレッド `main_receiver` として起動 + +#### `handler()` メソッド + +**責務:** キューからリクエストを取り出し、適切なロックを取得して処理 + +**処理フロー:** +1. キューから `(endpoint, data)` を取得(0.5秒タイムアウト) +2. エンドポイントを正規化キーに変換 +3. 対応する Lock を取得試行(非ブロッキング) + - 取得成功 → 処理実行 → ロック解放 + - 取得失敗 → 0.05秒待機して再キュー +4. `_call_handler(endpoint, data)` を呼び出し +5. レスポンスを stdout に出力 (`printResponse()`) + +**排他制御の意義:** +- 例: 翻訳機能の有効化中に無効化リクエストが来た場合、無効化は待機 +- 異なる機能のリクエストは並列実行可能 + +**再キューロジック:** +- status == 423 (Locked): 0.1秒待機して再キュー +- これにより、初期化中の設定変更リクエストが適切にリトライされる + +**ワーカー数:** `worker_count` 個のスレッド `main_handler_0`, `main_handler_1`, ... として起動 + +#### `_call_handler(endpoint, data)` メソッド + +**責務:** 実際のビジネスロジック実行 + +**処理フロー:** +1. `mapping` から対応するハンドラを取得 +2. エンドポイントが存在しない → status 404 +3. ハンドラの `status` が False → status 423 (Locked) +4. ハンドラの `variable` 関数を実行 → `response = handler["variable"](data)` +5. 0.2秒待機(処理安定化のため) +6. status と result を抽出して返却 + +**エラー処理:** +- 例外発生時: `errorLogging()` でトレースバック記録、status 500 を返却 + +#### `start()` / `stop(wait)` メソッド + +**start():** +- `startReceiver()` - stdin 読み取りスレッド起動 +- `startHandler()` - ハンドラワーカースレッド起動 + +**stop(wait):** +- `_stop_event.set()` - 全スレッドに停止シグナル送信 +- 各スレッドを `join(timeout=remaining)` で待機(最大 `wait` 秒) + +### 3. 初期化シーケンス + +**`if __name__ == "__main__":` ブロック:** + +1. `main_instance` 作成 +2. 
`startReceiver()` - stdin リスニング開始 +3. `startHandler()` - リクエスト処理開始 +4. **Watchdog 設定:** + - `controller.setWatchdogCallback(main_instance.stop)` + - Watchdog がタイムアウトした場合にプロセス全体を停止 +5. **Controller 初期化:** + - `controller.init()` + - Model の遅延初期化、デバイス列挙、ネットワーク接続チェック + - `init_mapping` のすべてのエンドポイントを実行して初期設定をフロントエンドに送信 +6. **マッピングのアンロック:** + - すべての `mapping[key]["status"]` を True に設定 + - これにより初期化中だった機能が利用可能になる +7. `main_instance.start()` - 実質的には何もしない(既に起動済み) + +## 並列処理とスレッドセーフティ + +### スレッド構成 + +| スレッド名 | 役割 | 生存期間 | +|-----------|------|---------| +| `main_receiver` | stdin からの JSON 読み取り | プロセス終了まで | +| `main_handler_0` ~ `main_handler_N` | リクエスト処理ワーカー | プロセス終了まで | + +### 同期メカニズム + +1. **キュー (`Queue`):** + - スレッドセーフな FIFO キュー + - receiver → handler への通信チャネル + +2. **エンドポイント別 Lock (`dict[str, Lock]`):** + - 同一リソースへの競合アクセスを防止 + - 正規化キーによる enable/disable ペアの統合 + +3. **停止イベント (`Event`):** + - グレースフルシャットダウン用のシグナル + +### デッドロック回避 + +- **非ブロッキング Lock 取得:** `lock.acquire(blocking=False)` +- **失敗時の再キュー:** ロック取得失敗時は即座に諦めて再キュー +- **タイムアウト付きキュー取得:** `queue.get(timeout=0.5)` で無限待機を回避 + +## プロトコル仕様 + +### リクエストフォーマット (stdin) + +```json +{ + "endpoint": "/set/data/transparency", + "data": "ODU=" // Base64 encoded: "85" +} +``` + +**フィールド:** +- `endpoint`: 実行するエンドポイント(必須) +- `data`: パラメータ(オプション、Base64 エンコード) + +### レスポンスフォーマット (stdout) + +```json +{ + "status": 200, + "endpoint": "/set/data/transparency", + "result": 85 +} +``` + +**フィールド:** +- `status`: HTTP ステータスコード相当 + - 200: 成功 + - 400: バリデーションエラー + - 404: 無効なエンドポイント + - 423: ロック中(リトライされる) + - 500: 内部エラー +- `endpoint`: リクエストされたエンドポイント +- `result`: 処理結果(型はエンドポイントに依存) + +### ログフォーマット (stdout) + +```json +{ + "status": 348, // 専用ステータスコード + "log": "setSelectedTabNo", + "data": "1" +} +``` + +## エラーハンドリング + +### 1. JSON パースエラー +- **発生箇所:** `receiver()` の `json.loads()` +- **処理:** `errorLogging()` でトレースバック記録、リクエストをスキップ + +### 2. 
ハンドラ実行エラー +- **発生箇所:** `_call_handler()` の `handler["variable"](data)` +- **処理:** + - `errorLogging()` でトレースバック記録 + - status 500 と "Internal error" を返却 + - プロセスは継続 + +### 3. JSON シリアライズエラー +- **発生箇所:** `printResponse()` の `json.dumps()` +- **処理:** + - エラーログに詳細を記録 + - フォールバック JSON を出力(status 500) + - プロセスは継続 + +### 4. EOF (stdin 終了) +- **発生箇所:** `receiver()` の `readline()` +- **処理:** 0.1秒待機して再試行(フロントエンドの再起動待ち) + +## パフォーマンス最適化 + +### 1. 複数ワーカースレッド +- デフォルト3スレッドで並列処理 +- CPU バウンドな処理(翻訳、文字起こし)を効率化 + +### 2. 非ブロッキングロック +- ロック競合時に即座に再キュー +- スレッドのブロッキング時間を最小化 + +### 3. 処理安定化待機 +- 各ハンドラ実行後に 0.2秒待機 +- 連続リクエストによる競合状態を回避 + +## 制限事項 + +### 1. 初期化中の制限 +- `mapping[key]["status"] = False` の間はリクエストが 423 でリトライされる +- 初期化完了まで最大数秒のレイテンシが発生 + +### 2. stdin の単方向性 +- stdin → キュー → ハンドラの一方向フロー +- 複数のフロントエンドからの同時接続は非対応 + +### 3. シリアル実行の保証 +- 同一エンドポイントのリクエストは排他的に実行されるが、 +- 異なるエンドポイントは並列実行される可能性がある +- 依存関係のある操作は呼び出し側で順序制御が必要 + +## デバッグとトラブルシューティング + +### ログファイル + +| ファイル名 | 内容 | +|-----------|------| +| `process.log` | 全リクエスト/レスポンスの記録 | +| `error.log` | 例外トレースバック | + +### デバッグ手法 + +1. **リクエストトレース:** + - `process.log` で endpoint と data を確認 + - Base64 デコードは `base64.b64decode(data).decode('utf-8')` で手動実行 + +2. **ロック競合の検出:** + - 同一エンドポイントで status 423 が頻発する場合 + - `_canonical_lock_key()` の正規化ロジックを確認 + +3. **パフォーマンス分析:** + - 各リクエストの処理時間は status 前後のタイムスタンプから算出 + - worker_count を増やして並列度を調整 + +## 今後の拡張性 + +### 1. 双方向通信 +- WebSocket への移行でリアルタイム通知を改善 +- stdin/stdout は互換性のため維持 + +### 2. 動的ワーカー数調整 +- キューの深さに応じてスレッド数を自動調整 +- CPU 負荷に応じた適応的なスケーリング + +### 3. 優先度キュー +- 重要なリクエスト(エラー通知等)を優先処理 +- `queue.PriorityQueue` への移行 + +## 関連ファイル + +- `controller.py` - ビジネスロジック実装 +- `model.py` - 機能ファサード +- `utils.py` - ログとユーティリティ +- `config.py` - 設定管理 + +## コーディング規約 + +本ファイルは以下の規約に従う: +- PEP 8 スタイルガイド +- 型ヒント (`typing` モジュール) +- Docstring は Google スタイル +- エラーハンドリングは防御的に実装 + +## テストシナリオ + +### 1. 
基本動作テスト +```python +# stdin に JSON を送信 +echo '{"endpoint": "/get/data/version", "data": null}' | python mainloop.py +# 期待される出力: {"status": 200, "endpoint": "/get/data/version", "result": "1.0.0"} +``` + +### 2. 並列リクエストテスト +- 複数の設定変更リクエストを同時送信 +- すべてが正常に処理されることを確認 + +### 3. ロック競合テスト +- 翻訳の有効化と無効化を連続送信 +- 両方が排他的に実行されることを確認 + +### 4. エラー回復テスト +- 不正なJSON、無効なエンドポイント、不正なデータを送信 +- プロセスがクラッシュせずエラーレスポンスを返すことを確認 + +## まとめ + +`mainloop.py` は VRCT の中核となる通信レイヤーであり、stdin/stdout を介したフロントエンドとの JSON ベースプロトコルを実装する。複数のワーカースレッドと細粒度のロックにより、高い並列性と排他制御を両立させている。初期化シーケンスとエラーハンドリングは堅牢に設計されており、プロセスの安定稼働を保証する。 diff --git a/src-python/docs/model.md b/src-python/docs/model.md new file mode 100644 index 00000000..41e3ee3f --- /dev/null +++ b/src-python/docs/model.md @@ -0,0 +1,1277 @@ +# model.py 設計書 + +## 概要 + +`model.py` は VRCT アプリケーションのビジネスロジックファサードとして機能し、音声認識、翻訳、オーバーレイ表示、OSC通信、WebSocket通信など、すべてのサブシステムへの統一されたインターフェースを提供する。シングルトンパターンで実装され、重い初期化処理を遅延実行することで、アプリケーションの起動時間を短縮している。 + +## アーキテクチャ上の位置づけ + +``` +┌─────────────┐ +│controller.py│ (Business Logic Control Layer) +└──────┬──────┘ + │ Facade Pattern +┌──────▼──────┐ +│ model.py │ ◄── このファイル +└──────┬──────┘ + │ Aggregation & Delegation +┌──────▼────────────────────────────────┐ +│ Subsystems │ +│ - Translator │ +│ - AudioTranscriber │ +│ - Overlay / OverlayImage │ +│ - OSCHandler │ +│ - WebSocketServer │ +│ - Transliterator │ +│ - Watchdog │ +│ - DeviceManager (via device_manager) │ +└───────────────────────────────────────┘ +``` + +## 主要コンポーネント + +### 1. 
threadFnc クラス + +**責務:** 関数を繰り返し実行するスレッドラッパー + +**特徴:** +- デーモンスレッドとして動作 +- ループ制御(停止・一時停止・再開)機能を提供 +- 終了時のクリーンアップ関数をサポート + +**メソッド:** + +#### `__init__(fnc, end_fnc=None, daemon=True, *args, **kwargs)` + +**パラメータ:** +- `fnc`: 繰り返し実行する関数 +- `end_fnc`: スレッド終了時に実行する関数(オプション) +- `daemon`: デーモンフラグ(デフォルト: True) +- `*args, **kwargs`: `fnc` に渡す引数 + +#### `stop() -> None` +ループを停止し、スレッドを終了させる。 + +#### `pause() -> None` +ループを一時停止する(関数の実行を停止)。 + +#### `resume() -> None` +一時停止したループを再開する。 + +#### `run() -> None` +スレッドのメインループ。`self.loop` が True の間、`self.fnc()` を繰り返し呼び出す。 + +**使用例:** +```python +def print_message(): + print("Hello") + sleep(1) + +def cleanup(): + print("Thread ended") + +th = threadFnc(print_message, end_fnc=cleanup) +th.start() +# ... しばらく実行 ... +th.stop() +th.join() +``` + +--- + +### 2. Model クラス + +**責務:** アプリケーションのすべてのサブシステムへのファサードインターフェース + +**パターン:** シングルトン(`__new__` で制御) + +**初期化戦略:** 遅延初期化(Lazy Initialization) +- `__new__`: インスタンスの生成のみ(軽量) +- `init()`: 重い初期化処理(明示的な呼び出しが必要) +- `ensure_initialized()`: 初期化が必要なメソッドで自動的に呼び出される + +--- + +### 3. 初期化メソッド + +#### `__new__(cls) -> Model` + +**責務:** シングルトンインスタンスの生成 + +**処理:** +1. `cls._instance` が None の場合のみ新規インスタンスを生成 +2. `_inited` フラグを False に設定(実際の初期化は未実施) +3. 既存のインスタンスがあればそれを返却 + +**重要:** このメソッドでは重い初期化を行わない(import 時のパフォーマンス向上) + +#### `init() -> None` + +**責務:** すべてのサブシステムの初期化 + +**処理:** +1. **初期化済みチェック:** `_inited` フラグが True なら何もしない +2. **属性の初期化:** + ```python + self.logger = None + self.mic_audio_queue = None + self.mic_mute_status = None + self.previous_send_message = "" + self.previous_receive_message = "" + ``` +3. **サブシステムの初期化:** + - `Translator()`: 翻訳エンジン + - `KeywordProcessor()`: 禁止ワードフィルター + - `Overlay()`: オーバーレイシステム + - `OverlayImage()`: オーバーレイ画像生成 + - `Transliterator()`: 音訳(ひらがな・ローマ字変換) + - `Watchdog()`: プロセス監視 + - `OSCHandler()`: OSC通信 + - `WebSocketServer()`: WebSocket通信 +4. 
**コールバック関数の初期化:** + ```python + self.check_mic_energy_fnc: Callable[[float], None] = lambda v: None + self.check_speaker_energy_fnc: Callable[[float], None] = lambda v: None + ``` +5. **初期化完了フラグ:** `_inited = True` + +#### `ensure_initialized() -> None` + +**責務:** 初期化が未実施の場合に `init()` を呼び出す + +**使用箇所:** 初期化が必要なすべての public メソッド + +**エラーハンドリング:** +```python +try: + self.init() +except Exception: + errorLogging() +``` + +--- + +### 4. 翻訳機能 + +#### モデルウェイト管理 + +##### `checkTranslatorCTranslate2ModelWeight(weight_type: str) -> bool` +指定されたモデルウェイトが存在するかチェック。 + +**パラメータ:** +- `weight_type`: "tiny", "small", "medium", "large" 等 + +**戻り値:** モデルが存在する場合 True + +##### `downloadCTranslate2ModelWeight(weight_type, callback=None, end_callback=None) -> bool` + +**責務:** CTranslate2 モデルウェイトのダウンロード + +**パラメータ:** +- `weight_type`: モデルタイプ +- `callback`: 進捗通知用コールバック(`progress: float` を受け取る) +- `end_callback`: 完了時のコールバック + +**実装:** `downloadCTranslate2Weight()` ユーティリティ関数に委譲 + +##### `downloadCTranslate2ModelTokenizer(weight_type) -> bool` +トークナイザーファイルのダウンロード。 + +#### 翻訳モデル制御 + +##### `changeTranslatorCTranslate2Model() -> None` + +**責務:** 翻訳モデルの変更・再ロード + +**処理:** +```python +self.translator.changeCTranslate2Model( + path=config.PATH_LOCAL, + model_type=config.CTRANSLATE2_WEIGHT_TYPE, + device=config.SELECTED_TRANSLATION_COMPUTE_DEVICE["device"], + device_index=config.SELECTED_TRANSLATION_COMPUTE_DEVICE["device_index"], + compute_type=config.SELECTED_TRANSLATION_COMPUTE_TYPE +) +``` + +**VRAMエラー:** `ValueError("VRAM_OUT_OF_MEMORY")` を送出する可能性がある + +##### `isLoadedCTranslate2Model() -> bool` +CTranslate2 モデルがロード済みかチェック。 + +##### `isChangedTranslatorParameters() -> bool` +翻訳パラメータが変更されたかチェック。 + +##### `setChangedTranslatorParameters(is_changed: bool) -> None` +翻訳パラメータ変更フラグを設定。 + +#### DeepL 認証 + +##### `authenticationTranslatorDeepLAuthKey(auth_key: str) -> bool` + +**責務:** DeepL API キーの検証 + +**処理:** `translator.authenticationDeepLAuthKey()` に委譲 + +**戻り値:** 認証成功時 True + +#### 翻訳実行 + +##### 
`getTranslate(translator_name, source_language, target_language, target_country, message) -> Tuple[str, bool]` + +**責務:** メッセージの翻訳 + +**パラメータ:** +- `translator_name`: "CTranslate2", "DeepL", "DeepL_API" 等 +- `source_language`: 元言語("ja", "en" 等) +- `target_language`: 翻訳先言語 +- `target_country`: 翻訳先国(方言対応用) +- `message`: 翻訳するテキスト + +**戻り値:** +- `translation`: 翻訳結果(文字列) +- `success_flag`: 成功時 True + +**エラーハンドリング:** +```python +translation = self.translator.translate(...) +if isinstance(translation, str): + success_flag = True +else: + # 翻訳失敗時のリトライロジック + while True: + # フェールセーフ処理 +``` + +##### `getInputTranslate(message, source_language=None) -> Tuple[list, list]` + +**責務:** 送信メッセージの翻訳(複数言語対応) + +**処理:** +1. `config.SELECTED_TRANSLATION_ENGINES[config.SELECTED_TAB_NO]` で翻訳エンジンを取得 +2. `config.SELECTED_TARGET_LANGUAGES` で翻訳先言語リストを取得 +3. 有効な各言語について `getTranslate()` を呼び出し + +**戻り値:** +- `translations`: 翻訳結果のリスト +- `success_flags`: 各翻訳の成功フラグのリスト + +##### `getOutputTranslate(message, source_language=None) -> Tuple[list, list]` + +**責務:** 受信メッセージの翻訳(単一言語) + +**処理:** `getInputTranslate()` と同様だが、翻訳先が自分の言語(1つ)のみ + +--- + +### 5. 音声認識機能 + +#### Whisper モデル管理 + +##### `checkTranscriptionWhisperModelWeight(weight_type: str) -> bool` +Whisper モデルウェイトの存在確認。 + +##### `downloadWhisperModelWeight(weight_type, callback=None, end_callback=None) -> bool` +Whisper モデルウェイトのダウンロード。 + +#### マイク音声認識 + +##### `startMicTranscript(fnc: Callable[[dict], None]) -> None` + +**責務:** マイク音声認識の開始 + +**パラメータ:** +- `fnc`: 認識結果を受け取るコールバック関数 + +**処理フロー:** +1. **デバイス取得:** + ```python + mic_host_name = config.SELECTED_MIC_HOST + mic_device_name = config.SELECTED_MIC_DEVICE + mic_device_list = device_manager.getMicDevices().get(mic_host_name, [...]) + selected_mic_device = [device for device in mic_device_list if device["name"] == mic_device_name] + ``` +2. **デバイス検証:** + - デバイスがない場合、`fnc({"text": False, "language": None})` を呼び出して終了 +3. **音声キューの作成:** + ```python + self.mic_audio_queue = Queue() + ``` +4. 
**レコーダーの初期化:** + ```python + self.mic_audio_recorder = SelectedMicEnergyAndAudioRecorder( + device=mic_device, + energy_threshold=config.MIC_THRESHOLD, + dynamic_energy_threshold=config.MIC_AUTOMATIC_THRESHOLD, + phrase_time_limit=config.MIC_RECORD_TIMEOUT, + ) + self.mic_audio_recorder.recordIntoQueue(self.mic_audio_queue, None) + ``` +5. **文字起こし器の初期化:** + ```python + self.mic_transcriber = AudioTranscriber( + speaker=False, + source=self.mic_audio_recorder.source, + phrase_timeout=config.MIC_PHRASE_TIMEOUT, + max_phrases=config.MIC_MAX_PHRASES, + transcription_engine=config.SELECTED_TRANSCRIPTION_ENGINE, + root=config.PATH_LOCAL, + whisper_weight_type=config.WHISPER_WEIGHT_TYPE, + device=config.SELECTED_TRANSCRIPTION_COMPUTE_DEVICE["device"], + device_index=config.SELECTED_TRANSCRIPTION_COMPUTE_DEVICE["device_index"], + compute_type=config.SELECTED_TRANSCRIPTION_COMPUTE_TYPE, + ) + ``` +6. **文字起こしスレッドの起動:** + ```python + def sendMicTranscript(): + # キューから音声データを取得 + # AudioTranscriber で文字起こし + # fnc() で結果を送信 + + def endMicTranscript(): + # クリーンアップ処理 + + self.mic_print_transcript = threadFnc(sendMicTranscript, end_fnc=endMicTranscript) + self.mic_print_transcript.start() + ``` +7. **ミュート状態の同期:** + ```python + self.changeMicTranscriptStatus() + ``` + +##### `resumeMicTranscript() -> None` + +**責務:** 一時停止したマイク音声認識の再開 + +**処理:** +1. 音声キューをクリア +2. レコーダーを再開: `self.mic_audio_recorder.resume()` + +##### `pauseMicTranscript() -> None` + +**責務:** マイク音声認識の一時停止 + +**処理:** `self.mic_audio_recorder.pause()` + +##### `changeMicTranscriptStatus() -> None` + +**責務:** VRChat のマイクミュート状態に応じて音声認識を制御 + +**処理:** +```python +if config.VRC_MIC_MUTE_SYNC is True: + match self.mic_mute_status: + case True: + self.pauseMicTranscript() + case False: + self.resumeMicTranscript() + case None: + self.resumeMicTranscript() # 不明な場合は一時停止しない +else: + self.resumeMicTranscript() +``` + +##### `stopMicTranscript() -> None` + +**責務:** マイク音声認識の停止とリソース解放 + +**処理:** +1. 文字起こしスレッドの停止 +2. 
レコーダーの再開(一時停止中の場合)と停止 +3. インスタンスの破棄 + +**VRAMエラー検出:** + +##### `detectVRAMError(error: Exception) -> Tuple[bool, Optional[str]]` + +**責務:** VRAM不足エラーの検出 + +**処理:** +```python +error_str = str(error) +if isinstance(error, ValueError) and len(error.args) > 0 and error.args[0] == "VRAM_OUT_OF_MEMORY": + return True, error_str +if "CUDA out of memory" in error_str or "CUBLAS_STATUS_ALLOC_FAILED" in error_str: + return True, error_str +return False, None +``` + +**使用箇所:** +- 翻訳実行時 +- 音声認識開始時 + +#### スピーカー音声認識 + +以下のメソッドはマイク音声認識と同様の構造: +- `startSpeakerTranscript(fnc)` +- `stopSpeakerTranscript()` + +**相違点:** +- `speaker=True` で AudioTranscriber を初期化 +- `SelectedSpeakerEnergyAndAudioRecorder` を使用 + +#### エネルギーレベル監視 + +##### `startCheckMicEnergy(fnc: Optional[Callable[[float], None]] = None) -> None` + +**責務:** マイクの音量レベル監視の開始 + +**処理:** +1. コールバック関数を設定: `self.check_mic_energy_fnc = fnc` +2. マイクデバイスを取得 +3. エネルギーレコーダーを初期化: + ```python + mic_energy_queue = Queue() + self.mic_energy_recorder = SelectedMicEnergyRecorder(mic_device) + self.mic_energy_recorder.recordIntoQueue(mic_energy_queue) + ``` +4. エネルギー送信スレッドを起動: + ```python + def sendMicEnergy(): + if not mic_energy_queue.empty(): + energy = mic_energy_queue.get() + self.check_mic_energy_fnc(energy) + sleep(0.01) + + self.mic_energy_plot_progressbar = threadFnc(sendMicEnergy) + self.mic_energy_plot_progressbar.start() + ``` + +##### `stopCheckMicEnergy() -> None` +エネルギー監視の停止とリソース解放。 + +**対応するスピーカー用メソッド:** +- `startCheckSpeakerEnergy(fnc)` +- `stopCheckSpeakerEnergy()` + +--- + +### 6. 
オーバーレイ機能 + +#### 画像生成 + +##### `createOverlayImageSmallLog(message, your_language, translation, target_language) -> object` + +**責務:** 小さなログウィンドウ用の画像生成 + +**パラメータ:** +- `message`: 元のメッセージ(オプション) +- `your_language`: 元の言語(オプション) +- `translation`: 翻訳結果のリスト +- `target_language`: 翻訳先言語の辞書(オプション) + +**処理:** +```python +target_language_list = [] +if isinstance(target_language, dict): + target_language_list = list(target_language.values()) +return self.overlay_image.createOverlayImageSmallLog( + message, your_language, translation, target_language_list +) +``` + +##### `createOverlayImageSmallMessage(message: str) -> object` + +**責務:** 小さなメッセージウィンドウ用の画像生成(単一言語) + +**処理:** +```python +ui_language = config.UI_LANGUAGE +convert_languages = { + "en": "Default", + "jp": "Japanese", + "ko": "Korean", + "zh-Hans": "Chinese Simplified", + "zh-Hant": "Chinese Traditional", +} +language = convert_languages.get(ui_language, "Default") +return self.overlay_image.createOverlayImageSmallLog(message, language) +``` + +##### `createOverlayImageLargeLog(message_type, message, your_language, translation, target_language=None) -> object` + +**責務:** 大きなログウィンドウ用の画像生成 + +**パラメータ:** +- `message_type`: "send" または "received" + +**処理:** `createOverlayImageSmallLog()` と同様 + +##### `createOverlayImageLargeMessage(message: str) -> object` + +**責務:** 大きなメッセージウィンドウ用の画像生成 + +**特殊処理:** +```python +overlay_image = OverlayImage(config.PATH_LOCAL) +for _ in range(2): + # 2回繰り返して画像を生成(理由は不明、バグ修正のため?) 
+ overlay_image.createOverlayImageLargeLog("send", message, language) +return overlay_image.createOverlayImageLargeLog("send", message, language) +``` + +#### 表示制御 + +##### `clearOverlayImageSmallLog() -> None` +小さなログウィンドウをクリア。 + +##### `updateOverlaySmallLog(img: object) -> None` +小さなログウィンドウの画像を更新。 + +##### `updateOverlaySmallLogSettings() -> None` + +**責務:** 小さなログウィンドウの設定更新 + +**処理:** 設定の変更を検出し、オーバーレイに反映: +```python +size = "small" +if (self.overlay.settings[size]["x_pos"] != config.OVERLAY_SMALL_LOG_SETTINGS["x_pos"] or + # ... 他の設定項目 ...): + self.overlay.updateSettings(config.OVERLAY_SMALL_LOG_SETTINGS, size) +``` + +**設定項目:** +- 位置(x_pos, y_pos, z_pos) +- 回転(x_rotation, y_rotation, z_rotation) +- トラッカー(tracker) +- 表示時間(display_duration) +- フェードアウト時間(fadeout_duration) +- 透明度(opacity) +- UIスケーリング(ui_scaling) + +##### `clearOverlayImageLargeLog() -> None` +大きなログウィンドウをクリア。 + +##### `updateOverlayLargeLog(img: object) -> None` +大きなログウィンドウの画像を更新。 + +##### `updateOverlayLargeLogSettings() -> None` +大きなログウィンドウの設定更新(`updateOverlaySmallLogSettings()` と同様)。 + +#### オーバーレイシステム制御 + +##### `startOverlay() -> None` +オーバーレイシステムを起動(OpenVR の初期化)。 + +##### `shutdownOverlay() -> None` +オーバーレイシステムを終了(リソース解放)。 + +--- + +### 7. 
OSC 通信機能 + +#### 設定 + +##### `setOscIpAddress(ip_address: str) -> None` +VRChat への送信先 IP アドレスを設定。 + +##### `setOscPort(port: int) -> None` +OSC ポート番号を設定。 + +#### メッセージ送信 + +##### `oscStartSendTyping() -> None` +タイピング中の通知を送信(VRChat のチャットボックスにインジケーターが表示される)。 + +##### `oscStopSendTyping() -> None` +タイピング終了の通知を送信。 + +##### `oscSendMessage(message: str) -> None` + +**責務:** VRChat へメッセージを送信 + +**パラメータ:** +- `message`: 送信するテキスト + +**処理:** +```python +self.osc_handler.sendMessage( + message=message, + notification=config.NOTIFICATION_VRC_SFX +) +``` + +#### OSC 受信 + +##### `setMuteSelfStatus() -> None` +VRChat の現在のマイクミュート状態を取得。 + +##### `startReceiveOSC() -> None` + +**責務:** OSC パラメータの受信開始 + +**処理:** +```python +def changeHandlerMute(address, osc_arguments): + if config.ENABLE_TRANSCRIPTION_SEND is True: + self.mic_mute_status = osc_arguments[0] + self.changeMicTranscriptStatus() + +dict_filter_and_target = { + self.osc_handler.osc_parameter_muteself: changeHandlerMute, +} +self.osc_handler.setDictFilterAndTarget(dict_filter_and_target) +self.osc_handler.receiveOscParameters() +``` + +**監視パラメータ:** +- `/avatar/parameters/MuteSelf`: マイクミュート状態 + +##### `stopReceiveOSC() -> None` +OSC 受信を停止。 + +##### `getIsOscQueryEnabled() -> bool` +OSC Query 機能が有効かチェック。 + +--- + +### 8. 
音訳機能 + +#### 音訳システム制御 + +##### `startTransliteration() -> None` +音訳システムを起動(`Transliterator` インスタンスを生成)。 + +##### `stopTransliteration() -> None` +音訳システムを停止(インスタンスを破棄)。 + +#### 音訳実行 + +##### `convertMessageToTransliteration(message, hiragana=True, romaji=True) -> list` + +**責務:** メッセージをひらがな・ローマ字に変換 + +**パラメータ:** +- `message`: 変換するテキスト +- `hiragana`: ひらがなを含める +- `romaji`: ローマ字を含める + +**処理:** +```python +if hiragana is False and romaji is False: + return [] + +keys_to_keep = {"orig"} +if hiragana: + keys_to_keep.add("hira") +if romaji: + keys_to_keep.add("hepburn") + +if self.transliterator is None: + self.startTransliteration() + +data_list = self.transliterator.analyze(message, use_macron=False) +filtered_list = [ + {key: value for key, value in item.items() if key in keys_to_keep} + for item in data_list +] +return filtered_list +``` + +**戻り値の例:** +```python +[ + {"orig": "こんにちは", "hira": "こんにちは", "hepburn": "konnichiwa"}, + {"orig": "世界", "hira": "せかい", "hepburn": "sekai"} +] +``` + +--- + +### 9. キーワードフィルター + +#### フィルター管理 + +##### `resetKeywordProcessor() -> None` +キーワードプロセッサをリセット(すべてのキーワードを削除)。 + +##### `addKeywords() -> None` +禁止ワードをキーワードプロセッサに追加。 + +**処理:** +```python +for f in config.MIC_WORD_FILTER: + self.keyword_processor.add_keyword(f) +``` + +#### フィルタリング + +##### `checkKeywords(message: str) -> bool` +メッセージに禁止ワードが含まれているかチェック。 + +**戻り値:** 禁止ワードが含まれている場合 True + +**実装:** +```python +return len(self.keyword_processor.extract_keywords(message)) != 0 +``` + +--- + +### 10. 重複検出 + +##### `detectRepeatSendMessage(message: str) -> bool` + +**責務:** 送信メッセージの重複検出 + +**処理:** +```python +repeat_flag = False +if self.previous_send_message == message: + repeat_flag = True +self.previous_send_message = message +return repeat_flag +``` + +##### `detectRepeatReceiveMessage(message: str) -> bool` +受信メッセージの重複検出(`detectRepeatSendMessage()` と同様)。 + +--- + +### 11. 
デバイス管理 + +#### マイクデバイス + +##### `getListMicHost() -> list` + +**責務:** マイクホストのリスト取得 + +**戻り値:** ["MME", "WASAPI", ...] 等 + +**処理:** +```python +try: + dm = device_manager.getMicDevices() + result = [host for host in dm.keys()] +except Exception: + errorLogging() + result = [] +return result +``` + +##### `getMicDefaultDevice() -> str` +選択されたホストのデフォルトマイクデバイス名を取得。 + +##### `getListMicDevice() -> list` +選択されたホストのマイクデバイス一覧を取得。 + +#### スピーカーデバイス + +##### `getListSpeakerDevice() -> list` +スピーカーデバイス一覧を取得。 + +**処理:** +```python +try: + sd = device_manager.getSpeakerDevices() + result = [device["name"] for device in sd] +except Exception: + errorLogging() + result = ["NoDevice"] +return result +``` + +--- + +### 12. 言語管理 + +##### `getListLanguageAndCountry() -> list` + +**責務:** 音声認識と翻訳の両方をサポートする言語・国のリスト取得 + +**処理:** +1. `transcription_lang` から音声認識サポート言語を取得 +2. `translation_lang` から翻訳サポート言語を取得 +3. 両方でサポートされている言語を抽出 +4. 各言語の国バリエーションを列挙 + +**戻り値の例:** +```python +[ + {"language": "en", "country": "US"}, + {"language": "en", "country": "UK"}, + {"language": "ja", "country": "JP"}, + # ... 
+] +``` + +##### `findTranslationEngines(source_lang, target_lang, engines_status) -> list` + +**責務:** 指定された言語ペアをサポートする翻訳エンジンの検索 + +**パラメータ:** +- `source_lang`: 元言語の辞書(複数の言語が有効化されている可能性) +- `target_lang`: 翻訳先言語の辞書 +- `engines_status`: 各エンジンの有効/無効状態 + +**処理:** +```python +selectable_engines = [key for key, value in engines_status.items() if value is True] +compatible_engines = [] +for engine in list(translation_lang.keys()): + languages = translation_lang.get(engine, {}).get("source", {}) + source_langs = [e["language"] for e in list(source_lang.values()) if e["enable"] is True] + target_langs = [e["language"] for e in list(target_lang.values()) if e["enable"] is True] + language_list = list(languages.keys()) + + if all(e in language_list for e in source_langs) and all(e in language_list for e in target_langs): + if engine in selectable_engines: + compatible_engines.append(engine) + +return compatible_engines +``` + +--- + +### 13. ロギング + +##### `startLogger() -> None` + +**責務:** ファイルロギングの開始 + +**処理:** +```python +os_makedirs(config.PATH_LOGS, exist_ok=True) +file_name = os_path.join(config.PATH_LOGS, f"{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.log") +self.logger = setupLogger("log", file_name) +self.logger.disabled = False +``` + +**ログファイル名の例:** `2023-10-13_15-30-45.log` + +##### `stopLogger() -> None` +ファイルロギングの停止。 + +--- + +### 14. ソフトウェアアップデート + +##### `checkSoftwareUpdated() -> dict` + +**責務:** 最新バージョンの確認 + +**処理:** +```python +update_flag = False +version = "" +try: + # GitHub API 等から最新バージョン情報を取得 + # packaging.version.parse でバージョン比較 +except Exception: + errorLogging() +return { + "is_update_available": update_flag, + "new_version": version, +} +``` + +##### `updateSoftware() -> None` + +**責務:** 通常版のアップデート実行 + +**処理:** +1. アップデーターをダウンロード(最大5回リトライ) +2. `Popen()` でアップデーターを起動 +3. 現在のプロセスを終了 + +##### `updateCudaSoftware() -> None` +CUDA版のアップデート実行(`--cuda` オプション付きでアップデーターを起動)。 + +--- + +### 15. 
Watchdog 機能 + +##### `startWatchdog() -> None` + +**責務:** Watchdog 監視スレッドの起動 + +**処理:** +```python +self.th_watchdog = threadFnc(self.watchdog.start) +self.th_watchdog.daemon = True +self.th_watchdog.start() +``` + +##### `feedWatchdog() -> None` +Watchdog にハートビート信号を送信(タイムアウトをリセット)。 + +##### `setWatchdogCallback(callback: Callable) -> None` +Watchdog タイムアウト時のコールバック関数を設定。 + +##### `stopWatchdog() -> None` +Watchdog を停止し、スレッドの終了を待機。 + +--- + +### 16. WebSocket サーバー + +#### サーバー制御 + +##### `startWebSocketServer(host: str, port: int) -> None` + +**責務:** WebSocket サーバーの起動 + +**処理:** +1. 既に起動中なら何もしない +2. `websocket_server_loop = True` に設定 +3. 別スレッドで asyncio イベントループを実行: + ```python + async def WebSocketServerMain(): + self.websocket_server = WebSocketServer(host, port) + self.websocket_server_alive = True + await self.websocket_server.start() + # ループ終了まで待機 + self.websocket_server_alive = False + + self.th_websocket_server = Thread(target=lambda: asyncio.run(WebSocketServerMain())) + self.th_websocket_server.daemon = True + self.th_websocket_server.start() + ``` + +##### `stopWebSocketServer() -> None` + +**責務:** WebSocket サーバーの停止 + +**処理:** +1. `websocket_server_loop = False` に設定 +2. サーバーの停止を要求 +3. 
スレッドの終了を待機(タイムアウト付き) + +**エラーハンドリング:** +```python +try: + # サーバー停止処理 +except Exception: + errorLogging() +finally: + self.th_websocket_server = None + self.websocket_server = None + self.websocket_server_alive = False +``` + +##### `checkWebSocketServerAlive() -> bool` +WebSocket サーバーの稼働状態を確認。 + +#### メッセージ送信 + +##### `websocketSendMessage(message_dict: dict) -> bool` + +**責務:** すべての接続クライアントにメッセージをブロードキャスト + +**パラメータ:** +- `message_dict`: 送信する辞書(JSON にシリアライズされる) + +**処理:** +```python +if not self.websocket_server_alive or not self.websocket_server: + return False +try: + self.websocket_server.broadcast(message_dict) + return True +except Exception: + errorLogging() + return False +``` + +--- + +## 依存関係 + +### 外部ライブラリ + +```python +from subprocess import Popen +from os import makedirs as os_makedirs +from os import path as os_path +from datetime import datetime +from time import sleep +from queue import Queue +from threading import Thread +from requests import get as requests_get +from typing import Callable, Optional, cast +from packaging.version import parse +from flashtext import KeywordProcessor +``` + +### 内部モジュール + +```python +from device_manager import device_manager +from config import config +from models.translation.translation_translator import Translator +from models.osc.osc import OSCHandler +from models.transcription.transcription_recorder import SelectedMicEnergyAndAudioRecorder, SelectedSpeakerEnergyAndAudioRecorder +from models.transcription.transcription_recorder import SelectedMicEnergyRecorder, SelectedSpeakerEnergyRecorder +from models.transcription.transcription_transcriber import AudioTranscriber +from models.translation.translation_languages import translation_lang +from models.transcription.transcription_languages import transcription_lang +from models.translation.translation_utils import checkCTranslate2Weight, downloadCTranslate2Weight, downloadCTranslate2Tokenizer +from models.transcription.transcription_whisper import checkWhisperWeight, 
downloadWhisperWeight +from models.transliteration.transliteration_transliterator import Transliterator +from models.overlay.overlay import Overlay +from models.overlay.overlay_image import OverlayImage +from models.watchdog.watchdog import Watchdog +from models.websocket.websocket_server import WebSocketServer +from utils import errorLogging, setupLogger +``` + +--- + +## スレッド構成 + +### メインスレッド +- アプリケーションのメインループ(`mainloop.py` が管理) + +### Model 管理のスレッド + +#### 音声認識スレッド +- `mic_print_transcript`: マイク音声認識結果の処理 +- `speaker_print_transcript`: スピーカー音声認識結果の処理 + +#### エネルギー監視スレッド +- `mic_energy_plot_progressbar`: マイクの音量レベル監視 +- `speaker_energy_plot_progressbar`: スピーカーの音量レベル監視 + +#### その他のスレッド +- `th_watchdog`: Watchdog 監視 +- `th_websocket_server`: WebSocket サーバー(asyncio イベントループ) + +### サブシステム管理のスレッド +- `device_manager.th_monitoring`: デバイス変更監視 +- `mic_audio_recorder.th_record`: マイク音声録音 +- `speaker_audio_recorder.th_record`: スピーカー音声録音 +- `osc_handler.th_receive`: OSC パラメータ受信 + +--- + +## エラーハンドリング + +### VRAM不足エラー + +**検出:** +```python +is_vram_error, error_message = self.detectVRAMError(e) +``` + +**対応:** +1. エラーを `ValueError("VRAM_OUT_OF_MEMORY")` として送出 +2. Controller 側でキャッチして機能を無効化 +3. ユーザーに通知 + +### デバイスアクセスエラー + +**検出:** +- デバイスが見つからない場合: `NoDevice` +- アクセス失敗時: コールバックに `False` を渡す + +**対応:** +1. エラーをログに記録 +2. Controller に通知 +3. 処理を継続(他の機能に影響なし) + +### ネットワークエラー + +**検出:** +- 翻訳API呼び出し失敗 +- モデルウェイトダウンロード失敗 + +**対応:** +1. リトライロジック(翻訳の場合) +2. フォールバック(CTranslate2 への切り替え) +3. エラー通知 + +--- + +## パフォーマンス最適化 + +### 1. 遅延初期化 + +重い初期化処理を `init()` に分離し、必要になるまで実行しない。 + +**利点:** +- アプリケーションの起動時間を短縮 +- 未使用の機能のリソースを消費しない + +### 2. シングルトンパターン + +Model クラスはアプリケーション全体で1つのインスタンスのみ存在。 + +**利点:** +- メモリ使用量の削減 +- 状態の一貫性 + +### 3. スレッドによる並列処理 + +音声認識、エネルギー監視、WebSocket サーバーなど、ブロッキング処理を別スレッドで実行。 + +**利点:** +- UI のレスポンス性向上 +- 複数機能の同時実行 + +--- + +## テストシナリオ + +### 1. 
初期化テスト + +**ケース:** +- 初回初期化 +- 既に初期化済みの場合 +- 初期化失敗時 + +**確認項目:** +- `_inited` フラグが正しく設定されているか +- すべてのサブシステムが初期化されているか +- エラーが適切にログされているか + +### 2. 音声認識テスト + +**ケース:** +- デバイスがない場合 +- 音声認識開始・停止・一時停止・再開 +- VRAMエラーの発生 + +**確認項目:** +- コールバックが正しく呼び出されているか +- スレッドが適切に管理されているか +- エラーが検出されているか + +### 3. 翻訳テスト + +**ケース:** +- 単一言語翻訳 +- 複数言語翻訳 +- 翻訳エンジンの切り替え +- API エラー + +**確認項目:** +- 翻訳結果が正しいか +- エラー時のフォールバックが動作するか + +### 4. オーバーレイテスト + +**ケース:** +- 画像生成 +- 設定更新 +- オーバーレイの起動・停止 + +**確認項目:** +- 画像が正しく生成されるか +- 設定変更が反映されるか + +--- + +## 制限事項 + +### 1. シングルトンの制約 + +**問題:** テストやマルチインスタンスが困難 + +**影響:** +- ユニットテストでモックが難しい +- 複数の VRChat インスタンスへの対応が不可能 + +### 2. グローバル状態依存 + +**問題:** `config` モジュールへの強い依存 + +**影響:** +- テスタビリティの低下 +- 設定変更の追跡が困難 + +### 3. エラーハンドリングの不完全性 + +**問題:** 一部のエラーは握りつぶされる + +**影響:** +- デバッグが困難 +- ユーザーへの適切なエラー通知が不足 + +### 4. スレッドの管理複雑性 + +**問題:** 多数のスレッドとその状態管理 + +**影響:** +- デッドロックのリスク +- リソースリークの可能性 + +--- + +## 今後の改善案 + +### 1. 依存性注入(DI)の導入 + +```python +class Model: + def __init__(self, config, device_manager, translator, ...): + self.config = config + self.device_manager = device_manager + self.translator = translator + # ... +``` + +**利点:** +- テスタビリティの向上 +- モジュール間の疎結合 + +### 2. 非同期化(asyncio) + +```python +async def startMicTranscript(self, callback): + async for result in self.mic_transcriber.transcribe(): + await callback(result) +``` + +**利点:** +- スレッド管理の簡素化 +- パフォーマンスの向上 + +### 3. イベント駆動アーキテクチャ + +```python +class Model: + def __init__(self): + self.event_bus = EventBus() + + def on_transcription_result(self, result): + self.event_bus.emit("transcription_result", result) +``` + +**利点:** +- モジュール間の疎結合 +- 拡張性の向上 + +### 4. 
エラーハンドリングの統一 + +```python +class ModelError(Exception): + pass + +class VRAMError(ModelError): + pass + +class DeviceError(ModelError): + pass +``` + +**利点:** +- エラーの分類と処理の統一 +- エラー情報の追跡 + +--- + +## 関連ファイル + +- **controller.py** - ビジネスロジック制御レイヤー +- **config.py** - 設定管理 +- **device_manager.py** - デバイス監視・自動選択 +- **mainloop.py** - 通信レイヤー +- **utils.py** - ログとユーティリティ関数 +- **models/** - サブシステムの実装 + +--- + +## まとめ + +`model.py` は VRCT のすべてのサブシステムへの統一されたファサードインターフェースを提供し、音声認識、翻訳、オーバーレイ、OSC通信、WebSocket通信など、複雑な機能を簡潔なAPIで公開する。シングルトンパターンと遅延初期化により、リソースの効率的な利用を実現している。スレッドを活用した並列処理により、複数の機能を同時に実行しながらUIのレスポンス性を維持している。VRAMエラーやデバイスエラーに対する適切なハンドリングにより、ユーザーエクスペリエンスを向上させている。 diff --git a/src-python/docs/modules/config.md b/src-python/docs/modules/config.md deleted file mode 100644 index cdd83b8c..00000000 --- a/src-python/docs/modules/config.md +++ /dev/null @@ -1,212 +0,0 @@ -# config.py クラス仕様書 - -目的: アプリケーションの全設定を集中管理するシングルトン `config`(クラス名: `Config`、インスタンス: `config`)。 - -特徴: -- JSON シリアライズ対象のプロパティには `@json_serializable('KEY_NAME')` デコレータが付いており、`load_config()` / `saveConfig()` によって `config.json` に永続化されます。 -- プロパティは「読み取り専用 (Read Only)」と「読み書き (Read/Write)」に分類されます。読み書き可能なプロパティはバリデーション処理とともに setter が用意されています。 -- 設定は内部的に `_config_data` に保持され、`saveConfig()` はデバウンス(2秒)でファイルへ書き込みます。即時書き込みオプションも可能です(saveConfig(..., immediate_save=True))。 - -## 生成とライフサイクル -- `Config()` はシングルトン(`__new__` で単一インスタンスを生成)。 -- `init_config()` でデフォルト値を初期化し、その後 `load_config()` が `config.json` を読み込んで既存値を適用します。 - -## 主要プロパティ一覧(型・デフォルト・説明) - -注: 下は `config.py` の初期化ロジックに基づく抜粋です。`json_serializable` が付与されたキーは `config.json` に書き出されます。 - -- Read only - - `VERSION` (str) = "3.2.2" - - `PATH_LOCAL` (str) = フローズン(frozen)実行ファイルのディレクトリか、ソースの `__file__` のディレクトリ - - `PATH_CONFIG` (str) = PATH_LOCAL/config.json - - `PATH_LOGS` (str) = PATH_LOCAL/logs - - `GITHUB_URL`, `UPDATER_URL`, `BOOTH_URL`, `DOCUMENTS_URL`, `DEEPL_AUTH_KEY_PAGE_URL` (str) - - `MAX_MIC_THRESHOLD` (int) = 2000 - - `MAX_SPEAKER_THRESHOLD` (int) = 4000 - - `WATCHDOG_TIMEOUT` 
(int) = 60 - - `WATCHDOG_INTERVAL` (int) = 20 - - `SELECTABLE_*` 系: 各種選択肢のリスト/イテレータ(モデルの重みや言語、UI 言語等)。 - -- Read/Write(主な項目) - - `SEND_MESSAGE_FORMAT_PARTS` (dict) = デフォルトで message/translation/translation_first 等を含むフォーマット定義。json_serializable キー: 'SEND_MESSAGE_FORMAT_PARTS' - - `RECEIVED_MESSAGE_FORMAT_PARTS` (dict) - - `ENABLE_TRANSLATION` (bool) = False - - `ENABLE_TRANSCRIPTION_SEND` (bool) = False - - `ENABLE_TRANSCRIPTION_RECEIVE` (bool) = False - - `ENABLE_FOREGROUND` (bool) = False - - `ENABLE_CHECK_ENERGY_SEND` (bool) = False - - `ENABLE_CHECK_ENERGY_RECEIVE` (bool) = False - - `SELECTABLE_CTRANSLATE2_WEIGHT_TYPE_DICT` (dict) = {weight_type: False, ...} - - `SELECTABLE_WHISPER_WEIGHT_TYPE_DICT` (dict) - - `SELECTABLE_TRANSLATION_ENGINE_STATUS` (dict) - - `SELECTABLE_TRANSCRIPTION_ENGINE_STATUS` (dict) - - `SELECTED_TAB_NO` (str) = "1" (json_serializable: 'SELECTED_TAB_NO') - - `SELECTED_TRANSLATION_ENGINES` (dict) = tab毎に選択 ('CTranslate2' 等) - - `SELECTED_YOUR_LANGUAGES`, `SELECTED_TARGET_LANGUAGES` (dict) = 翻訳元/先の選択と有効フラグ - - `SELECTED_TRANSCRIPTION_ENGINE` (str) = 'Google' - - `CONVERT_MESSAGE_TO_ROMAJI` / `CONVERT_MESSAGE_TO_HIRAGANA` (bool) - - UI 設定: `TRANSPARENCY` (int), `UI_SCALING` (int), `TEXTBOX_UI_SCALING` (int), `MESSAGE_BOX_RATIO` (int) - - `SEND_MESSAGE_BUTTON_TYPE` (str) = 'show'(候補は SEND_MESSAGE_BUTTON_TYPE_LIST) - - `SHOW_RESEND_BUTTON` (bool) - - `FONT_FAMILY` (str) = 'Yu Gothic UI' - - `UI_LANGUAGE` (str) = 'en'(候補は SELECTABLE_UI_LANGUAGE_LIST) - - `MAIN_WINDOW_GEOMETRY` (dict) = {x_pos, y_pos, width, height} - - マイク/スピーカー関係: `AUTO_MIC_SELECT`, `SELECTED_MIC_HOST`, `SELECTED_MIC_DEVICE`, `MIC_THRESHOLD`, `MIC_AUTOMATIC_THRESHOLD`, `MIC_RECORD_TIMEOUT`, `MIC_PHRASE_TIMEOUT`, `MIC_MAX_PHRASES`, `MIC_WORD_FILTER`, `HOTKEYS` 等 - - `PLUGINS_STATUS` (list) - - マイク転写確度閾値: `MIC_AVG_LOGPROB`, `MIC_NO_SPEECH_PROB` - - スピーカー関連(同様の項目): `AUTO_SPEAKER_SELECT`, `SELECTED_SPEAKER_DEVICE`, `SPEAKER_THRESHOLD`, ... 
- - `OSC_IP_ADDRESS` (str) = '127.0.0.1' - - `OSC_PORT` (int) = 9000 - - `AUTH_KEYS` (dict) = {'DeepL_API': None} - - `USE_EXCLUDE_WORDS` (bool) = True - - 計算デバイス選択: `SELECTED_TRANSLATION_COMPUTE_DEVICE` / `SELECTED_TRANSCRIPTION_COMPUTE_DEVICE`(`getComputeDeviceList()` に基づくデバイス辞書) - - 重み/計算タイプ: `CTRANSLATE2_WEIGHT_TYPE`, `WHISPER_WEIGHT_TYPE`, `SELECTED_TRANSLATION_COMPUTE_TYPE`, `SELECTED_TRANSCRIPTION_COMPUTE_TYPE` - - オーバーレイ設定: `OVERLAY_SMALL_LOG`, `OVERLAY_SMALL_LOG_SETTINGS`, `OVERLAY_LARGE_LOG`, `OVERLAY_LARGE_LOG_SETTINGS`, `OVERLAY_SHOW_ONLY_TRANSLATED_MESSAGES` 等 - - VRC/ログ/WebSocket: `SEND_MESSAGE_TO_VRC`, `SEND_RECEIVED_MESSAGE_TO_VRC`, `LOGGER_FEATURE`, `VRC_MIC_MUTE_SYNC`, `NOTIFICATION_VRC_SFX`, `WEBSOCKET_SERVER`, `WEBSOCKET_HOST`, `WEBSOCKET_PORT` - -# config.py — 完全上書きドキュメント - -目的: アプリケーションの全設定を集中管理するシングルトン `config`(クラス名: `Config`、インスタンス: `config`)。 - -特徴: -- JSON シリアライズ対象のプロパティには `@json_serializable('KEY_NAME')` デコレータが付いており、`load_config()` / `saveConfig()` によって `config.json` に永続化されます。 -- プロパティは「読み取り専用 (Read Only)」と「読み書き (Read/Write)」に分類されます。読み書き可能なプロパティはバリデーション処理とともに setter が用意されています。 -- 設定は内部的に `_config_data` に保持され、`saveConfig()` はデバウンス(2秒)でファイルへ書き込みます。即時書き込みオプションも可能です(saveConfig(..., immediate_save=True))。 - -## 生成とライフサイクル -- `Config()` はシングルトン(`__new__` で単一インスタンスを生成)。 -- `init_config()` でデフォルト値を初期化し、その後 `load_config()` が `config.json` を読み込んで既存値を適用します。 - -## 主要プロパティ一覧(型・デフォルト・説明) - -注: 下は `config.py` の初期化ロジックに基づく抜粋です。`json_serializable` が付与されたキーは `config.json` に書き出されます。 - -- Read only - - `VERSION` (str) = "3.2.2" - - `PATH_LOCAL` (str) = フローズン(frozen)実行ファイルのディレクトリか、ソースの `__file__` のディレクトリ - - `PATH_CONFIG` (str) = PATH_LOCAL/config.json - - `PATH_LOGS` (str) = PATH_LOCAL/logs - - `GITHUB_URL`, `UPDATER_URL`, `BOOTH_URL`, `DOCUMENTS_URL`, `DEEPL_AUTH_KEY_PAGE_URL` (str) - - `MAX_MIC_THRESHOLD` (int) = 2000 - - `MAX_SPEAKER_THRESHOLD` (int) = 4000 - - `WATCHDOG_TIMEOUT` (int) = 60 - - `WATCHDOG_INTERVAL` (int) = 20 - - `SELECTABLE_*` 系: 
各種選択肢のリスト/イテレータ(モデルの重みや言語、UI 言語等)。 - -- Read/Write(主な項目) - - `SEND_MESSAGE_FORMAT_PARTS` (dict) = デフォルトで message/translation/translation_first 等を含むフォーマット定義。json_serializable キー: 'SEND_MESSAGE_FORMAT_PARTS' - - `RECEIVED_MESSAGE_FORMAT_PARTS` (dict) - - `ENABLE_TRANSLATION` (bool) = False - - `ENABLE_TRANSCRIPTION_SEND` (bool) = False - - `ENABLE_TRANSCRIPTION_RECEIVE` (bool) = False - - `ENABLE_FOREGROUND` (bool) = False - - `ENABLE_CHECK_ENERGY_SEND` (bool) = False - - `ENABLE_CHECK_ENERGY_RECEIVE` (bool) = False - - `SELECTABLE_CTRANSLATE2_WEIGHT_TYPE_DICT` (dict) = {weight_type: False, ...} - - `SELECTABLE_WHISPER_WEIGHT_TYPE_DICT` (dict) - - `SELECTABLE_TRANSLATION_ENGINE_STATUS` (dict) - - `SELECTABLE_TRANSCRIPTION_ENGINE_STATUS` (dict) - - `SELECTED_TAB_NO` (str) = "1" (json_serializable: 'SELECTED_TAB_NO') - - `SELECTED_TRANSLATION_ENGINES` (dict) = tab毎に選択 ('CTranslate2' 等) - - `SELECTED_YOUR_LANGUAGES`, `SELECTED_TARGET_LANGUAGES` (dict) = 翻訳元/先の選択と有効フラグ - - `SELECTED_TRANSCRIPTION_ENGINE` (str) = 'Google' - - `CONVERT_MESSAGE_TO_ROMAJI` / `CONVERT_MESSAGE_TO_HIRAGANA` (bool) - - UI 設定: `TRANSPARENCY` (int), `UI_SCALING` (int), `TEXTBOX_UI_SCALING` (int), `MESSAGE_BOX_RATIO` (int) - - `SEND_MESSAGE_BUTTON_TYPE` (str) = 'show'(候補は SEND_MESSAGE_BUTTON_TYPE_LIST) - - `SHOW_RESEND_BUTTON` (bool) - - `FONT_FAMILY` (str) = 'Yu Gothic UI' - - `UI_LANGUAGE` (str) = 'en'(候補は SELECTABLE_UI_LANGUAGE_LIST) - - `MAIN_WINDOW_GEOMETRY` (dict) = {x_pos, y_pos, width, height} - - マイク/スピーカー関係: `AUTO_MIC_SELECT`, `SELECTED_MIC_HOST`, `SELECTED_MIC_DEVICE`, `MIC_THRESHOLD`, `MIC_AUTOMATIC_THRESHOLD`, `MIC_RECORD_TIMEOUT`, `MIC_PHRASE_TIMEOUT`, `MIC_MAX_PHRASES`, `MIC_WORD_FILTER`, `HOTKEYS` 等 - - `PLUGINS_STATUS` (list) - - マイク転写確度閾値: `MIC_AVG_LOGPROB`, `MIC_NO_SPEECH_PROB` - - スピーカー関連(同様の項目): `AUTO_SPEAKER_SELECT`, `SELECTED_SPEAKER_DEVICE`, `SPEAKER_THRESHOLD`, ... 
- - `OSC_IP_ADDRESS` (str) = '127.0.0.1' - - `OSC_PORT` (int) = 9000 - - `AUTH_KEYS` (dict) = {'DeepL_API': None} - - `USE_EXCLUDE_WORDS` (bool) = True - - 計算デバイス選択: `SELECTED_TRANSLATION_COMPUTE_DEVICE` / `SELECTED_TRANSCRIPTION_COMPUTE_DEVICE`(`getComputeDeviceList()` に基づくデバイス辞書) - - 重み/計算タイプ: `CTRANSLATE2_WEIGHT_TYPE`, `WHISPER_WEIGHT_TYPE`, `SELECTED_TRANSLATION_COMPUTE_TYPE`, `SELECTED_TRANSCRIPTION_COMPUTE_TYPE` - - オーバーレイ設定: `OVERLAY_SMALL_LOG`, `OVERLAY_SMALL_LOG_SETTINGS`, `OVERLAY_LARGE_LOG`, `OVERLAY_LARGE_LOG_SETTINGS`, `OVERLAY_SHOW_ONLY_TRANSLATED_MESSAGES` 等 - - VRC/ログ/WebSocket: `SEND_MESSAGE_TO_VRC`, `SEND_RECEIVED_MESSAGE_TO_VRC`, `LOGGER_FEATURE`, `VRC_MIC_MUTE_SYNC`, `NOTIFICATION_VRC_SFX`, `WEBSOCKET_SERVER`, `WEBSOCKET_HOST`, `WEBSOCKET_PORT` - -## セッタのバリデーション -- 多くの setter は型チェックと候補値チェック(リストや辞書のキー整合性)を行います。例: - - `SELECTED_MIC_DEVICE` は `device_manager.getMicDevices()` の一覧に存在する名前であること。 - - `SELECTED_TRANSLATION_COMPUTE_TYPE` は `SELECTED_TRANSLATION_COMPUTE_DEVICE['compute_types']` に含まれる文字列であること。 - - UI 関連の集合は `SELECTABLE_UI_LANGUAGE_LIST` などの一覧に従う。 - -## 永続化の詳細 -- `load_config()` は `config.json` が存在し、かつ中身がある場合に読み込みを試み、ファイル中のキーを `setattr(self, key, value)` して既存の setter を利用して適用します。 -- 読み込み後、`json_serializable` 指定された全キーを `_config_data` に書き戻し、ファイルを上書き(常に書く)。 - -## 使い方の例 - -以下は `config` を使った典型的なコード例です。 - -```python -from config import config - -# 値の参照 -print('App version:', config.VERSION) -print('Current UI language:', config.UI_LANGUAGE) - -# 値の更新(setter を通す) -config.UI_LANGUAGE = 'ja' -config.SEND_MESSAGE_TO_VRC = False - -# 複雑な dict を設定する例(メッセージフォーマットを上書き) -config.SEND_MESSAGE_FORMAT_PARTS = { - 'message': {'prefix': '[YOU] ', 'suffix': ''}, - 'separator': '\n', - 'translation': {'prefix': '[TR] ', 'separator': '\n', 'suffix': ''}, - 'translation_first': True, -} - -# 即時保存したい場合(即座に config.json を上書き) -config.saveConfig('CUSTOM_SAVE', {'foo': 'bar'}, immediate_save=True) -``` - -## エッジケース / 注意点 -- `load_config()` はファイル値を setter 
経由で当てはめるため、ファイルに古いキーや予期しない型があると setter によって無視されることがあります(例: 言語キーが不正の場合)。 -- `saveConfig()` はデバウンスされるため、高頻度の設定変更では複数の変更がまとめて書き込まれます。即時書き込みが必要な操作(重要な鍵の更新など)は `immediate_save=True` を使ってください。 -- `SELECTABLE_*` 系や `*_DICT` 系は初期化時に外部モジュール(翻訳リソース、whisper_models、device_manager 等)から生成されます。これらが利用できない環境ではデフォルトが空になる可能性があります。 - -### 2025-10-13 の変更(device_manager / config に関する挙動改善) - -- `DeviceManager` のシングルトン生成時に軽量 `init()` を実行するようになりました。これにより、モジュールのインポート順序に依存して `config` の `SELECTED_*` が `NoDevice` のままになる問題が軽減されます(監視スレッドは自動起動しません)。 -- `config.init_config()` はこれまで `device_manager._initialized` をチェックしていた箇所を見直し、`device_manager.getDefaultMicDevice()` / `getDefaultSpeakerDevice()` といったアクセサを利用して値を取得するように変更しました。アクセサは必要なら遅延初期化を行うため、`controller` と `config` のトップレベルインポート順に依存しません。 -- 影響: 起動時に PyAudio 等の依存が利用可能であれば、起動中に実機デバイス名が `config` に反映される確率が高くなります。依存がない場合は従来どおり `NoDevice` にフォールバックします。 - -推奨運用: -- `controller.init()` でコールバック登録後、`mainloop` の起動シーケンスで `device_manager.startMonitoring()` を明示的に呼ぶと、起動後もデバイス変更がコールバック経由で確実に届きます(この呼び出しは任意です)。 - -## 推奨改善点(将来的なドキュメント/実装) -- 設定スキーマを JSON Schema で定義し、load 時の検証を明確化すると安全性が向上します。 -- 設定変更イベントを発火する仕組み(observer パターン)を導入すると、Controller/Model 側の再初期化処理をより明確に実装できます。 - ---- - -このファイルは `config.py` の実装に基づいて自動生成的に作成されたドキュメント(overwrite)です。実装の微細な差分は `config.py` を参照してください。 - -## 詳細設計 - -目的: アプリケーションの全設定を保持するシングルトン `config`。 - -ポイント: -- JSON シリアライズ可能な設定値には `@json_serializable` デコレータが付与され、save 操作でファイルへ書き出される。 -- 多数のプロパティが定義され、読み取り専用 (Read Only) と 読み書き (Read/Write) が混在する。 -- 設定項目の例: - - ENABLE_TRANSLATION, ENABLE_TRANSCRIPTION_SEND, ENABLE_TRANSCRIPTION_RECEIVE - - SELECTED_MIC_HOST, SELECTED_MIC_DEVICE, SELECTED_SPEAKER_DEVICE - - SELECTED_TRANSLATION_ENGINES, SELECTED_YOUR_LANGUAGES, SELECTED_TARGET_LANGUAGES - - PATH_LOCAL, PATH_LOGS, VERSION, GITHUB_URL, UPDATER_URL - - SELECTABLE_CTRANSLATE2_WEIGHT_TYPE_DICT / SELECTABLE_WHISPER_WEIGHT_TYPE_DICT - - COMPUTE 関連: SELECTABLE_COMPUTE_DEVICE_LIST, SELECTED_TRANSLATION_COMPUTE_DEVICE, SELECTED_TRANSCRIPTION_COMPUTE_DEVICE 
- -設計上の契約: -- 全ての get/set は辞書形で status/result を返す Controller の呼び出しに合わせて変換される。 -- 外部から設定を変更した際は必要に応じて Model/Controller による再初期化処理を呼ぶ。 - -検討事項: -- 現状は設定変更が即時反映されるが、一部操作は再初期化(モデルロード、デバイス再取得)を要求するため Controller 側で連携している。 diff --git a/src-python/docs/modules/config_ref.md b/src-python/docs/modules/config_ref.md deleted file mode 100644 index 776a2f1a..00000000 --- a/src-python/docs/modules/config_ref.md +++ /dev/null @@ -1,39 +0,0 @@ -# config.py 変更参照ドキュメント - -このファイルは `config.py` に対して行った最近のリファクタリング / 安全化についての参照資料です。 - -目的: import 時の副作用を抑止し、`device_manager` などの外部モジュールがない環境でも安全に `config` をインポートできるようにすること。 - -主な変更点 - -- import-time の初期化保護 - - `Config.__new__` の中で `init_config()` / `load_config()` を呼び出しますが、これらを try/except で保護し、初期化に失敗しても例外を上位に伝播させずログ記録のみで処理を継続します。 - - このため、アプリ起動環境に必須ではない外部依存が欠けている場合でも、`import config` によるクラッシュを防止します。 - -- 外部モジュールの呼び出しをガード - - `device_manager`、翻訳/文字起こし関連のモデル一覧 (`whisper_models`, `ctranslate2_weights`) などは import 時に直接呼び出さず、存在チェック(try/except)を行って安全なデフォルト(空リストや "NoDevice" など)にフォールバックします。 - - これによりヘビーな依存(Windows 固有パッケージや大きな ML ライブラリ)がない CI 環境や軽量実行環境での import が安定します。 - -- エラーロギング - - 初期化やデフォルト取得に失敗した場合は、例外を握りつぶすのではなく `utils.errorLogging()` を経由してエラーメッセージを残します。これにより問題の診断が容易になります。 - -- 設定デフォルト値の扱い - - `getDefaultMicDevice()` / `getDefaultSpeakerDevice()` などを呼ぶ箇所は try/except で保護され、失敗時には `"NoHost"` / `"NoDevice"` 等の安全な文字列で代替されます。 - -利用上の注意 - -- 既存のコードは `config` をインポートしただけで `device_manager` を起動することを想定している箇所があるかもしれません。今回のリファクタリングでは "import 時に副作用を起こさない" ことを優先しているため、もし明示的な初期化を必要とする場合は、呼び出し側で `device_manager.init()` を明示的に行ってください。 - -- もし `config` のロードで致命的な設定エラーが発生した場合でも、アプリは継続動作しますが、ログを確認して手動で修復することが必要になる場合があります。 - -ドキュメントの提案差分 - -- 既存 `docs/modules/config.md` の "生成とライフサイクル" セクションに次の一文を追加することを推奨します: - - > 注意: `Config()` のインポートは副作用を起こさないよう保護されています。プラットフォーム依存のコンポーネント(例: `device_manager`)は明示的に初期化してください。 - -- `SELECTABLE_*` 系の説明に、起動環境に依存して空になる可能性があることを明示するパラグラフを追加してください(CI 環境や headless 環境では空になる)。 - ---- - -作業済み: このファイルはワークスペースに `docs/modules/config_ref.md` 
として作成済みです。既存 `docs/modules/config.md` は上書きしていません。上書き/マージの希望があれば続けます。 diff --git a/src-python/docs/modules/controller.md b/src-python/docs/modules/controller.md deleted file mode 100644 index ccbb6cfd..00000000 --- a/src-python/docs/modules/controller.md +++ /dev/null @@ -1,162 +0,0 @@ -## Controller クラス仕様書 - -概要 -- `Controller` はアプリケーションのコントロール層(Facade)で、`model` と `device_manager`、および外部 UI / mainloop とを仲介します。 -- UI からのコマンドを受け取り、`model` の開始/停止、設定の変更、ダウンロードの開始、各種フラグの切り替え、進捗通知(`run` コールバック経由)を行います。 -- 多くのメソッドは JSON 系の応答オブジェクトを返します: {"status": int, "result": Any}。副作用で `self.run(status, run_mapping[key], payload)` を呼び出して UI に通知します。 - -### mainloop のマルチワーカー化とカノニカルロックについて (2025-10-13) - -- `mainloop.Main` はデフォルトで複数(デフォルト 3)のハンドラワーカースレッドを動かすようになりました。これにより、モデルロードなどの重い操作で他のリクエストが待たされることが少なくなります。 -- `/set/enable/` と `/set/disable/` のように同一機能の ON/OFF を切り替えるエンドポイントは、内部的にカノニカルロックキー(例: `/lock/set/`)に正規化してロック取得されます。これにより、遅い disable の処理が後から来て最終状態を書き換えてしまうレースが防がれます。 -初期化とランタイムフック -- __init__() -> None - - フィールド: `init_mapping: dict`, `run_mapping: dict`, `run: Callable`, `device_access_status: bool` - - `setInitMapping(init_mapping: dict)` / `setRunMapping(run_mapping: dict)` / `setRun(run: Callable)` で mainloop からマッピング・コールバックを注入されることを想定。 - -コールバック通知用メソッド(UI への通知) -- connectedNetwork() / disconnectedNetwork() -> None -- enableAiModels() / disableAiModels() -> None -- updateMicHostList() / updateMicDeviceList() / updateSpeakerDeviceList() -> None -- updateConfigSettings() -> None - - これらは `self.run(status, run_mapping[key], payload)` を使って UI にイベントを送ります。 - -ダウンロード用ヘルパークラス -- class DownloadCTranslate2(run_mapping: dict, weight_type: str, run: Callable) - - progressBar(progress: float) -> None - - downloaded() -> None -- class DownloadWhisper(run_mapping: dict, weight_type: str, run: Callable) - - progressBar(progress: float) -> None - - downloaded() -> None - -音声・翻訳イベントハンドラ -- micMessage(result: dict) -> None - - 引数: result: {"text": str|False, "language": str} - - 挙動: 
ワードフィルタ、繰り返し検出、翻訳(`model.getInputTranslate`)、音声送信(OSC)・オーバーレイ更新・WebSocket ブロードキャスト等を行う。 - - エラー: 翻訳中に VRAM OOM が起きた場合は model.detectVRAMError を使って検出し、翻訳機能を無効化して UI に 400 を通知。 - -- speakerMessage(result: dict) -> None - - 引数: result: {"text": str|False, "language": str} - - micMessage と同様だが、受信(speaker)側のロジックやオーバーレイの扱いが異なる。 - -- chatMessage(data: dict) -> dict - - 引数: {"id": Any, "message": str} - - 戻り値: {"status": int, "result": {"id":..., "original":..., "translations":[...]}} - - 挙動詳細: - - 翻訳処理は `model.getInputTranslate` を呼び出します。翻訳処理中に VRAM 関連の例外が発生した場合、`model.detectVRAMError` によって検出し、翻訳機能を自動で無効化します。 - - VRAM エラー検出時は Controller は UI に対して 400 系の run イベントを発行する(例: `error_translation_chat_vram_overflow`, `enable_translation` で無効化通知)。 - - エラー発生時の戻り値: 翻訳を行わずに基本情報を含む 200 応答を返すコードパスがあり、クライアント側でのハンドリングを想定しています。 - -設定取得/変更系メソッド(代表例) -- getVersion() -> {"status":200, "result": config.VERSION} -- getComputeMode() / getComputeDeviceList() / getSelectedTranslationComputeDevice() -> dict -- setSelectedTranslationComputeDevice(device: str) -> {"status":200, "result": device} -- getSelectableCtranslate2WeightTypeDict() -> dict -- setEnableTranslation() / setDisableTranslation() -> dict - - setEnableTranslation はモデルロード時に VRAM エラーを検知するロジックを内包している。 - - 多くの setXXX / getXXX メソッドは config を直接操作して即時反映する。 - -自動デバイス選択 -- applyAutoMicSelect() / applyAutoSpeakerSelect() - - `device_manager` にコールバックを登録して自動選択を有効化する。 - -トランスクリプション制御(スレッドで実行) -- startTranscriptionSendMessage() / stopTranscriptionSendMessage() / startThreadingTranscriptionSendMessage() / stopThreadingTranscriptionSendMessage() -- startTranscriptionReceiveMessage() / stopTranscriptionReceiveMessage() / startThreadingTranscriptionReceiveMessage() / stopThreadingTranscriptionReceiveMessage() - - 実際の処理は `model.startMicTranscript` / `model.startSpeakerTranscript` に委譲される。VRAM エラーは検出して UI に通知し、自動的に停止する処理あり。 - -閾値・チェック系 -- startCheckMicEnergy() / stopCheckMicEnergy() / startThreadingCheckMicEnergy() / stopThreadingCheckMicEnergy() -- 
startCheckSpeakerEnergy() / stopCheckSpeakerEnergy() / startThreadingCheckSpeakerEnergy() / stopThreadingCheckSpeakerEnergy() - -ダウンロード開始(非同期/同期) -- downloadCtranslate2Weight(data: str, asynchronous: bool=True) -> dict -- downloadWhisperWeight(data: str, asynchronous: bool=True) -> dict - - 非同期なら別スレッドでダウンロードを行い progressBar コールバックを経由して UI に進捗を返す。 - -Watchdog / WebSocket / OSC 周り -- startWatchdog() / feedWatchdog() / stopWatchdog() -- getWebSocketHost() / setWebSocketHost(data) -> dict -- setEnableWebSocketServer() / setDisableWebSocketServer() -- setOscIpAddress(data) / setOscPort(data) - - ネットワーク周りの設定は検証ロジック(IP アドレス検証、サーバー利用可否のチェック)を含む。 - -ユーティリティ関数 -- messageFormatter(format_type: str, translation: list, message: str) -> str - - OSC に送る文面のフォーマットを生成(設定に基づく)。 -- replaceExclamationsWithRandom(text) -> (str, dict) -- restoreText(escaped_text, escape_dict) -> str -- removeExclamations(text) -> str - -重要な戻り値規約 -- 成功: {"status": 200, "result": ...} -- 失敗: {"status": 400, "result": {"message": str, "data": Any}} -- 多くのメソッドは UI への通知として `self.run(status, run_mapping[key], payload)` を行う。 - -エッジケース / エラー処理 -- VRAM OOM 検出: モデル例外が上がると model.detectVRAMError(e) を呼び出し、VRAM エラーが検出された場合は関連機能を自動で無効化して UI に 400 を通知する。 -- デバイスアクセスの競合: `device_access_status` による簡易ロックで、デバイス操作中は待機する。 -- ネットワーク依存: DeepL 等の外部翻訳 API 利用可否は `model.authenticationTranslatorDeepLAuthKey` で検査し、無効時は選択肢を更新する。 - -呼び出し例(Python から直接) -```python -from controller import Controller -ctrl = Controller() -# run コールバックの例: (status:int, event_name:str, payload:any) -def ui_run(status, event, payload): - print(status, event, payload) - -ctrl.setRun(ui_run) -resp = ctrl.setEnableTranslation() -print(resp) # {'status':200, 'result': True} - -data = {"id": 123, "message": "Hello"} -resp = ctrl.chatMessage(data) -print(resp) -``` - -シーケンス図(簡易: マイク入力 -> 翻訳 -> UI 通知) -```mermaid -sequenceDiagram - participant UI - participant Mainloop - participant Controller - participant Model - - UI->>Mainloop: ユーザ操作 (send message) - 
Mainloop->>Controller: chatMessage(data) - Controller->>Model: getInputTranslate(message) - Model-->>Controller: translation - Controller->>Model: oscSendMessage(...) - Controller->>UI: run(200, run_mapping['transcription_send_mic_message'], payload) -``` - -次の作業 -- `docs/api.md` を `mainloop.py` のマッピングに基づいて拡張し、各エンドポイントの request/response 例を追加してください。 - -参考: 実装詳細は `src-python/controller.py` を参照してください(メソッドごとに細かな条件分岐や run_mapping キーが存在します)。 -# controller.py — 詳細設計 - -目的: UI(または外部プロセス)からの操作を受け、`config` と `model` を操作して副作用を生じさせるコマンド層。 - -主要クラス/関数: -- class Controller - - 属性: - - init_mapping: アプリ起動時の読み出し用マッピング(/get/data/*) - - run_mapping: イベント通知先のエンドポイントマップ(run 関数で使用) - - run: run(status, endpoint, result) を格納 - - - 主要メソッド: - - setEnableTranslation / setDisableTranslation: 翻訳機能の切替(モデル切替や VRAM エラー回復処理を含む) - - start/stop transcription/energy checks: Model の startMicTranscript 等を呼ぶ - - downloadCtranslate2Weight / downloadWhisperWeight: ダウンロードを非同期で開始し進捗を run 経由で通知 - - micMessage / speakerMessage / chatMessage: 認識結果を受け、翻訳/OSC/Overlay/WebSocket/ログ記録を行う主要ハンドラ - - messageFormatter: OSC 用メッセージ整形 - - 多数の get/set 系関数: config の各種設定を読み書きし status/result を返す - -エラー/例外: -- VRAM 関連は特に注意し、検出時は該当機能を無効化してユーザーへ通知する。 - -API マッピング: -- `mainloop.py` の `mapping` と連携しており、多くの `/get/data/*` `/set/data/*` `/run/*` が Controller のメソッドにマッピングされる(詳細は docs/api.md を参照)。 - diff --git a/src-python/docs/modules/controller_ref.md b/src-python/docs/modules/controller_ref.md deleted file mode 100644 index 2f7cd570..00000000 --- a/src-python/docs/modules/controller_ref.md +++ /dev/null @@ -1,25 +0,0 @@ -## Controller リファクタリングノート (2025-10-09) - -概要: -このドキュメントは `controller.py` に適用した互換性修正と実装上の注意点をまとめた参照用メモです。既存の `controller.md` を直接上書きするのではなく、参照版として保存しています。 - -実施内容(要約): -- Model の lazy-init 対応に合わせ、`Controller.__init__()` 内で明示的に `model.init()` を呼び出す互換レイヤを追加しました。これにより、既存コードが import 時に model の属性へアクセスしていても安全に動作します。 -- オーバーレイの存在チェックを安全に行うため、`_is_overlay_available()` ヘルパを導入しました。以前に直接参照していた `model.overlay.initialized` 
をこのヘルパで置換しています(合計 5 箇所を置換)。 -- `micMessage` 内の翻訳周りで発生していたインデントの回帰を修正しました(try/except ブロックの整合性を回復)。 -- 未使用の `import copy` を削除しました。 -- ドキュメント編集は非破壊を原則とし、既存ファイルの安全な上書きが困難な場合は参照版(このファイル)を作成する方針を採りました。 - -互換性と注意点: -- Controller は起動時に model を初期化するため、多くの通常の利用ケースで変更の影響はありません。 -- ただし、外部のモジュールやテストコードが import 時に model の内部属性(例: `model.overlay` や `model.translator`)へ直接アクセスしている場合は、明示的に `model.init()` を呼ぶか、Controller を経由して初期化することを推奨します。 - -検証: -- 軽量なローカル検証を行い、`from controller import Controller; Controller()` の実行で初期化が成功することを確認しました。 - -今後の作業候補: -- 既存の `docs/modules/controller.md` とこの参照ドキュメントのマージ(必要であれば差分を反映して上書きを行う)。 -- linter/mypy を通して型安全性の追加と残存する静的解析の問題を解消する。 -- テスト: Controller の初期化・主要ハンドラ(micMessage/chatMessage)を対象にしたユニットテストを追加して、model.lazy-init による破壊的変更が再発しないことを保証する。 - -このファイルは自動生成ではなく、安全に変更履歴を残すための参照メモです。上書きを希望する場合はご指示ください。 diff --git a/src-python/docs/modules/device_manager.md b/src-python/docs/modules/device_manager.md deleted file mode 100644 index 316384c2..00000000 --- a/src-python/docs/modules/device_manager.md +++ /dev/null @@ -1,84 +0,0 @@ -# device_manager.py — デバイス検出と監視(overwrite) - -目的: システムのマイク/スピーカー(主に Windows の WASAPI)を列挙し、変更を監視してコールバックで通知する `DeviceManager` シングルトンを提供します。 - -主要コンポーネント: -- class Client(MMNotificationClient) - - オーディオデバイスのシステムイベント(追加/削除/デフォルト変更)を受け取り、監視ループの再起動をトリガーします。 - -- class DeviceManager - - シングルトンインスタンス: `device_manager` - - 主要プロパティ: - - `mic_devices` (dict): {host_name: [device_info, ...]} - - `default_mic_device` (dict): {'host': {...}, 'device': {...}} - - `speaker_devices` (list): [device_info, ...] 
- - `default_speaker_device` (dict) - - 各種 prev_/update_flag_: 差分検出用 - - callback 関連プロパティ: `callback_default_mic_device`, `callback_mic_device_list`, など多数 - - - 主要メソッド (抜粋): - - `update()` -> None: PyAudio を利用してホスト毎の入力デバイスとループバック(スピーカー)を列挙し内部状態を更新します。 - - `checkUpdate()` -> bool: 前回値との差分を計算して変更フラグを返します。 - - `monitoring()` -> None: pycaw/MMNotificationClient を使った長時間監視ループ。変化を検出すると各コールバックを呼び出す。 - - `startMonitoring()` / `stopMonitoring()` - - `getMicDevices()` / `getDefaultMicDevice()` / `getSpeakerDevices()` / `getDefaultSpeakerDevice()` - - `forceUpdateAndSetMicDevices()` / `forceUpdateAndSetSpeakerDevices()` - -コールバックAPI(例): -- `setCallbackMicDeviceList(callback)` — マイクデバイスリスト変更時に呼ばれる -- `setCallbackDefaultMicDevice(callback)` — デフォルトマイク変更時に呼ばれる -- `setCallbackProcessBeforeUpdateMicDevices(callback)` / `setCallbackProcessAfterUpdateMicDevices(callback)` — 更新前後のフック - -例: - -```python -from device_manager import device_manager - -def on_default_mic(host_name, device_name): - print('Default mic changed:', host_name, device_name) - -device_manager.setCallbackDefaultMicDevice(on_default_mic) -device_manager.forceUpdateAndSetMicDevices() -``` - -注意点: -- Windows 固有のモジュール(PyAudio paWASAPI, pycaw)に依存します。クロスプラットフォーム対応が必要な場合は別実装が必要です。 -- 監視スレッドは永続的に動作するため、アプリケーション終了時は `stopMonitoring()` を呼んで安全に停止してください。 - -変更点(2025-10-13): -- `DeviceManager` のシングルトン生成時(`__new__`)に軽量な `init()` を実行するようになりました。これによりモジュールのインポート順に依存せず、最小限の内部構造が常に確立されます(※監視スレッドは自動で起動しません)。 -- `init()` は監視スレッドを開始しませんが、PyAudio が利用可能な場合に限りベストエフォートで一度だけ `update()` を呼び、起動時に可能な限り実機デバイス情報を埋めるようになりました(例外は握り潰して安全性を維持)。 -- アクセサ (`getDefaultMicDevice()` / `getDefaultSpeakerDevice()` など) は遅延初期化を行い、呼び出し時に `init()` が動いていない場合は安全に初期化されるようになりました。これにより `controller` と `config` がトップレベルインポートで互いに依存している状況でも、`config` に正しいデバイス情報が入るようになります。 - -推奨起動シーケンス: -- `controller.init()` でコールバック登録が完了した直後に、`mainloop` の起動シーケンス中で明示的に `device_manager.startMonitoring()` を呼ぶことを推奨します。これにより以降のデバイス変更がコールバックを通じて確実に届きます。なお、`startMonitoring()` 
は任意で、軽量にしたい場合は呼ばなくても構いません(ただし動的変化は検出されません)。 - -ドキュメントにおける重要な注意: -- この変更は "import-time に重大な副作用を持たせない" という方針を維持しつつ、インポート順の違いによる初期化漏れを解消するために行われています。`init()` は監視スレッドを開始しないため、インポートだけでスレッドが走ることはありません。 - -## 詳細設計 - -目的: ローカルの入力(マイク)と出力(ループバックから抽出されたスピーカー)デバイスを列挙し、変更を監視してコールバックで通知する。Windows の WASAPI 等に依存。 - -主要クラス/関数: -- class Client(MMNotificationClient) - - Audio デバイスの変更イベントを受けると `loop = False` にして監視ループを再起動させる設計。 - -- class DeviceManager - - シングルトン: `device_manager = DeviceManager()` - - 主要属性: - - mic_devices: {host: [device_info...]} - - default_mic_device: {host, device} - - speaker_devices: [device_info...] - - default_speaker_device: {device} - - 各種 prev_*, update_flag_*: 差分検出のために保持 - - コールバック属性: callback_default_mic_device, callback_host_list など - - 主要メソッド: - - update(): PyAudio を使ってホストごとにデバイス列挙。Loopback デバイスを speaker_devices に集める。 - - monitoring(): MMNotificationClient と組み合わせてデバイスの変化を検出し、コールバックを発行 - - set/clear Callback 系: UI や Controller が登録して自動選択や再起動をトリガーできる - - forceUpdateAndSetMicDevices / forceUpdateAndSetSpeakerDevices: 即時更新とコールバック通知 - -注意点: -- Windows 固有の処理(paWASAPI, pycaw)に依存する。 -- デバイス取得はリソースに依存するので try/except で例外を吸収し errorLogging() を呼ぶ。 diff --git a/src-python/docs/modules/device_manager_ref.md b/src-python/docs/modules/device_manager_ref.md deleted file mode 100644 index 893bfb8b..00000000 --- a/src-python/docs/modules/device_manager_ref.md +++ /dev/null @@ -1,93 +0,0 @@ -# device_manager.py — デバイス検出と監視 (改訂版) - -### 概要 -`device_manager.py` はローカルのマイク(入力)とスピーカー(ループバックから抽出)を列挙し、デフォルトデバイスの変更やデバイスリストの変化を監視してコールバックで通知するユーティリティです。 - -設計上のポイント: -- Windows 固有の依存 (`comtypes`, `pyaudiowpatch` (PyAudio + WASAPI), `pycaw`) はオプショナルです。モジュールを import してもこれらが無ければ例外にならず、プレースホルダ値を返すようになっています。 -- モジュールの import 時点では監視は開始されません。リソースやスレッドの副作用を避けるため、`init()` と `startMonitoring()` は呼び出し側で明示的に実行してください。 - ---- - -### 使い方(簡単な流れ) - -1. モジュールをインポート - -```py -from device_manager import device_manager -``` - -2. 初期化(内部状態のセットアップ) - -```py -device_manager.init() -``` - -3. 
監視の開始(バックグラウンドスレッド) - -```py -device_manager.startMonitoring() -``` - -4. 停止(アプリ終了時など) - -```py -device_manager.stopMonitoring() -``` - ---- - -### 主な API - -- `device_manager.init()` - - internal state の初期化。import 後に必ず呼ぶ必要はないが、実機デバイスを取得する前に呼ぶことを推奨します。 -- `device_manager.startMonitoring()` / `device_manager.stopMonitoring()` - - 監視の開始 / 停止。`startMonitoring()` はデーモンスレッドを作成します。`stopMonitoring()` は best-effort で join を試みます。 -- `device_manager.getMicDevices()` - - ホストごとにグループ化された入力デバイスの辞書を返します。例: `{ 'Realtek': [ {index: 2, name: 'Microphone (Realtek)'} ] }`。 -- `device_manager.getDefaultMicDevice()` / `device_manager.getSpeakerDevices()` / `device_manager.getDefaultSpeakerDevice()` - - デフォルトデバイスやスピーカーループバックの情報を返します。 -- `device_manager.forceUpdateAndSetMicDevices()` / `device_manager.forceUpdateAndSetSpeakerDevices()` - - 即時に update() を実行して対応するコールバックを呼びます。 - ---- - -### コールバック登録(例) - -コールバックは例外を内部で捕捉してログを出すため、コールバック実装側でもエラーハンドリングしてください。 - -- `setCallbackDefaultMicDevice(callback)` — デフォルト入力が変わったときに `callback(host_name, device_name)` が呼ばれます。 -- `setCallbackDefaultSpeakerDevice(callback)` — デフォルト出力が変わったときに `callback(device_name)` が呼ばれます。 -- `setCallbackHostList(callback)` / `setCallbackMicDeviceList(callback)` / `setCallbackSpeakerDeviceList(callback)` — それぞれ list 変更時に `callback()` が呼ばれます。 -- `setCallbackProcessBeforeUpdateMicDevices(callback)` / `setCallbackProcessAfterUpdateMicDevices(callback)` — 更新の前後に呼ばれるフックです。 - -簡単な例: - -```py -from device_manager import device_manager - -def on_default_mic(host, device): - print('default mic changed', host, device) - -device_manager.init() -device_manager.setCallbackDefaultMicDevice(on_default_mic) -device_manager.startMonitoring() - -# 後で停止 -# device_manager.stopMonitoring() -``` - ---- - -### 注意点 / トラブルシュート - -- Windows 固有の依存が無い場合、`getMicDevices()` などはデフォルトのプレースホルダ(`NoHost` / `NoDevice`)を返します。実機のデバイス検出や WASAPI によるループバック検出は Windows 環境でのみ保証されます。 -- `startMonitoring()` は監視用のデーモンスレッドを作るため、アプリケーションの終了時には `stopMonitoring()` 
を呼ぶかプロセスを終了してください。`stopMonitoring()` は join を行いますが、失敗した場合でも致命的にならないよう best-effort 実装です。 -- コールバック内部で例外が発生してもモジュール側で捕捉してログ出力します(`utils.errorLogging()`)。コールバック側で詳細なハンドリングやリトライが必要な場合は呼び出し側で行ってください。 - ---- - -### 実装メモ - -- `monitoring()` は可能なら Windows の COM (pycaw / MMNotificationClient) を使ってイベント駆動で待ち受け、失敗時や非Windows 環境では PyAudio を使ったポーリング(定期的な update()) にフォールバックします。 -- 外部ライブラリが原因の例外は内部で捕捉し、`errorLogging()` を呼んで記録する設計です。 diff --git a/src-python/docs/modules/mainloop.md b/src-python/docs/modules/mainloop.md deleted file mode 100644 index ba38c6a8..00000000 --- a/src-python/docs/modules/mainloop.md +++ /dev/null @@ -1,50 +0,0 @@ -## mainloop モジュール(src-python/mainloop.py) - -このドキュメントは `mainloop.py` の実装と、最近行ったリファクタの概要をまとめます。`mainloop` は標準入力から JSON を受け取り、`controller` のメソッドにルーティングして標準出力へ JSON で応答を返す小さなメインループです。 - -重要な変更点: -- 2025-10-09: `Main` クラスに `start()` / `stop()` を追加し、受信スレッドとハンドラスレッドのライフサイクル管理を明示化しました。`queue.get(timeout=...)` による安全なシャットダウンを可能にしています。 -- 2025-10-13: ハンドラの振る舞いを改善しました(マルチワーカー化とロック正規化): - - マルチワーカー化: ハンドラ処理はデフォルトで複数ワーカー(例: 3 本)で並列実行されます。これにより、1 つの重い処理が他のすべてのリクエストをブロックしてしまう問題を緩和します。 - - ロック正規化: `/set/enable/` と `/set/disable/` のような on/off ペアは同一のロックキーに正規化され、同一機能の on と off が同時に別スレッドで実行されることを防ぎます。これにより、遅い方の処理結果が後から上書きして最終状態が意図しないものになる不具合を防止します。 - -クラス: Main -- __init__(controller_instance: Controller, mapping_data: dict, worker_count: int = 3) -> None - - `controller_instance`: `Controller` のインスタンス。 - - `mapping_data`: `mainloop` 内で使用する `mapping`(エンドポイント -> ハンドラ情報)辞書。 - - `worker_count`: ハンドラワーカー数(デフォルト 3)。実行環境に応じて調整可能です。 -- start() -> None - - 内部で `startReceiver()` と `startHandler()` を呼び、受信とハンドラのスレッド群を起動します。 -- stop(wait: float = 2.0) -> None - - シャットダウンシグナルをセットし、スレッド終了を待ちます(デフォルト 2 秒)。 - -動作の重要ポイント -- キュー運用: 受信した JSON は内部キューに入れられ、ハンドラワーカーが順次取り出して処理します。`queue.get(timeout=...)` を使っているため CPU 負荷を抑えつつ安全に停止できます。 -- 同期応答設計: 各エンドポイントは基本的に呼び出し元に同期的に結果を返します(`handler` が戻り値としてステータスと結果を返す)。今回の変更でもこの設計は維持されています。 -- 同一機能直列化: `/set/enable/X` と `/set/disable/X` のような on/off 
ペアは内部で同一の "ロックキー" に正規化され、同時に両方が実行されることを防ぎます。これにより、enable と disable が競合して遅い方が勝つ問題が解消されます。 - -使い方(例): - -```python -from mainloop import Main, mapping, controller - -main_instance = Main(controller_instance=controller, mapping_data=mapping) -main_instance.start() - -# 実行中に別スレッドや外部シグナルで停止させる -main_instance.stop() -``` - -確認手順(変更の検証): -1. バックエンドを起動しておく。 -2. UI/テストスクリプトから `/set/enable/translation` と `/set/disable/translation` を高速に交互送信する(数十〜数百ミリ秒間隔で連打)。 -3. ログ(`printLog` 出力)を確認し、同一機能の複数実行が同時に走っていないこと、最終状態が遅い方に常に上書きされないことを確認する。 -4. 必要に応じて `worker_count` を増減して挙動を確認する(PC リソースに応じて 1〜6 程度を推奨)。 - -注意点と推奨事項: -- `worker_count` を増やすと他のエンドポイントの並列処理性は上がりますが、controller/model 側で共有リソース(GPU メモリやデバイスハンドルなど)への同時アクセスが許可されていない場合は、controller 側で機能単位のロック(例: translation_lock)を追加してください。 -- このドキュメントの変更は `mainloop` の外側から見える挙動(同期応答、ログ、ロックの方針)を説明するものです。controller 内の処理自体は引き続き同期的に実行されます。必要があれば、enable 系の重い処理を非同期化して完了通知をイベントで返す設計(UI 変更が必要)も検討してください。 - -変更履歴: -- 2025-10-09: start/stop ライフサイクル、タイムアウト付きキュー取得、エラー処理強化を追加。 -- 2025-10-13: マルチワーカー化(デフォルト 3)と enable/disable のロック正規化を実装。これにより同一機能の on/off の同時実行を防止し、UI からの高速トグルで最終状態が遅い方に上書きされる問題を修正しました。 diff --git a/src-python/docs/modules/model.md b/src-python/docs/modules/model.md deleted file mode 100644 index 391976e9..00000000 --- a/src-python/docs/modules/model.md +++ /dev/null @@ -1,118 +0,0 @@ -# model.py — クラスと主要メソッド -目的: アプリケーションの中核オーケストレータ。翻訳器 (Translator)、オーバーレイ、トランスクリプタ、OSC、WebSocket、Watchdog などのインスタンスを保持し、これらの起動/停止/操作を担います。`model` は `Model` のシングルトンインスタンスです。 - -主要クラスとシグネチャ: -- class threadFnc(Thread) - - __init__(self, fnc, end_fnc=None, daemon=True, *args, **kwargs) - - stop(self) -> None - - pause(self) -> None - - resume(self) -> None - -- class Model - - __new__(cls) -> Model - - init(self) -> None - - checkTranslatorCTranslate2ModelWeight(self, weight_type: str) -> bool - - changeTranslatorCTranslate2Model(self) -> None - - downloadCTranslate2ModelWeight(self, weight_type, callback=None, end_callback=None) -> Any - - isLoadedCTranslate2Model(self) 
-> bool - - getListLanguageAndCountry(self) -> list - - getTranslate(self, translator_name, source_language, target_language, target_country, message) -> tuple - - getInputTranslate(self, message, source_language=None) -> (list, list) - - getOutputTranslate(self, message, source_language=None) -> (list, list) - - startMicTranscript(self, fnc) -> None - - stopMicTranscript(self) -> None - - startSpeakerTranscript(self, fnc: Optional[Callable[[dict], None]] = None) -> None - - stopSpeakerTranscript(self) -> None - - startWebSocketServer(self, host, port) -> None - - stopWebSocketServer(self) -> None - - websocketSendMessage(self, message_dict: dict) -> bool - - 変更点(2025-10-09): - - - startCheckMicEnergy(self, fnc: Optional[Callable[[float], None]] = None) -> None - - 説明: 進捗/エネルギー表示用のコールバックを受け取ります。fnc が None の場合は内部で no-op を使い、呼び出し前に callable チェックを行います。これにより呼び出し側が None を渡しても安全になりました。 - - - startCheckSpeakerEnergy(self, fnc: Optional[Callable[[float], None]] = None) -> None - - 説明: 同上(fnc を Optional として受け取り、呼び出し時に callable を確認します)。内部では Queue を作成して録音データを受け取り、定期的にコールバックを呼びます。 - - - convertMessageToTransliteration(self, message: str, hiragana: bool = True, romaji: bool = True) -> list - - 説明: 以前は単一の文字列や別形を返す箇所がありましたが、現在は常にリスト(トークン単位の dict を要素とする list)を返します。hiragana/romaji の両方が False の場合は空リストを返します。 - - - createOverlayImageLargeLog(self, message_type: str, message: Optional[str], your_language: Optional[str], translation: list, target_language: Optional[dict] = None) -> object - - 説明: `target_language` は辞書形式で渡される場合があり、内部で言語リストに正規化されます(enabled な言語のみ抽出)。`message` / `your_language` は Optional となり、`None` を渡して翻訳のみのログを作ることが可能です。 - -使用例(簡易): - -```python -from model import model - -# 翻訳を呼び出す -translation, success = model.getTranslate('CTranslate2', 'Japanese', 'English', 'United States', 'こんにちは') -print(translation, success) - -# マイク文字起こしの開始(コールバックで結果を受け取る) -def on_mic_transcript(result): - print('mic transcript:', result) - -model.startMicTranscript(on_mic_transcript) - -# 
WebSocket サーバー起動 -model.startWebSocketServer('127.0.0.1', 2231) - -``` - -注意点: -- `Model` は多くの外部リソース(GPU、ファイル、ネットワーク)に依存するため、各操作は例外処理で保護されています。 -- 大きなモデルのロードで VRAM OOM を検出する `detectVRAMError` を備え、Controller 側でのフォールバック処理に使われます。 - -## 詳細設計 - -### 2025-10-09 のリファクタリング要約 - -- 遅延初期化 (lazy-init): `Model` のコンストラクタで重い初期化を行わず、`model.init()` を明示的に呼ぶか、各メソッド先頭で呼ばれる `ensure_initialized()` によって必要時に初期化する設計に変更しました。これによりインポート時の副作用(外部環境依存の初期化)が抑止されます。 - -- `threadFnc` の堅牢化: スレッドユーティリティは args/kwargs をインスタンスで保持し、内部で発生する例外を捕捉して `utils.errorLogging()` に委ねるようになりました。これによりバックグラウンドスレッドが例外で終了するリスクを減らしています。 - -- `device_manager` 呼び出しのガード: `getListMicHost()` / `getListMicDevice()` / `getMicDefaultDevice()` / `getListSpeakerDevice()` など、`device_manager` を参照する箇所は try/except で保護され、失敗時は安全なデフォルト(空リストや `"NoDevice"`)を返すようになりました。 - -- WebSocket/Overlay/Watchdog 等の起動系メソッドは `ensure_initialized()` を先頭に呼ぶようになり、遅延初期化の恩恵を受けるようになっています。 - -これらの変更は非破壊で既存の API を維持することを目的としていますが、起動フローで確実にリソースを確保したい場合はアプリ起動時に `model.init()` を呼ぶことを推奨します。 - - -目的: 各モデル(翻訳/転写/Overlay/Watchdog/OSC/WebSocket 等)のインスタンスを保持し、高レベルの操作を提供するファサード。 - -主要クラス/変数: -- class threadFnc(Thread) - - 説明: ループする関数をバックグラウンドで呼ぶヘルパ。pause/stop/end callback をサポート。 - -- class Model - - シングルトン: ファイル末で `model = Model()` として公開。 - - 主な属性: - - translator (Translator) - - overlay (Overlay) - - overlay_image (OverlayImage) - - mic_audio_queue, mic_audio_recorder, mic_transcriber - - speaker_audio_queue, speaker_audio_recorder, speaker_transcriber - - watchdog (Watchdog) - - osc_handler (OSCHandler) - - websocket_server (WebSocketServer) - - 主なメソッド: - - start/stop logger, overlay, watchdog - - startMicTranscript / stopMicTranscript: 録音、transcriber の起動とキュー処理 - - startSpeakerTranscript / stopSpeakerTranscript - - startCheckMicEnergy / stopCheckMicEnergy - - startCheckSpeakerEnergy / stopCheckSpeakerEnergy - - getTranslate / getInputTranslate / getOutputTranslate: Translator を利用する高レベル関数 - - createOverlayImage* / updateOverlay* : OverlayImage と Overlay を結合して VR 表示を作成 - - 
startWebSocketServer / stopWebSocketServer / websocketSendMessage - -エラー処理: -- 音声認識や翻訳で VRAM エラーが発生した場合、detectVRAMError() で特殊な例外内容を検査し、Controller 経由で翻訳機能を OFF にする処理がある。 - -非同期/リソース: -- Recorder/Transcriber/Overlay/Watchdog/WebSocket はそれぞれ別スレッドで動作する。Model はそれらの開始/停止を管理する。 - -依存: -- models/translation, models/transcription, models/overlay, models/osc, models/websocket - diff --git a/src-python/docs/modules/model_extra.md b/src-python/docs/modules/model_extra.md deleted file mode 100644 index 5d3af186..00000000 --- a/src-python/docs/modules/model_extra.md +++ /dev/null @@ -1,60 +0,0 @@ -# model.py — クラス一覧と使用例 - -以下は `model.py` で提供される主要クラスのシグネチャ概要と、簡単な呼び出し例です。 - -## クラス / 主要シグネチャ - -- class threadFnc(Thread) - - __init__(self, fnc: Callable, interval: float = 0.1, end_callback: Callable | None = None) - - start(self) -> None - - pause(self) -> None - - resume(self) -> None - - stop(self) -> None - -- class Model - - startLogger(self) -> None - - stopLogger(self) -> None - - startOverlay(self) -> None - - shutdownOverlay(self) -> None - - startMicTranscript(self, callback: Callable[[dict], None]) -> None - - stopMicTranscript(self) -> None - - startSpeakerTranscript(self, callback: Callable[[dict], None]) -> None - - stopSpeakerTranscript(self) -> None - - startCheckMicEnergy(self, progress_callback: Callable[[int], None]) -> None - - stopCheckMicEnergy(self) -> None - - startCheckSpeakerEnergy(self, progress_callback: Callable[[int], None]) -> None - - stopCheckSpeakerEnergy(self) -> None - - startWebSocketServer(self, host: str, port: int) -> None - - stopWebSocketServer(self) -> None - - websocketSendMessage(self, message: dict) -> None - - getListMicHost(self) -> dict - - getListMicDevice(self) -> list - - getListSpeakerDevice(self) -> list - - getInputTranslate(self, text: str, source_language: str = None) -> tuple[list[str], list[bool]] - - getOutputTranslate(self, text: str, source_language: str = None) -> tuple[list[str], list[bool]] - - detectVRAMError(self, 
exception: Exception) -> tuple[bool, str] - -## サンプル(呼び出し例) - -以下は Model の簡単な呼び出し例です。 - -```python -from model import model - -# マイク転写のコールバック例 -def on_mic_result(result: dict): - # result の想定形: {"text": str|False, "language": str} - text = result.get("text") - language = result.get("language") - print('mic:', text, language) - -# マイク転写を開始(別スレッドで動く) -model.startMicTranscript(on_mic_result) - -# 一度だけ翻訳を呼ぶ -translation, success = model.getInputTranslate('Hello', source_language='English') -print('translation:', translation, 'success:', success) - -# WebSocket 経由で外部クライアントへイベント送信 -model.websocketSendMessage({'type': 'INFO', 'message': 'VRCT ready'}) -``` diff --git a/src-python/docs/modules/osc.md b/src-python/docs/modules/osc.md deleted file mode 100644 index 1dc15e48..00000000 --- a/src-python/docs/modules/osc.md +++ /dev/null @@ -1,47 +0,0 @@ -## OSC モジュール (models.osc) - -このドキュメントは `models/osc/osc.py` の使い方と注意点を簡潔にまとめたものです。 - -### 概要 -- `OSCHandler` クラスは OSC メッセージの送信 (/chatbox/input, /chatbox/typing 等) と、 - ローカル環境では OSCQuery でエンドポイントを公開するための薄いラッパーを提供します。 - -### 依存関係 -- `python-osc` — UDP クライアント/サーバ -- `tinyoscquery` — OSCQuery を利用する場合に必要(オプショナル) - -### 使い方(例) - -```python -from models.osc.osc import OSCHandler - -handler = OSCHandler(ip_address="127.0.0.1", port=9000) -handler.setDictFilterAndTarget({ - "/chatbox/input": lambda addr, *args: print(args), -}) -handler.receiveOscParameters() -handler.sendTyping(True) -handler.sendMessage("Hello") -handler.oscServerStop() -``` - -### 注意点 -- `tinyoscquery` がインストールされていない場合、OSCQuery 関連機能は無効になりますが、送信(UDP クライアント)は動作します。 -- サービスのアドバタイズ中に例外が発生した場合、内部でリトライします。 -# models/osc — 詳細設計 - -目的: VRChat 等と OSC / OSCQuery 経由で値の取得やチャット送信を行う。 - -主要クラス/関数: -- class OSCHandler - - sendMessage(message: str, notification: bool=True): OSC で chatbox/input を送信 - - sendTyping(flag: bool): chatbox/typing を送信 - - receiveOscParameters(): OSCQuery を立て、指定したフィルタに対してローカルでサーバを実装してイベントを受ける - - getOSCParameterValue(address: str): OSCQuery を通じて現在値を問い合わせる(use 
tinyoscquery) - -注意点: -- `is_osc_query_enabled` が True のときに OSCQuery を使う(127.0.0.1 や localhost の場合に True) -- 受信ハンドラは dispatcher にマップしてコールバックを呼ぶ。 -- ネットワーク環境や OSCQuery の可否により動作が変わるため例外処理が多く入っている。 - -依存: python-osc, tinyoscquery diff --git a/src-python/docs/modules/overlay.md b/src-python/docs/modules/overlay.md deleted file mode 100644 index 648cbdcf..00000000 --- a/src-python/docs/modules/overlay.md +++ /dev/null @@ -1,59 +0,0 @@ -# overlay.py — OpenVR オーバーレイ管理 - -目的: OpenVR を使ったオーバーレイ表示(複数サイズ: small/large)を管理する `Overlay` クラスを提供します。 - -主要メソッド: -- __init__(self, settings_dict: dict) -- init(self) -> None -- startOverlay(self) -> None -- shutdownOverlay(self) -> None -- reStartOverlay(self) -> None -- updateImage(self, img: PIL.Image.Image, size: str) -> None -- updateOpacity(self, opacity: float, size: str, with_fade: bool = False) -> None -- updateUiScaling(self, ui_scaling: float, size: str) -> None -- updatePosition(self, x_pos, y_pos, z_pos, x_rotation, y_rotation, z_rotation, tracker, size) -> None -- mainloop(self) -> None # アニメーション / フェード評価ループ - -使用上の注意: -- OpenVR (SteamVR) が稼働していることが前提です。`checkSteamvrRunning()` で `vrmonitor.exe` の存在チェックを行います。 -- 例外が発生した場合は `errorLogging()` を呼んでスタックトレースを残します。 - -短い使用例: - -```py -from models.overlay.overlay_image import OverlayImage -from models.overlay.overlay import Overlay -from PIL import Image - -settings = { - "small": { - "x_pos": 0.0, "y_pos": 0.0, "z_pos": 0.0, - "x_rotation": 0.0, "y_rotation": 0.0, "z_rotation": 0.0, - "display_duration": 5, "fadeout_duration": 2, - "opacity": 1.0, "ui_scaling": 1.0, "tracker": "HMD" - } -} - -overlay_img = OverlayImage() -overlay = Overlay(settings) -overlay.startOverlay() - -# wait until initialized -while not overlay.initialized: - time.sleep(0.5) - -# push a simple blank image -overlay.updateImage(Image.new("RGBA", (256, 64), (255,255,255,255)), "small") -``` - -## モジュール構成(補足) - -- overlay.py — OpenVR を使ったオーバーレイ管理。Overlay クラスは複数サイズ(small/large)を扱い、位置/回転/透明度/フェードを制御する。 -- 
overlay_image.py — PIL を使ってオーバーレイに表示する画像を生成(テキストボックス、ログレイアウト、フォント管理)。 -- overlay_utils.py — 行列演算や座標変換ユーティリティ。 - -注意点: -- OpenVR(SteamVR)に依存。SteamVR が動作していることが前提。 -- フォントファイルは repo の fonts フォルダか、ランタイム内パスを探索して読み込む。 -- 生成画像は RGBA バイト列に変換され `overlay.setOverlayRaw` で渡される。 - diff --git a/src-python/docs/modules/overlay_image.md b/src-python/docs/modules/overlay_image.md deleted file mode 100644 index 04735623..00000000 --- a/src-python/docs/modules/overlay_image.md +++ /dev/null @@ -1,115 +0,0 @@ -# overlay_image.py — 画像生成ユーティリティ -目的: `models.overlay.overlay_image.OverlayImage` の実装に基づき、オーバーレイ用のテキストボックス/ログ画像を PIL (Pillow) で生成するための仕様書です。 - -このドキュメントは実装に合わせて書かれており、主要な公開メソッドの振る舞い、引数、返り値、例外、使用例、注意点を含みます。 - -概要 ------- -- 提供クラス: `OverlayImage` -- 役割: 文字列(元文/翻訳)やメッセージタイプ(send/receive) を受け取り、Small/Large 向けの RGBA PIL.Image を生成する。 -- 依存: Pillow (PIL)、フォントファイル群(`fonts/` ディレクトリまたは環境配下) - -主要機能 --------- -- テキストをラップして画像化する(行折り返しを含む) -- 複数テキストブロック(原文+複数の翻訳)を縦に連結して一つの画像にする -- 背景(角丸矩形)を合成して最終的な RGBA 画像を返す -- Small と Large で UI 設定(幅、高さ、フォントサイズ等)を切り替え -- フォント探索: 実行環境の `fonts/` 配下または相対パスからフォントを探し、見つからない場合は FileNotFoundError を投げる - -公開 API(要約) ------------------ -- class OverlayImage(root_path: str | None = None) - - コンストラクタ引数 - - root_path: フォント等のリソースのベースディレクトリ。None の場合は実装に合わせて repo の `fonts/` を候補パスとして探索する。 - -- OverlayImage.createOverlayImageSmallLog(message: str, your_language: str, translation: list | None = None, target_language: list | None = None) -> PIL.Image.Image - - 説明: Small ログ向け(横長・1行〜複数行)にテキストブロックを作成して結合し、角丸背景と合成して RGBA 画像を返す。 - - 引数 - - message: 表示する原文テキスト(None を許容しない想定) - - your_language: 原文の言語キー(フォントマッピングに使用) - - translation: 翻訳テキストのリスト(省略可) - - target_language: 翻訳それぞれに対応する言語キーのリスト(省略可) - - 戻り値: PIL.Image.Image (RGBA) - - 例外: フォントが見つからない場合は FileNotFoundError を投げる可能性あり - -- OverlayImage.createOverlayImageLargeLog(message_type: str, message: str | None = None, your_language: str | None = None, translation: list | None = None, target_language: list | None = None) -> 
PIL.Image.Image - - 説明: Large ログ(複数行 + ヘッダ(Send/Receive)や時刻)向けに、複数ブロックを作成して縦結合し、背景を合成して返す。 - - 引数 - - message_type: 'send' または 'receive'(UI 向けアンカー/色指定に使用) - - message: 表示する原文テキスト(None 可。この場合翻訳のみを表示することもある) - - your_language: 原文の言語キー(フォント選定に使用) - - translation: 翻訳テキストのリスト(省略可) - - target_language: 翻訳それぞれに対応する言語キーのリスト(省略可) - - 戻り値: PIL.Image.Image (RGBA) - - 例外: フォント未発見などで FileNotFoundError を投げる可能性あり - -内部で使われる補助メソッド(要旨) ---------------------------------- -- concatenateImagesVertically(img1, img2, margin=0) -> Image -- addImageMargin(image, top, right, bottom, left, color) -> Image -- createTextboxSmallLog(...) -> Image -- createTextImageLargeLog(...) -> Image -- createTextboxLargeLog(...) -> Image -- getUiSizeSmallLog(), getUiColorSmallLog(), getUiSizeLargeLog(), getUiColorLargeLog() - -フォントとローカライズ ------------------------ -- 実装は `LANGUAGES` マッピングを持ち、言語キーからフォントファイル名を決定します(例: "Japanese" -> "NotoSansJP-Regular.ttf")。 -- フォントは `root_path` を基準に探索します。実行環境によりフォントファイルの場所が異なるため、実装は複数パスを順に試します。フォントが見つからない場合は FileNotFoundError を発生させる設計です。 - -描画と折り返しロジック(実装に基づく注意点) --------------------------------------------- -- テキスト幅を計算し、基準幅に収まるように文字数ベースで分割して折り返す単純なロジックを採用しています。厳密な単語単位折り返しではなく、文字数ベースの分割になります。 -- Small/Large でフォントサイズや余白、角丸半径などを分けており、複数行のテキストブロックを縦結合することで最終画像を作ります。 - -使用例 ------- -Small ログ画像を作る例: - -```python -from models.overlay.overlay_image import OverlayImage - -overlay = OverlayImage() -img = overlay.createOverlayImageSmallLog( - message='こんにちは、世界!', - your_language='Japanese', - translation=['Hello, world!'], - target_language=['English'] -) -img.save('overlay_small.png') -``` - -Large ログ(複数メッセージ履歴)を作る例: - -```python -from models.overlay.overlay_image import OverlayImage -from datetime import datetime - -overlay = OverlayImage() -img = overlay.createOverlayImageLargeLog( - message_type='send', - message='Hello from VRCT', - your_language='English', - translation=['こんにちは'], - target_language=['Japanese'] -) -img.save('overlay_large.png') -``` - -実装上の注意と推奨事項 
------------------------ -- 実行環境にフォントが存在することを確認してください(`fonts/` に主要フォントを置くのが簡単です)。 -- Pillow (PIL) のバージョンに依存する描画 API を使っています。Pillow は v8〜最新程度で問題ありません。 -- 長いテキストの折り返しは単純な文字幅分割ロジックです。より自然な折り返し(単語単位・ルビ考慮等)が必要なら実装拡張を推奨します。 -- 生成画像は RGBA(透過)です。Overlay 側の API(`overlay.setOverlayRaw` 相当)へ渡して使う前提です。 - -復元メモ --------- -このファイルは実装ファイル `models/overlay/overlay_image.py` を参照して復元しました。実装を変更した場合は本ドキュメントも同期して更新してください。 - -関連ファイル -------------- -- 実装: `models/overlay/overlay_image.py` -- ヘルパ: `models/overlay/overlay_utils.py` -- フォント: `fonts/` ディレクトリ diff --git a/src-python/docs/modules/transcription.md b/src-python/docs/modules/transcription.md deleted file mode 100644 index a8f5abc9..00000000 --- a/src-python/docs/modules/transcription.md +++ /dev/null @@ -1,126 +0,0 @@ -## 文字起こしモジュール (models.transcription) - -このドキュメントでは `models/transcription` に関する設計・セットアップ・使用例・テスト方針・トラブルシュートをまとめます。 - -### 概要 -- `models/transcription` は音声入力をテキストに変換する機能を提供します。主に: - - `transcription_recorder.py` — マイクやスピーカからの音声取得ラッパー - - `transcription_transcriber.py` — 音声バッファを認識エンジンに渡して文字起こしを行うロジック - - `transcription_whisper.py` — faster-whisper(WhisperModel)周りのダウンロード/ロード補助 - - `transcription_languages.py` — 各言語・国別のエンジン別コードマップ - -### 最近の変更点 -- 各モジュールに型注釈と docstring を追加しました。これによりメンテナンス性が向上します。 -- `transcription_whisper.py` にダウンロード進捗コールバックを明記した実装を追加しました。 - -### 依存関係 -主要な依存: -- `speech_recognition` — オーディオ録音と Google 音声認識のラッパー -- `pyaudiowpatch` — クロスプラットフォームのオーディオ設定 -- `pydub` — 音声のチャンネル変換や処理 -- `faster_whisper`(オプショナル)— ローカルで Whisper を使う場合 -- `huggingface_hub`(オプショナル)— モデルアーティファクトのダウンロード - -注意: `pydub` は `ffmpeg` が必要です。環境に ffmpeg が無いとワーニングが出ます。 - -推奨インストール(任意): - -```powershell -pip install speechrecognition pyaudiowpatch pydub faster-whisper huggingface-hub -``` - -テストでは多くの外部依存をモックするため、全てをインストールする必要はありません。 - -### 初回セットアップ -1. 必要に応じて `ffmpeg` をインストールしてください(pydub の動作に必要)。 -2. 
Whisper ローカルモデルを使う場合、`transcription_whisper.downloadWhisperWeight(root, weight_type, callback, end_callback)` を呼んでモデルを取得します。 - - `callback(progress: float)` は 0.0〜1.0 の進捗通知です。 - - 例: - -```python -from models.transcription import transcription_whisper as tw -tw.downloadWhisperWeight("./", "tiny", callback=lambda p: print(f"{p*100:.1f}%"), end_callback=lambda: print("done")) -``` - -### API 使用例 -簡単な `AudioTranscriber` の使い方: - -```python -from models.transcription.transcription_transcriber import AudioTranscriber - -# source はライブラリが提供するオーディオソースオブジェクト -tr = AudioTranscriber(speaker=False, source=source, phrase_timeout=3, max_phrases=10, transcription_engine="Google") -# audio_queue は録音スレッドがプッシュするキュー -tr.transcribeAudioQueue(audio_queue, languages=["English"], countries=["United States"]) -``` - -戻り値やエラー処理のルールについては各関数の docstring を参照してください。 - -### テスト方針 -- `AudioTranscriber` と `Whisper` ラッパーはユニットテストでモック化して検証します。 -- 推奨: `pytest` と `unittest.mock` を使い、以下のケースをカバーします: - - 正常系: Google/Whisper の成功パス(モックで期待テキストを返す) - - エッジ: 無音、低確信、複数言語 - - フォールバック: Whisper が利用不可の場合のフォールバック動作 - -### トラブルシュート -- ffmpeg が見つからない: `pydub` がワーニングを出します。OS に合わせて ffmpeg をインストールしてください。 -- Whisper のロード時に VRAM エラー: `getWhisperModel` は VRAM 不足を検出して `ValueError("VRAM_OUT_OF_MEMORY", message)` を投げます。デバイス設定や compute_type を調整してください。 -- ハッシュ不一致やダウンロード失敗: キャッシュや weights ディレクトリを削除して再ダウンロードしてください。 - -### 変更履歴 -- 2025-10-09: 型注釈と docstring を追加、ダウンロード/コールバック仕様を明記。 - ---- -このドキュメントは簡潔な参照用です。さらに詳細な実行手順(ログ収集方法、ffmpeg のインストール手順例など)が必要であれば追記します。 -# transcription — 文字起こしモジュール -概要: マイク/スピーカー音声の録音と Whisper/Google などのエンジンを使った文字起こしを提供するモジュール群です。主なクラスは録音用の Recorder と `AudioTranscriber` です。 - -主要クラス/シグネチャ: -- SelectedMicEnergyAndAudioRecorder(device, energy_threshold, dynamic_energy_threshold, phrase_time_limit) -- SelectedSpeakerEnergyAndAudioRecorder(...) 
-- SelectedMicEnergyRecorder(device) -- SelectedSpeakerEnergyRecorder(device) -- AudioTranscriber(speaker: bool, source, phrase_timeout: int, max_phrases: int, transcription_engine: str, root: str, whisper_weight_type: str, device: str, device_index: int, compute_type: str) - - transcribeAudioQueue(queue, languages:list, countries:list, avg_logprob: float, no_speech_prob: float) -> bool - - getTranscript() -> dict - -使用例: - -```python -from models.transcription.transcription_recorder import SelectedMicEnergyAndAudioRecorder -from models.transcription.transcription_transcriber import AudioTranscriber - -# 録音 -rec = SelectedMicEnergyAndAudioRecorder(device, energy_threshold=300, dynamic_energy_threshold=False, phrase_time_limit=3) -queue = Queue() -rec.recordIntoQueue(queue, None) - -# 文字起こし -transcriber = AudioTranscriber(speaker=False, source=rec.source, phrase_timeout=3, max_phrases=10, transcription_engine='Google', root='.', whisper_weight_type='base', device='cpu', device_index=0, compute_type='auto') -transcriber.transcribeAudioQueue(queue, ['Japanese'], ['Japan'], -0.8, 0.6) -print(transcriber.getTranscript()) -``` - -注意点: -- Whisper のモデルロードは VRAM を消費します。`Model.detectVRAMError` のような検知と回復策が必要です。 -- 録音は OS のデバイス依存のため `device_manager` でのデバイス取得と組み合わせて利用してください。 - -# models/transcription — 詳細設計 - -構成ファイル: -- transcription_recorder.py — 各デバイス向け Recorder クラス群(Base, SelectedMic*, SelectedSpeaker*)。speech_recognition をラップし、Audio / Energy をキューへ出す。 -- transcription_transcriber.py — AudioTranscriber: Google Speech API または faster-whisper を使った音声→テキスト変換の実行ロジック。複数言語に対する最良候補選択と confidence に基づく選出。 -- transcription_whisper.py — Whisper(faster-whisper)重みのダウンロードとモデル生成のユーティリティ。 - -主要契約: -- Recorder は recordIntoQueue(audio_queue, energy_queue) を提供し、バックグラウンドで音声データをキューに流す。 -- AudioTranscriber.transcribeAudioQueue(audio_queue, languages, countries, avg_logprob, no_speech_prob) -> bool - - audio_queue から音声を取り出し認識を試みる。結果は getTranscript() で取得する。常に True/False を返して呼び出し側がループ継続を制御。 - -VRAM 
エラー対策: -- Whisper のモデルロードで GPU メモリ不足が発生すると、ValueError("VRAM_OUT_OF_MEMORY", message) を投げる実装。Controller で捕捉して機能停止/通知する。 - -外部依存: -- speech_recognition, faster_whisper, pydub, numpy, torch - diff --git a/src-python/docs/modules/translation.md b/src-python/docs/modules/translation.md deleted file mode 100644 index a391da23..00000000 --- a/src-python/docs/modules/translation.md +++ /dev/null @@ -1,113 +0,0 @@ -## 翻訳モジュール (models.translation) - -このドキュメントは `models/translation` 配下に対して行った最近の変更点、セットアップ手順、API の使い方、テスト方針、トラブルシュートをまとめたものです。 - -### 概要 -- モジュールの責務: テキストの翻訳を行う高レベルの `Translator` クラス、言語コードのマッピング、CTranslate2 用の重み・トークナイザのダウンロード/検証ユーティリティを提供します。 -- 変更点の狙い: 型注釈と docstring を追加し、`translation_utils.py` のダウンロード/検証ロジックをシンプルで堅牢な実装へ置換しました。これにより初回セットアップの手順が明確になります。 - -### 主な変更点(サマリ) -- `translation_translator.py`: 型注釈、docstring を追記。外部依存は存在するが、例外が発生してもモジュールが壊れないように保護されています。 -- `translation_languages.py`: 言語コードマッピングの説明を追加。 -- `translation_utils.py`: 重みファイルの検証(SHA-256 ハッシュ照合)、zip 展開、`transformers.AutoTokenizer` を使ったトークナイザ取得、ダウンロード進捗用のコールバックを備えた実装へ置換。 - -### インストール(依存関係) -必須ではないものが含まれます。開発・最小稼働に必要なパッケージはプロジェクト全体の要件に従ってください。 - -主に使うパッケージ: -- `requests` — ダウンロード処理 -- `transformers` — トークナイザ取得(AutoTokenizer) -- `ctranslate2` — CTranslate2 を使う場合(ランタイムのみ、テストではモック推奨) - -推奨インストール例(任意): - -```powershell -pip install requests transformers ctranslate2 -``` - -DeepL や `translators` といった外部 API ラッパーはオプショナルです。CI やローカルテストではモックして動作確認してください。 - -### 初回セットアップ / 重みの準備 -`translation_utils.py` に含まれるユーティリティ関数: - -- `checkCTranslate2Weight(root: str, weight_type: str = "small") -> bool` - - 指定した `root/weights/ctranslate2/` 以下に必要なファイルが存在し、既知のハッシュと一致するかをチェックします。 - -- `downloadCTranslate2Weight(root: str, weight_type: str = "small", callback: Optional[Callable[[float], None]] = None, end_callback: Optional[Callable[[], None]] = None) -> None` - - 重みを ZIP 形式でダウンロードして展開します。 - - `callback(progress: float)` は 0.0〜1.0 の進捗通知に使えます。 - - `end_callback()` は処理完了時に呼び出されます。 - -- `downloadCTranslate2Tokenizer(path: str, 
weight_type: str = "small") -> None` - - `transformers.AutoTokenizer.from_pretrained` を利用してトークナイザをダウンロード/キャッシュします(`cache_dir` に保存)。 - -呼び出し例(簡単): - -```python -from models.translation import translation_utils as tu - -# ルートディレクトリ(プロジェクトルートなど) -root = "." -if not tu.checkCTranslate2Weight(root, "small"): - tu.downloadCTranslate2Weight(root, "small", callback=lambda p: print(f"{p*100:.1f}%")) - tu.downloadCTranslate2Tokenizer(root, "small") -``` - -注意: 大きなモデル(`large`)はダウンロードに時間とディスク容量を要します。 - -### API 使用例 (`Translator` の簡易例) - -以下は `Translator` の想定されるシンプルな使い方です(実装は `translation_translator.py` を参照してください)。 - -```python -from models.translation.translation_translator import Translator - -tr = Translator() -result = tr.translate("Hello", src_lang="en", target_lang="ja") -if result: - print(result) -else: - print("翻訳に失敗しました") -``` - -戻り値とエラー: 既存のコードベースとの互換性を重視し、失敗時は False を返すケースがあります。API 呼び出し前に戻り値の型を確認してください。 - -### テスト方針 -- 外部サービス(DeepL、web 翻訳ラッパー、ctranslate2、transformers)はユニットテストでモックします。 -- 推奨: `pytest` と `unittest.mock` を使い、`Translator.translate` の成功パス・失敗パスを検証するテストを追加してください。 - -簡単なテスト設計: -- 正常系: ctranslate2 経由の翻訳が正しく呼ばれる(モックで期待レスポンスを返す) -- フォールバック系: ctranslate2 が利用できない場合に別の翻訳経路を辿る(モック) - -### トラブルシュート -- `ModuleNotFoundError` (例: `sudachidict_full`) — transliteration/別モジュールで必要な辞書が無い場合。該当パッケージのインストールか、当該機能を無効にしてください。 -- ハッシュ不一致 — ダウンロード済みファイルの破損が疑われます。該当ファイルを削除して再ダウンロードしてください。 -- `transformers` のトークナイザが取得できない場合、ネットワークやキャッシュ先の権限を確認してください。 - -### 変更履歴 -- 2025-10-09: 型注釈と docstring の追加、`translation_utils.py` を再実装してダウンロード/検証ロジックを整理。 - ---- -このドキュメントは簡潔な参照用です。必要なら実行例やさらに詳細なトラブルシュート手順(コマンド出力例、ログの取り方など)を追加します。 -# models/translation — 詳細設計 - -構成ファイル: -- translation_translator.py — Translator クラス。DeepL/API、Google、Bing、Papago、CTranslate2 を統一インターフェースで扱う。 -- translation_utils.py — 重みファイルのダウンロード・検証ロジック(CTranslate2 用)。 -- translation_languages.py — 各エンジンの対応言語マップ。 - -Translator の契約: -- translate(translator_name, source_language, target_language, target_country, message) -> str|False - 
- 成功時は文字列、失敗または一時的エラーは False を返す。 -- changeCTranslate2Model(path, model_type, device, device_index, compute_type) - - CTranslate2 の Translator オブジェクトと Tokenizer を初期化する。 - -フォールバック: -- Controller/Model 層で翻訳が失敗した場合に CTranslate2 にフォールバックする実装がある。 - -外部依存: -- ctranslate2, transformers, deepl(オプション)、translators(任意) - -安全性: -- 翻訳 API キー(DeepL)は Translator.authenticationDeepLAuthKey で検証して保持。 diff --git a/src-python/docs/modules/transliteration.md b/src-python/docs/modules/transliteration.md deleted file mode 100644 index 577d055d..00000000 --- a/src-python/docs/modules/transliteration.md +++ /dev/null @@ -1,17 +0,0 @@ -# models/transliteration — 詳細設計 - -目的: 日本語テキストの仮名読みを解析し、ひらがな/ローマ字(Hepburn)に変換する。 - -主要クラス/関数: -- class Transliterator - - analyze(text: str, use_macron: bool=False) -> List[dict] - - 入力: テキスト - - 出力: トークンのリスト。各要素は { orig, kana, hira, hepburn } - - split_kanji_okurigana(surface, reading_kana): 漢字+送り仮名を分割して kana を割り当てるロジックを持つ(詳細設計あり) - -実装上のポイント: -- SudachiPy を使い形態素解析して読みを得る。 -- Katakana を Hiragana に変換し、katakana_to_hepburn モジュールでローマ字化を行う。 -- 文脈ルールを `transliteration_context_rules.apply_context_rules` で適用できる設計(ルールエンジン)。 - -依存: sudachipy diff --git a/src-python/docs/modules/utils.md b/src-python/docs/modules/utils.md deleted file mode 100644 index 9b7d4e71..00000000 --- a/src-python/docs/modules/utils.md +++ /dev/null @@ -1,132 +0,0 @@ -## utils モジュール(src-python/utils.py) - -このドキュメントは `src-python/utils.py` に対する最近のリファクタ内容、公開 API、利用上の注意点、テスト方法をまとめたものです。 - -### 概要 -- `utils.py` はプロジェクト全体で使われる汎用ユーティリティ群を提供します。主な内容: - - ネットワーク接続チェック (`isConnectedNetwork`) - - ソケットの空きポート確認 (`isAvailableWebSocketServer`) - - IP アドレス検証 (`isValidIpAddress`) - - 計算デバイス一覧取得 (`getComputeDeviceList` / `getBestComputeType`) - - Base64 デコード (JSON) (`encodeBase64`) - - ロガー設定/ログ出力ヘルパー (`setupLogger`, `printLog`, `printResponse`, `errorLogging`) - -### 今回のリファクタ(要点) -- Optional 依存へのフォールバック: `torch` と `ctranslate2` が存在しない環境でも動作するよう、import をガードし、安全なデフォルトを返す実装にしました。 -- 型注釈と docstring を追加して可読性を向上させました。 
-- ログ設定の重複ハンドラ追加を防ぐチェックを導入しました。 -- `encodeBase64` はデコード失敗時に例外を投げず空辞書を返すように(安全側)変更しました。 -- `getComputeDeviceList` は GPU 情報取得で失敗しても CPU 情報を返すように例外保護を行いました。 - -### 重要な利用上の注意(breaking/behavior changes) -- Optional 依存 - - `torch` が無い環境では GPU 情報は取得できません(`getComputeDeviceList` は CPU エントリのみ返します)。 - - `ctranslate2` の `get_supported_compute_types` が無い場合は空リストを返します。 - → 環境に依存する挙動を想定して、呼び出し側は存在チェックやフォールバックを実装してください。 - -- `encodeBase64` の挙動 - - 不正な base64/JSON を入力した場合、例外を投げず `{}` を返します。既存コードが例外を期待している場合は注意してください。 - -- `isAvailableWebSocketServer` の仕様 - - 指定した host:port に対して bind が成功すれば True を返します(「使用中かどうか」を判定する用途と逆の意味合いになることがあるため注意)。 - -- ロギング - - `setupLogger` は同じログファイルに対するハンドラを重複して追加しません。`errorLogging()` はログ書き込みに失敗した場合でも最後に trace を stdout に出力するフォールバックがあります。 - -### API 使い方(短い例) - -```python -from utils import getComputeDeviceList, encodeBase64, printResponse - -devices = getComputeDeviceList() -print(devices) - -obj = encodeBase64('eyAia2V5IjogInZhbHVlIiB9') # -> {'key': 'value'} - -printResponse(200, '/health', {'status': 'ok'}) -``` - -### テスト方針 -- optional 依存の違いを扱うため、ユニットテストは `torch` と `ctranslate2` をモックして行うことを推奨します。 -- 例: `getComputeDeviceList()` は GPU がない環境でも CPU のエントリを返すことを確認するテスト。 - -### トラブルシュート -- ログファイルの書き込みエラー: 権限やディスク容量を確認してください。`error.log` と `process.log` の存在と権限をチェックします。 -- `getComputeDeviceList()` が空しか返さない場合、`torch` または `ctranslate2` のインストールを確認してください。 - -### 変更履歴 -- 2025-10-09: 型注釈・docstring 追加、optional import ガード、ロギング堅牢化。 -# utils.py — 関数一覧と使用例 -目的: 共通ユーティリティ(ログ、JSON 出力、ネットワーク/ポート検査、デバイス/計算タイプ列挙、バリデーション等)を提供します。 - -主要関数とシグネチャ: -- validateDictStructure(data: dict, structure: dict) -> bool -- isConnectedNetwork(url: str = "http://www.google.com", timeout: int = 3) -> bool -- isAvailableWebSocketServer(host: str, port: int) -> bool -- isValidIpAddress(ip_address: str) -> bool -- getComputeDeviceList() -> dict -- getBestComputeType(device: str, device_index: int) -> str -- encodeBase64(data: str) -> dict -- removeLog() -> None -- setupLogger(name, log_file, 
level=logging.INFO) -> logging.Logger -- printLog(log: str, data: Any = None) -> None -- printResponse(status: int, endpoint: str, result: Any = None) -> None -- errorLogging() -> None - -使用例: - -```python -from utils import printResponse, getComputeDeviceList, validateDictStructure - -# JSON 形式で mainloop に応答を返す -printResponse(200, '/get/data/version', {'version': '3.2.2'}) - -# 利用可能な計算デバイス一覧を取得 -devices = getComputeDeviceList() -print(devices) - -# 辞書構造のバリデーション -data = {'a': 1, 'b': {'c': 'x'}} -structure = {'a': int, 'b': {'c': str}} -ok = validateDictStructure(data, structure) -print('valid:', ok) -``` - -注意点: -- `printResponse` は stdout に JSON を出力しつつログファイルにも書き込みます。大きなオブジェクトは json.dumps で失敗する可能性があるため、例外処理が含まれています。 - -# utils.py — 詳細設計 - -目的: 小さなユーティリティ関数群。ロギング、ネットワーク検査、型検証、計算デバイス列挙など。 - -主要関数/変数: -- validateDictStructure(data: dict, structure: dict) -> bool - - 説明: 辞書が期待される構造(キーセットと値の型/入れ子)に完全一致するか検証する。 - - 入力: data(検証対象), structure(期待構造: 値が型または入れ子 dict) - - 出力: bool - - 例外: 型不一致や欠落時は False を返す(例外は投げない)。 - -- isConnectedNetwork(url="http://www.google.com", timeout=3) -> bool - - 説明: 指定 URL に HTTP GET して接続可否を判定。requests を使用。 - -- isAvailableWebSocketServer(host: str, port: int) -> bool - - 説明: 指定ポートへ bind できるかを試し、使用中かを判別する(True=利用可能)。 - -- isValidIpAddress(ip_address: str) -> bool - - 説明: ipaddress.ip_address で検証。 - -- getComputeDeviceList() -> dict - - 説明: CPU と CUDA(利用可能なら)を列挙し、各デバイスでサポートされる compute types を取得する。 - - 依存: torch, ctranslate2.get_supported_compute_types - -- getBestComputeType(device: str, device_index: int) -> str - - 説明: デバイス名に基づき優先 compute_type を選び、利用可能なものを返す。デフォルトは "float32"。 - -- setupLogger(name, log_file, level=logging.INFO) -> Logger - - 説明: RotatingFileHandler を使って UTF-8 ログを作る。10MB ローテーション。 - -- printLog / printResponse / errorLogging - - 説明: mainloop と通信するために標準出力へ JSON を flush するユーティリティ。内部で file ログへも書く。 - -注意点: -- ネットワーク検査やファイル生成で例外が発生した場合、errorLogging() を呼んでトレースを error.log に保存する。 diff --git a/src-python/docs/modules/watchdog.md 
b/src-python/docs/modules/watchdog.md deleted file mode 100644 index 92a05669..00000000 --- a/src-python/docs/modules/watchdog.md +++ /dev/null @@ -1,80 +0,0 @@ -# models/watchdog — 詳細設計 - -目的: 外部(Process 管理側)へ定期的に "生存" を知らせるために使う軽量ウォッチドッグ。 - -設計: -- class Watchdog(timeout: int = 60, interval: int = 20) - - feed(): 最終フィード時刻を更新 - - setCallback(callback): タイムアウト時に呼ぶコールバックを登録(zero-arg を想定) - - start(): 単一チェックを行い、`interval` 秒の sleep を行う(継続監視は呼び出し側でループまたはスレッド化) - -注意: -- 現行実装は非常にシンプルで、長時間のブロッキングやスレッド運用の見直しが必要になり得る。 - -変更点(実装に入れた改善): -- コールバック属性を初期化しておくことで AttributeError を防止 -- コールバック呼び出し内の例外はウォッチドッグ本体に影響を与えないよう try/except で保護 -- メソッドに型注釈と docstring を追加 - -短い使用例(ポーリング方式): - -```py -import time -from models.watchdog.watchdog import Watchdog - -def on_timeout(): - print('watchdog timed out') - -wd = Watchdog(timeout=5, interval=1) -wd.setCallback(on_timeout) - -# 別スレッドにせず、単純なループでポーリングする例 -while True: - wd.start() # ここで timeout をチェックし、必要なら callback を呼ぶ - # アプリケーションの他処理... - time.sleep(0.5) - - # 正常時に feed を呼ぶ例 - # wd.feed() -``` - -使用例(スレッド化ヘルパを用意するアプローチ): - -```py -import time -from threading import Thread, Event -from models.watchdog.watchdog import Watchdog - -stop_event = Event() - -def run_watchdog(wd: Watchdog, stop_event: Event): - # シンプルなバックグラウンド実行ループ(安全な停止用フラグ付き) - while not stop_event.is_set(): - wd.start() - -wd = Watchdog(timeout=10, interval=1) -wd.setCallback(lambda: print('timed out')) -thread = Thread(target=run_watchdog, args=(wd, stop_event), daemon=True) -thread.start() - -# 正常動作時 -wd.feed() -time.sleep(2) - -# 停止する場合は stop_event.set() を呼ぶ -stop_event.set() -thread.join() -``` - -拡張案(将来の改善): -- `start_in_thread()` / `stop()` を Watchdog に組み込む(内部で Thread と Event を管理して安全に停止できるようにする) -- コールバックに引数を渡せるようにする(context 情報、呼び出し回数など) -- asyncio と相互運用できるバージョン(async/await ベース)を用意する -- ロギング統合(標準 logging を使って状態変化を記録) -- 単発(one-shot)/繰り返しの動作モード指定 - -簡易テスト済み: -- 基本的なコールバックの有効/無効挙動をローカルで確認済み(feed 後は呼ばれず、タイムアウト状態で呼ばれる)。 - -注意事項: -- フル自動化(CI 
での運用)を行う場合は、スレッド起動・停止のテストを追加することを推奨します。 diff --git a/src-python/docs/modules/websocket.md b/src-python/docs/modules/websocket.md deleted file mode 100644 index 936d61de..00000000 --- a/src-python/docs/modules/websocket.md +++ /dev/null @@ -1,18 +0,0 @@ -# models/websocket — 詳細設計 - -目的: 外部クライアント(例えば第三者のアプリ)へ翻訳済みテキストやイベントをブロードキャストする軽量 WebSocket サーバー。 - -API: -- class WebSocketServer(host='127.0.0.1', port=8765) - - start(): 別スレッドで asyncio ループを生成しサーバを起動。 - - stop(): サーバ停止、全クライアント切断。 - - set_message_handler(handler): クライアントからのメッセージ受信時のコールバックを登録。handler(server, websocket, message) - - send(message): 非同期キューに積んで全クライアントへ送信(スレッドセーフ)。 - - broadcast(message): asyncio を経由して即時ブロードキャスト。 - -実装上の工夫: -- サーバ本体は別スレッドで asyncio イベントループを run_forever している。 -- 送信用に内部キュー `_send_queue` を持ち、_send_loop で順次送信する。これにより GUI 等から安全に send() を呼べる。 - -依存: websockets(asyncio) - diff --git a/src-python/docs/run_events_payloads.md b/src-python/docs/run_events_payloads.md deleted file mode 100644 index a30f7b8d..00000000 --- a/src-python/docs/run_events_payloads.md +++ /dev/null @@ -1,125 +0,0 @@ -# Run events payloads - -このファイルは `controller.py` 内で `self.run(status, run_mapping["key"], payload)` として発行される全ての run イベントの鍵と、実際に渡されるペイロードの具体例を列挙します。 - ---- - -## 抽出済み run イベント一覧(正規化済み) - -以下は controller.py の self.run 呼び出しを解析して抽出した run イベントです。名称は `mainloop.py` の `run_mapping` に合わせて正規化しています。 - -- connected_network (200) - - payload: true | false - -- enable_ai_models (200) - - payload: true | false - -- mic_host_list (200) - - payload: list[str] - -- mic_device_list (200) - - payload: list[str] - -- speaker_device_list (200) - - payload: list[str] - -- initialization_complete (200) - - payload: dict mapping endpoint -> current value (constructed from init_mapping) - -- selected_mic_device (200) - - payload: {"host": , "device": } - -- selected_speaker_device (200) - - payload: string (device name) - -- error_device (400) - - payload: {"message": , "data": null} - -- check_mic_volume (200) - - payload: numeric 
energy value (float) - -- check_speaker_volume (200) - - payload: numeric energy value (float) - -- download_progress_ctranslate2_weight (200) - - payload: {"weight_type": , "progress": } - -- downloaded_ctranslate2_weight (200) - - payload: - -- error_ctranslate2_weight (400) - - payload: {"message":"CTranslate2 weight download error","data": null} - -- download_progress_whisper_weight (200) - - payload: {"weight_type": , "progress": } - -- downloaded_whisper_weight (200) - - payload: - -- error_whisper_weight (400) - - payload: {"message":"Whisper weight download error","data": null} - -- word_filter (200) - - payload: {"message": "Detected by word filter: "} - -- error_translation_engine (400) - - payload: {"message":"Translation engine limit error","data": null} - -- error_translation_mic_vram_overflow (400) - - payload: {"message":"VRAM out of memory during translation of mic","data": } - -- error_translation_speaker_vram_overflow (400) - - payload: {"message":"VRAM out of memory during translation of speaker","data": } - -- error_translation_chat_vram_overflow (400) - - payload: {"message":"VRAM out of memory during translation of chat","data": } - -- enable_translation (400 or 200) - - payload example on OOM: {"message":"Translation disabled due to VRAM overflow","data": false} - -- transcription_send_mic_message (200) - - payload: { - "original": {"message": , "transliteration": }, - "translations": [ {"message": , "transliteration": }, ... ] - } - -- transcription_receive_speaker_message (200) - - payload: same shape as transcription_send_mic_message - -- software_update_info (200) - - payload: dict (e.g. {"has_update": true, "latest_version": "3.3.0"}) - -- selected_translation_compute_type (200) - - payload: string e.g. 
"auto" | "cpu" | "cuda:0" - -- selected_transcription_compute_type (200) - - payload: string - -- selected_translation_compute_device (200) - - payload: device descriptor (object) — `config.SELECTED_TRANSLATION_COMPUTE_DEVICE` の現在値。 - -- selected_translation_engines (200) - - payload: config.SELECTED_TRANSLATION_ENGINES (list/dict per tab) - -- translation_engines (200) - - payload: list of selectable engines (e.g. ["CTranslate2"]) - -- initialization_progress (200) - - payload: integer stage (used values in code: 1..4) - -- enable_osc_query (200) - - payload: {"data": true|false, "disabled_functions": [...]} - -- enable_transcription_receive (200) - - payload: boolean (true when transcription receive enabled) - -- error_transcription_mic_vram_overflow (400) - - payload: {"message":"VRAM out of memory during mic transcription","data": } - -- error_transcription_speaker_vram_overflow (400) - - payload: {"message":"VRAM out of memory during speaker transcription","data": } - ---- - -注: 上記は controller.py の self.run 呼び出しを解析して作成した "実際に送られる" ペイロード例です。UI 側はこれらの形を期待してコーディングしてください。状況によっては model 層からの戻り値の具象型が変化するため、実装では型チェック/存在チェックを行ってください。 - diff --git a/src-python/docs/runtime.md b/src-python/docs/runtime.md deleted file mode 100644 index f633e9f4..00000000 --- a/src-python/docs/runtime.md +++ /dev/null @@ -1,43 +0,0 @@ -# 実行手順と依存関係 - -対象 OS: Windows を想定(device_manager は WASAPI / pycaw を使う)。 - -必須依存(概略): -- Python 3.10+ 推奨 -- pip パッケージ: - - torch - - ctranslate2 - - transformers - - requests - - pyaudiowpatch - - pycaw - - speech_recognition - - pydub - - websockets - - python-osc - - tinyoscquery - - sudachipy - - pillow - - flashtext - - faster_whisper (オプション: Whisper をローカルで使う場合) - - deepl / translators(外部翻訳を使う場合) - -実行手順 (開発環境): -1. 仮想環境を作成し有効化 -2. 必要パッケージをインストール - - requirements.txt を用意する場合はそこからインストール -3. 
`src-python` をワークディレクトリにして `python mainloop.py` を実行 - -注意点: -- Whisper / CTranslate2 の重みは初回にダウンロードする必要がある。Controller の downloadCtranslate2Weight / downloadWhisperWeight エンドポイントからトリガできる。 -- OpenVR (SteamVR) を使う Overlay は SteamVR が動作している環境でのみ動作。 -- Windows 固有: device_manager が pyaudiowpatch と pycaw に依存。Linux/Mac での互換性は保証されない。 - -ログ: -- process.log (標準動作ログ) -- error.log (トレースバック) -- models 用のロガーは `model.startLogger()` により PATH_LOGS 配下に日付付きファイルを作成する。 - -デバッグ: -- `utils.printLog` と `utils.printResponse` が stdout に JSON を出すため、GUI 側はそれをパースして UI 更新を行う。 -- WebSocket を有効にすると外部クライアントに JSON をブロードキャストできる。 diff --git a/src-python/docs/utils.md b/src-python/docs/utils.md new file mode 100644 index 00000000..1abb554b --- /dev/null +++ b/src-python/docs/utils.md @@ -0,0 +1,940 @@ +# utils.py ドキュメント + +## 概要 +`utils.py` は VRCT アプリケーション全体で使用される汎用ユーティリティ関数とロギング機能を提供するモジュール。辞書構造の検証、ネットワーク接続確認、計算デバイス管理、Base64エンコーディング、構造化ログ出力など、複数のサブシステムで共有される基盤機能を集約している。 + +## 主要機能 +- 辞書構造の厳密な検証 +- ネットワーク接続状態の診断 +- WebSocketサーバーのアドレス可用性チェック +- IPアドレスのバリデーション +- CUDA/CPU計算デバイスの検出と最適化 +- 構造化ログ出力(process.log, error.log) +- Base64エンコード/デコード +- ログファイルのローテーション管理 + +## アーキテクチャ上の位置づけ + +``` +┌─────────────────┐ +│ All Modules │ (controller, model, device_manager, etc.) 
+└────────┬────────┘ + │ Import +┌────────▼────────┐ +│ utils.py │ ◄── このファイル +└─────────────────┘ + │ +┌────────▼────────┐ +│ External Deps │ (torch, ctranslate2, requests, ipaddress) +└─────────────────┘ +``` + +全てのモジュールから参照される共通基盤として機能し、循環参照を避けるため他の内部モジュールへの依存を持たない。 + +## 依存関係 + +### 標準ライブラリ +```python +import base64 +import json +import traceback +import logging +from logging.handlers import RotatingFileHandler +from typing import Any, List, Dict, Optional +``` + +### サードパーティライブラリ(オプション依存) +```python +import torch # GPU検出用(インポート失敗時はNoneにフォールバック) +from ctranslate2 import get_supported_compute_types # 計算タイプ取得用 +import requests # ネットワーク接続確認用 +import ipaddress # IPアドレス検証用 +import socket # WebSocketサーバー可用性チェック用 +``` + +**セーフガードインポート:** +```python +try: + import torch +except Exception: + torch = None # type: ignore + +try: + from ctranslate2 import get_supported_compute_types +except Exception: + def get_supported_compute_types(device: str, device_index: int) -> List[str]: + return [] +``` + +オプション依存が満たされない環境でもモジュールは正常にロード可能。 + +## 関数リファレンス + +### 1. 辞書構造検証 + +#### `validateDictStructure(data: dict, structure: dict) -> bool` + +**責務:** 辞書の構造と型が期待される仕様と完全に一致するかを検証 + +**アルゴリズム:** +1. 両方が辞書型であることを確認 +2. キーの数と名前が完全一致するかチェック +3. 
各キーの値について: + - 期待値が辞書の場合: 再帰的に検証(多重入れ子対応) + - 期待値が型オブジェクトの場合: `isinstance()` で型チェック + +**引数:** +- `data` (dict): 検証対象の辞書 +- `structure` (dict): 期待される構造定義 + - 値には型(str, int, bool等)または入れ子の辞書を指定 + +**返り値:** +- `True`: 構造が完全に一致 +- `False`: 不一致(キー不足、余分なキー、型不一致等) + +**使用例:** +```python +# 単純な構造検証 +data = {"name": "Alice", "age": 30} +structure = {"name": str, "age": int} +assert validateDictStructure(data, structure) is True + +# 入れ子構造の検証 +data = { + "user": { + "id": 123, + "profile": {"name": "Bob", "active": True} + } +} +structure = { + "user": { + "id": int, + "profile": {"name": str, "active": bool} + } +} +assert validateDictStructure(data, structure) is True + +# 不一致の検出 +data = {"name": "Alice", "extra_key": "value"} +structure = {"name": str, "age": int} +assert validateDictStructure(data, structure) is False # キーが不一致 +``` + +**使用場面:** +- フロントエンドからのリクエストペイロード検証 +- 設定ファイルのスキーマ検証 +- API レスポンスの構造確認 + +--- + +### 2. ネットワーク診断 + +#### `isConnectedNetwork(url: str = "http://www.google.com", timeout: int = 3) -> bool` + +**責務:** インターネット接続の可用性を高速チェック + +**処理:** +1. 指定URLに HTTP GET リクエストを送信 +2. `timeout` 秒以内に 200 OK レスポンスを受信したら接続あり +3. タイムアウトまたは例外発生時は接続なし + +**引数:** +- `url` (str): 接続確認先URL(デフォルト: Google) +- `timeout` (int): タイムアウト時間(秒) + +**返り値:** +- `True`: ネットワーク接続あり +- `False`: ネットワーク接続なし + +**使用例:** +```python +if isConnectedNetwork(): + # モデルウェイトをダウンロード + downloadModelWeights() +else: + # オフラインモードで動作 + useLocalModels() +``` + +**注意事項:** +- ファイアウォールやプロキシ環境では正しく動作しない場合がある +- 初期化時の1回のみチェックを推奨(頻繁な呼び出しは避ける) + +--- + +#### `isAvailableWebSocketServer(host: str, port: int) -> bool` + +**責務:** 指定したホスト/ポートでWebSocketサーバーが起動可能かを確認 + +**処理:** +1. TCP ソケットを作成 +2. `SO_REUSEADDR` オプションを設定 +3. `bind()` を試行 +4. 
成功 → アドレス利用可能、失敗 → アドレス使用中 + +**引数:** +- `host` (str): バインドするIPアドレス +- `port` (int): バインドするポート番号 + +**返り値:** +- `True`: アドレスが利用可能 +- `False`: アドレスが使用中 + +**使用例:** +```python +if isAvailableWebSocketServer("127.0.0.1", 8080): + startWebSocketServer("127.0.0.1", 8080) +else: + print("Port 8080 is already in use") +``` + +**注意事項:** +- `SO_REUSEADDR` により、TIME_WAIT 状態のアドレスも利用可能と判定される +- 管理者権限が必要なポート(1024未満)では失敗する場合がある + +--- + +#### `isValidIpAddress(ip_address: str) -> bool` + +**責務:** IPv4/IPv6アドレスの妥当性を検証 + +**処理:** +- `ipaddress.ip_address()` でパース +- 成功 → 有効なIPアドレス、失敗 → 無効 + +**引数:** +- `ip_address` (str): 検証対象のIPアドレス文字列 + +**返り値:** +- `True`: 有効なIPアドレス +- `False`: 無効なIPアドレス + +**使用例:** +```python +assert isValidIpAddress("127.0.0.1") is True +assert isValidIpAddress("2001:db8::1") is True +assert isValidIpAddress("invalid") is False +``` + +**サポート形式:** +- IPv4: "192.168.1.1", "127.0.0.1" +- IPv6: "2001:db8::1", "fe80::1" + +--- + +### 3. 計算デバイス管理 + +#### `getComputeDeviceList() -> List[Dict[str, Any]]` + +**責務:** 利用可能な計算デバイス(CPU/GPU)とサポートされる計算タイプを列挙 + +**返り値構造:** +```python +[ + { + "device": "cpu", + "device_index": 0, + "device_name": "cpu", + "compute_types": ["auto", "float32", "int8", ...] + }, + { + "device": "cuda", + "device_index": 0, + "device_name": "NVIDIA GeForce RTX 3090", + "compute_types": ["auto", "int8_bfloat16", "int8_float16", ...] + }, + ... +] +``` + +**処理フロー:** +1. CPU デバイスを常に追加(最低限の計算環境を保証) +2. 
PyTorch と CUDA が利用可能な場合: + - 全GPUデバイスを列挙 + - 各GPUの計算タイプを `get_supported_compute_types()` で取得 + - GPU アーキテクチャに応じて計算タイプを制限: + - **GTX シリーズ**: `int8_bfloat16`, `bfloat16`, `float16`, `int8` を除外 + - **RTX, Tesla, A100, Quadro**: 全計算タイプをサポート + - **その他**: `float32` のみ + +**GPU別の計算タイプ制限:** +```python +if "GTX" in gpu_device_name: + unsupported_types = {"int8_bfloat16", "bfloat16", "float16", "int8"} + gpu_compute_types = [t for t in gpu_compute_types if t not in unsupported_types] +elif not any(keyword in gpu_device_name for keyword in ["RTX", "Tesla", "A100", "Quadro"]): + gpu_compute_types = ["float32"] +``` + +**使用例:** +```python +devices = getComputeDeviceList() +for device in devices: + print(f"{device['device_name']}: {', '.join(device['compute_types'])}") + +# 出力例: +# cpu: auto, float32, int8 +# NVIDIA GeForce RTX 3090: auto, int8_bfloat16, int8_float16, int8, bfloat16, float16, int8_float32, float32 +``` + +**エラーハンドリング:** +- GPU検出中の例外は `errorLogging()` でログ記録し、CPU デバイスのみ返却 + +--- + +#### `getBestComputeType(device: str, device_index: int) -> str` + +**責務:** デバイスアーキテクチャに最適な計算タイプを自動選択 + +**優先順位:** +```python +preferred_types = { + "default": [ + "int8_bfloat16", # 最も効率的(対応GPUのみ) + "int8_float16", # 2番目に効率的 + "int8", # 整数演算高速化 + "bfloat16", # 混合精度 + "float16", # 半精度浮動小数点 + "int8_float32", # 互換性重視 + "float32" # フォールバック + ], + "GTX": ["float32"], # GTXシリーズは制限あり + "RTX": ["int8_bfloat16", "int8_float16", ...], + "Tesla": [...], + "A100": [...], + "Quadro": [...] +} +``` + +**処理フロー:** +1. `get_supported_compute_types()` で利用可能な計算タイプを取得 +2. デバイス名に基づいて優先リストを選択 +3. 優先順に計算タイプをチェックし、最初に利用可能なものを返却 +4. 
全て利用不可の場合は `"float32"` を返却(安全なフォールバック) + +**引数:** +- `device` (str): "cpu" または "cuda" +- `device_index` (int): GPUデバイスのインデックス(CPUの場合は0) + +**返り値:** +- 最適な計算タイプ文字列(例: "int8_bfloat16", "float32") + +**使用例:** +```python +best_type = getBestComputeType("cuda", 0) +model.load_model(compute_type=best_type) +``` + +**計算タイプの特性:** + +| 計算タイプ | メモリ使用量 | 速度 | 精度 | 対応GPU | +|----------|------------|------|------|--------| +| int8_bfloat16 | 最小 | 最速 | 高 | RTX 30xx以降 | +| int8_float16 | 最小 | 最速 | 高 | RTX 20xx以降 | +| int8 | 小 | 高速 | 中 | 多くのGPU | +| bfloat16 | 中 | 高速 | 高 | RTX 30xx以降 | +| float16 | 中 | 高速 | 高 | RTX 20xx以降 | +| float32 | 大 | 標準 | 最高 | 全GPU/CPU | + +--- + +### 4. エンコーディング + +#### `encodeBase64(data: str) -> Dict[str, Any]` + +**責務:** Base64エンコードされたJSON文字列をデコードしてパース + +**処理:** +1. Base64デコード +2. UTF-8文字列に変換 +3. JSON パース +4. 失敗時は空の辞書を返却 + +**引数:** +- `data` (str): Base64エンコードされたJSON文字列 + +**返り値:** +- パース成功: JSON オブジェクト +- パース失敗: `{}`(空の辞書) + +**使用例:** +```python +# エンコード例(参考) +import base64 +import json +payload = {"message": "Hello", "id": 123} +encoded = base64.b64encode(json.dumps(payload).encode('utf-8')).decode('utf-8') + +# デコード +decoded = encodeBase64(encoded) +assert decoded == {"message": "Hello", "id": 123} +``` + +**エラーハンドリング:** +- 不正なBase64文字列 +- 不正なJSON形式 +- 文字エンコーディングエラー + +全て `errorLogging()` でログ記録し、空の辞書を返却。 + +**注意事項:** +- 関数名が `encodeBase64` だが、実際には**デコード**を行う(命名の歴史的経緯) +- セキュリティ: Base64は暗号化ではないため、機密情報の保護には使用しない + +--- + +### 5. 
ロギング + +#### `removeLog() -> None` + +**責務:** プロセスログファイル(process.log)を初期化 + +**処理:** +- `process.log` を空の内容で上書き +- ファイルが存在しない場合は新規作成 + +**使用例:** +```python +# アプリケーション起動時にログをクリア +removeLog() +printLog("Application started") +``` + +**エラーハンドリング:** +- ファイル書き込み失敗時は `errorLogging()` でエラーログに記録 + +--- + +#### `setupLogger(name: str, log_file: str, level: int = logging.INFO) -> logging.Logger` + +**責務:** ローテーション機能付きロガーインスタンスを生成 + +**設定:** +- **最大ログサイズ**: 10MB +- **バックアップ数**: 1(最大2ファイル) +- **ローテーション動作**: 10MB到達時に `.1` バックアップを作成し、新規ログを開始 +- **エンコーディング**: UTF-8 +- **遅延書き込み**: `delay=True`(最初の書き込み時にファイルを開く) + +**引数:** +- `name` (str): ロガー名(例: "process", "error") +- `log_file` (str): ログファイルパス +- `level` (int): ログレベル(デフォルト: `logging.INFO`) + +**返り値:** +- 設定済み `logging.Logger` インスタンス + +**ログフォーマット:** +``` +%(asctime)s - %(name)s - %(levelname)s - %(message)s +``` + +**出力例:** +``` +2025-10-13 14:30:45,123 - process - INFO - Application started +2025-10-13 14:30:46,456 - error - ERROR - Connection failed +``` + +**重複ハンドラー防止:** +```python +if not any(isinstance(h, RotatingFileHandler) and getattr(h, 'baseFilename', None) == getattr(file_handler, 'baseFilename', None) for h in logger.handlers): + logger.addHandler(file_handler) +``` + +同じファイルへの重複ハンドラー追加を防止し、複数回呼び出されても安全。 + +--- + +#### `printLog(log: str, data: Any = None) -> None` + +**責務:** 構造化プロセスログの出力 + +**出力先:** +1. `process.log` ファイル +2. 
標準出力(JSON形式) + +**出力形式:** +```python +{ + "status": 348, # プロセスログ専用ステータス + "log": "User action performed", + "data": "additional context" +} +``` + +**引数:** +- `log` (str): ログメッセージ +- `data` (Any): 追加のコンテキスト情報(オプション) + +**使用例:** +```python +printLog("Model loading started", {"model_type": "whisper", "weight": "medium"}) +# 出力(stdout): +# {"status": 348, "log": "Model loading started", "data": "{'model_type': 'whisper', 'weight': 'medium'}"} +``` + +**実装の詳細:** +```python +global process_logger +if process_logger is None: + process_logger = setupLogger("process", "process.log", logging.INFO) + +response = { + "status": 348, + "log": log, + "data": str(data), +} +process_logger.info(response) +serialized = json.dumps(response) +print(serialized, flush=True) +``` + +**注意事項:** +- `data` は `str()` で文字列化されるため、複雑なオブジェクトは読みにくくなる可能性がある +- `flush=True` により即座に出力(バッファリングを無効化) + +--- + +#### `printResponse(status: int, endpoint: str, result: Any = None) -> None` + +**責務:** 構造化APIレスポンスの出力 + +**出力先:** +1. `process.log` ファイル +2. 
標準出力(JSON形式) + +**出力形式:** +```python +{ + "status": 200, + "endpoint": "/get/config/version", + "result": {"version": "3.3.0"} +} +``` + +**引数:** +- `status` (int): HTTPステータスコード風のステータス番号 +- `endpoint` (str): エンドポイント識別子 +- `result` (Any): レスポンスペイロード(オプション) + +**使用例:** +```python +printResponse(200, "/set/config/language", {"language": "ja"}) +printResponse(400, "/set/config/threshold", {"error": "Value out of range"}) +``` + +**JSONシリアライズエラーハンドリング:** +```python +try: + serialized_response = json.dumps(response) +except Exception as e: + errorLogging() # 完全なトレースバックをログ + process_logger.error(f"Problematic response object: {response}") + process_logger.error(f"Exception during json.dumps: {e}") + # フォールバックエラーペイロード + error_json = json.dumps({ + "status": 500, + "endpoint": endpoint, + "result": {"error": "Failed to serialize response", "details": str(e)}, + }) + print(error_json, flush=True) +else: + print(serialized_response, flush=True) +``` + +**シリアライズ不可能なオブジェクトの例:** +- `datetime` オブジェクト +- カスタムクラスインスタンス +- 循環参照を持つ辞書 + +**対策:** +- `result` を構築する際に JSON シリアライズ可能な型のみ使用 +- 必要に応じて `str()` や専用のシリアライザーで変換 + +--- + +#### `errorLogging() -> None` + +**責務:** 現在の例外トレースバックをエラーログに記録 + +**処理:** +1. `error.log` ファイルにトレースバックを出力 +2. 
ロガー初期化失敗時は標準出力にフォールバック + +**使用例:** +```python +try: + risky_operation() +except Exception: + errorLogging() # トレースバックをerror.logに記録 + # 必要に応じて追加処理 +``` + +**出力例(error.log):** +``` +2025-10-13 14:35:12,789 - error - ERROR - Traceback (most recent call last): + File "model.py", line 123, in loadModel + model.load() + File "ctranslate2/model.py", line 456, in load + raise RuntimeError("CUDA out of memory") +RuntimeError: CUDA out of memory +``` + +**注意事項:** +- **例外コンテキスト内でのみ呼び出し可能**(`traceback.format_exc()` を使用) +- 例外処理中(`except` ブロック内)以外で呼び出すと、有効なトレースバックは記録されない(`NoneType: None` のみが出力される) + +**ベストプラクティス:** +```python +try: + dangerous_function() +except SpecificException as e: + errorLogging() # 詳細をログ + # ユーザーフレンドリーなエラー処理 + printResponse(400, endpoint, {"error": "Operation failed"}) +except Exception: + errorLogging() # 予期しないエラーもログ + raise # 上位へ伝播 +``` + +--- + +## グローバル変数 + +### `process_logger: Optional[logging.Logger] = None` +プロセスログ用のグローバルロガーインスタンス。初回 `printLog()` または `printResponse()` 呼び出し時に初期化される。 + +### `error_logger: Optional[logging.Logger] = None` +エラーログ用のグローバルロガーインスタンス。初回 `errorLogging()` 呼び出し時に初期化される。 + +**遅延初期化の理由:** +- モジュールインポート時のオーバーヘッド削減 +- ファイルシステムへの不要なアクセスを回避 + +--- + +## エラーハンドリング戦略 + +### 1. 防御的プログラミング +全てのユーティリティ関数は例外を内部で処理し、呼び出し元に例外を伝播しない: + +```python +def isConnectedNetwork(url="http://www.google.com", timeout=3) -> bool: + try: + response = requests.get(url, timeout=timeout) + return response.status_code == 200 + except requests.RequestException: + return False # 例外をキャッチして安全な値を返却 +``` + +### 2. フォールバック値 +- `encodeBase64()`: パース失敗時は `{}` +- `getComputeDeviceList()`: GPU検出失敗時はCPUのみ +- `getBestComputeType()`: 全て失敗時は `"float32"` + +### 3. ログ記録 +全てのエラーは `errorLogging()` でトレースバックを記録し、デバッグを容易にする。 + +--- + +## パフォーマンス考慮事項 + +### 1. ネットワーク接続チェック +`isConnectedNetwork()` はブロッキング操作(最大3秒)のため、起動時の1回のみ実行を推奨: + +```python +# 良い例 +if isConnectedNetwork(): + downloadModels() + +# 悪い例(UI フリーズの原因) +while True: + if isConnectedNetwork(): # 毎回3秒待機 + processData() +``` + +### 2.
ログローテーション +10MB のログファイルローテーションにより、ディスク容量を制御(最大20MB)。 + +### 3. グローバルロガーの遅延初期化 +ロガーは初回使用時に初期化されるため、インポート時のオーバーヘッドを最小化。 + +--- + +## 使用パターン + +### パターン1: ネットワーク依存機能の初期化 +```python +def initialize_online_features(): + if not isConnectedNetwork(): + printLog("Offline mode: skipping model download") + return + + printLog("Online mode: downloading models") + downloadModels() +``` + +### パターン2: デバイス自動選択 +```python +devices = getComputeDeviceList() +if len(devices) > 1: + # GPU利用可能 + best_device = devices[1] # 最初のGPU + best_type = getBestComputeType(best_device["device"], best_device["device_index"]) + printLog(f"Using GPU: {best_device['device_name']}", {"compute_type": best_type}) +else: + # CPUのみ + printLog("No GPU detected, using CPU") + best_type = "float32" +``` + +### パターン3: 構造化リクエスト検証 +```python +def handle_request(payload): + expected_structure = { + "action": str, + "data": { + "id": int, + "value": str + } + } + + if not validateDictStructure(payload, expected_structure): + printResponse(400, "/handle_request", {"error": "Invalid request structure"}) + return + + # 処理続行 + printLog("Valid request received", payload) +``` + +### パターン4: WebSocketサーバー起動 +```python +def start_websocket(host, port): + if not isValidIpAddress(host): + printResponse(400, "/websocket/start", {"error": "Invalid IP address"}) + return + + if not isAvailableWebSocketServer(host, port): + printResponse(400, "/websocket/start", {"error": f"Port {port} is in use"}) + return + + # サーバー起動 + printLog(f"Starting WebSocket server", {"host": host, "port": port}) + startServer(host, port) +``` + +--- + +## テスト推奨事項 + +### 単体テスト例 + +**辞書構造検証:** +```python +def test_validate_dict_structure_simple(): + data = {"name": "Alice", "age": 30} + structure = {"name": str, "age": int} + assert validateDictStructure(data, structure) is True + +def test_validate_dict_structure_nested(): + data = {"user": {"id": 1, "active": True}} + structure = {"user": {"id": int, "active": bool}} + assert 
validateDictStructure(data, structure) is True + +def test_validate_dict_structure_invalid(): + data = {"name": "Alice"} + structure = {"name": str, "age": int} # 'age'キーが不足 + assert validateDictStructure(data, structure) is False +``` + +**ネットワーク診断:** +```python +def test_network_connection(): + # 実際のネットワーク接続をテスト + result = isConnectedNetwork() + assert isinstance(result, bool) + +def test_network_timeout(): + # タイムアウト動作を確認 + result = isConnectedNetwork(url="http://192.0.2.1", timeout=1) + assert result is False +``` + +**計算デバイス:** +```python +def test_get_compute_device_list(): + devices = getComputeDeviceList() + assert len(devices) >= 1 # 最低限CPUが含まれる + assert devices[0]["device"] == "cpu" + +def test_get_best_compute_type(): + compute_type = getBestComputeType("cpu", 0) + assert compute_type in ["float32", "int8"] +``` + +**ロギング:** +```python +def test_print_log(capsys): + printLog("Test message", {"key": "value"}) + captured = capsys.readouterr() + output = json.loads(captured.out) + assert output["status"] == 348 + assert output["log"] == "Test message" + +def test_print_response(capsys): + printResponse(200, "/test", {"result": "success"}) + captured = capsys.readouterr() + output = json.loads(captured.out) + assert output["status"] == 200 + assert output["endpoint"] == "/test" +``` + +--- + +## セキュリティ考慮事項 + +### 1. IPアドレス検証 +`isValidIpAddress()` はフォーマット検証のみで、プライベートアドレス範囲のチェックは行わない: + +```python +# セキュリティを強化する場合 +import ipaddress + +def is_public_ip(ip_str): + if not isValidIpAddress(ip_str): + return False + ip = ipaddress.ip_address(ip_str) + return not (ip.is_private or ip.is_loopback or ip.is_reserved) +``` + +### 2. Base64デコード +`encodeBase64()` は入力検証を行わないため、信頼できないソースからのデータには注意: + +```python +# 安全な使用例 +if source_is_trusted: + data = encodeBase64(base64_string) +else: + # 追加の検証を実施 + pass +``` + +### 3. 
ログファイルへの機密情報記録 +ログに機密情報(API キー、パスワード等)が含まれないよう注意: + +```python +# 悪い例 +printLog("API key loaded", api_key) + +# 良い例 +printLog("API key loaded", "***REDACTED***") +``` + +--- + +## 制限事項 + +1. **プラットフォーム依存性:** + - GPU検出は CUDA 環境でのみ動作(ROCm/Metal非対応) + +2. **ネットワークチェックの制限:** + - ファイアウォール、プロキシ環境で誤判定の可能性 + - IPv6専用環境での動作は未検証 + +3. **ログファイルのスレッドセーフティ:** + - `RotatingFileHandler` は基本的にスレッドセーフだが、高負荷時のローテーション中にログ損失の可能性 + +4. **計算タイプの最適化:** + - `getBestComputeType()` の優先順位は一般的な推奨値であり、特定のモデルやタスクでは最適でない場合がある + +--- + +## 依存モジュールとの関係 + +### controller.py +- デバイス管理の設定変更時にデバイスリスト取得 +- エラー時のログ記録 +- ネットワーク接続確認 + +### model.py +- 計算デバイスとタイプの決定 +- エラー時のトレースバック記録 + +### config.py +- 起動時のネットワーク接続確認 +- 計算デバイスリストの提供 + +### mainloop.py +- リクエスト/レスポンスの構造化ログ出力 +- エラー時のトレースバック記録 + +--- + +## 今後の拡張性 + +### 1. 非同期ネットワークチェック +```python +import asyncio +import aiohttp + +async def isConnectedNetworkAsync(url="http://www.google.com", timeout=3) -> bool: + try: + async with aiohttp.ClientSession() as session: + async with session.get(url, timeout=aiohttp.ClientTimeout(total=timeout)) as response: + return response.status == 200 + except Exception: + return False +``` + +### 2. 構造化ログの拡張 +```python +def printStructuredLog(level: str, message: str, context: dict = None): + """ + より詳細な構造化ログ出力 + - timestamp + - level + - message + - context (key-value pairs) + - stack trace (error時) + """ + pass +``` + +### 3. メトリクス収集 +```python +def recordMetric(metric_name: str, value: float, tags: dict = None): + """ + パフォーマンスメトリクスの記録 + - function execution time + - memory usage + - GPU utilization + """ + pass +``` + +--- + +## 関連ドキュメント +- `controller.md`: Controller での utils 関数使用例 +- `config.md`: Config での計算デバイス管理 +- `model.md`: Model でのエラーハンドリング +- `コーディングルール.md`: ロギングとエラーハンドリングの規約 + +--- + +## ライセンス +プロジェクトのルートディレクトリの `LICENSE` ファイルを参照 + +--- + +## まとめ + +`utils.py` は VRCT プロジェクトの基盤インフラストラクチャとして、以下の重要な責務を担う: + +1. **安全性**: 全ての関数が例外を内部処理し、安全なフォールバック値を提供 +2. **可観測性**: 構造化ログとローテーション機能により、問題の診断を容易化 +3. 
**互換性**: オプション依存のセーフガードにより、様々な環境で動作 +4. **最適化**: GPU アーキテクチャに応じた計算タイプの自動選択 +5. **検証**: 辞書構造、IPアドレス、ネットワーク接続の厳密なバリデーション + +全てのサブシステムから依存される中核モジュールとして、高い信頼性と保守性を維持している。 diff --git a/src-python/docs/CODING_RULES.md b/src-python/docs/コーディングルール.md similarity index 100% rename from src-python/docs/CODING_RULES.md rename to src-python/docs/コーディングルール.md diff --git a/src-python/docs/仕様書.md b/src-python/docs/仕様書.md new file mode 100644 index 00000000..1ccd8ea2 --- /dev/null +++ b/src-python/docs/仕様書.md @@ -0,0 +1,58 @@ +# 仕様書 + +概要 +- プロジェクト名: VRCT (VR Chat Translator) +- 目的: マイク入力とスピーカー出力をリアルタイムに文字起こし・翻訳し、VR オーバーレイや OSC/WebSocket 経由で外部に送出するバックエンドロジック。 +- 言語: Python + +対象ユーザー +- VR 環境でリアルタイム翻訳・文字起こしを利用したいエンドユーザー +- フロントエンド(GUI)や VR クライアント(OSC)と連携するアプリケーション開発者 + +主要機能(機能要件) +1. 音声の取り込み・文字起こし + - マイク(送信)およびスピーカー(受信)から音声を取得し、ローカル Whisper(faster-whisper)または外部サービスによりテキスト化する。 + - 音声エネルギー(音量)監視を行い、閾値ベースで検出する。 + +2. 翻訳 + - DeepL / DeepL API / 各クラウド翻訳 / ローカル CTranslate2 モデルの複数バックエンドをサポート。 + - 複数出力言語への一括翻訳、翻訳エンジンのフォールバック(CTranslate2 など)。 + - 翻訳モデルのダウンロードと管理機能。 + +3. 表示・通知 + - OpenVR オーバーレイ(small/large)用の画像生成と更新。 + - OSC による VR へのメッセージ送信(typing/通知等)。 + - WebSocket サーバーを介した外部クライアントへの JSON ブロードキャスト。 + +4. 入出力インターフェース + - stdin ラインベースの JSON コマンド受信(mainloop が実装)。 + - stdout に対して構造化された JSON レスポンスを出力(printResponse/printLog)。 + +5. 設定・永続化 + - JSON ベースの設定ファイルを使用(`config.py` による読み書きとデバウンス保存)。 + +6. 
ロギングと監視 + - プロセスログ(process.log)とエラーログ(error.log)をローテーションで管理。 + - ウォッチドッグ機構で定期的に死活チェック・コールバック。 + +非機能要件 +- プラットフォーム: 主に Windows(Audio 周りは WASAPI を利用)を想定。クロスプラットフォームでの import 安全性を考慮。 +- 可用性: 外部依存(PyAudio, CUDA, ctranslate2 等)が無い環境でも安全にインポートでき、機能劣化しつつ動作する。 +- パフォーマンス: ローカルモデル利用時は GPU を利用して計算性能を確保。compute type 選択ロジックを実装。 +- セキュリティ: 外部への API キー(DeepL など)は設定で扱い、コード上では平文保持を避ける(設定ファイルに保存)。 + +運用フロー +- 起動: stdin でコマンドを受け付ける mainloop を実行。必要な初期化は遅延実行(lazy init)を採用。 +- モデル重みダウンロード: CTranslate2/Whisper 重みは `weights/` 配下にダウンロードし、チェックサム等で整合性確認。 +- 障害時: 例外は utils.errorLogging() でトレースを error.log に出力。重要機能はフォールバック実装。 + +インターフェース(抜粋) +- stdin(JSON): {"endpoint": "/set/..." | "/get/..." | "/run/...", "data": } +- stdout(JSON): 標準化されたレスポンスを printResponse/printLog が出力(status, endpoint, result など)。 + +依存関係(オプション含む) +- 必須(実装時想定): requests, packaging, flashtext, pillow, pyaudiowpatch, speech_recognition +- ローカル推奨: faster-whisper, ctranslate2, torch(GPU 利用時) +- Windows 固有(音声ループバック): pycaw, comtypes + +参考: 実装上の安全設計として optional な import は try/except でガードしており、存在しない依存があっても import 時にクラッシュしない。 diff --git a/src-python/docs/設計書.md b/src-python/docs/設計書.md new file mode 100644 index 00000000..2b37c101 --- /dev/null +++ b/src-python/docs/設計書.md @@ -0,0 +1,57 @@ +# 設計書 + +概要 +- 本設計書はアプリケーションのアーキテクチャ、主要コンポーネント、並列化モデル、エラー処理ポリシー、設定の保存方針を記述する。 + +アーキテクチャ概要 +- 層構造 + - mainloop: stdin ベースのコマンド受け取り、ワーカー(複数スレッド)で実行。 + - controller: GUI/フロントエンドからの操作とモデルの仲介。`Controller` がビジネスロジックを実行。 + - model: 実際の機能(翻訳、文字起こし、オーバーレイ、OSC、WebSocket、デバイス管理)を提供するファサード的シングルトン。 + - models/*: 翻訳、文字起こしなどのドメイン別実装(Translator, AudioTranscriber, Overlay, WebSocketServer ...)。 + - device_manager: 音声デバイス検出・監視(Windows の場合は WASAPI/pycaw を利用)。 + - utils: 共通ユーティリティ(ロギング、ネットワークチェック、compute device 列挙など)。 + +初期化ポリシー +- 重い初期化(GPU モデルロード、OpenVR 初期化など)は import 時に行わず、`model.init()` か要求時の `ensure_initialized()` にて遅延実行。 +- `DeviceManager` は import 時に軽量な init を行い、監視スレッドは `startMonitoring()` で開始する。 + +並列化・同期モデル +- mainloop.Main は 1 つの受信スレッド(stdin 読み取り)と N 
個のハンドラワーカースレッドを持つ。 +- 各リクエストはキューに入れられ、handler() により処理される。 +- 有効/無効の切替(/set/enable/**, /set/disable/**)は同一リソースを競合しないよう正規化キーで Lock を割り当てる。 +- モデル内部では threadFnc(Thread ラッパ)で周期的な送信処理や監視処理を実装。 +- Audio 録音や文字起こしは専用の Queue を用い、Producer(Recorder)と Consumer(AudioTranscriber)を分離。 + +エラー処理 +- すべての外周呼び出しは try/except で保護し、`utils.errorLogging()` によってトレースバックを error.log に出力する。 +- JSON シリアライズに失敗した場合はフォールバック JSON を stdout に出力してプロセスを止めない。 +- VRAM 関連のエラーは model.detectVRAMError() で判定し、該当する機能(翻訳等)を無効化してユーザーに通知する。 + +設定管理 +- `config.py` が単一の Config シングルトンを持ち、変更はデバウンスして JSON ファイルへ保存。 +- GUI からの操作は Controller が受け取り、Config を更新する。 + +ログ +- `utils.setupLogger` によりローテートファイルハンドラを使ったログを実装(process.log / error.log)。 +- stdout には構造化ログを出力してフロントエンドと通信する。 + +インターフェース一覧(抜粋) +- STDIN/STDOUT プロトコル: mainloop の JSON 入出力(詳細は `mainloop.py` の mapping を参照) +- OSC: `models.osc.OSCHandler` が OSC 送受信と OSCQuery を管理 +- WebSocket: `models.websocket.WebSocketServer` がクライアント管理とメッセージブロードキャストを担う + +スレッド図(要点) +- main_thread: メイン(stdin 読み取り、キュー投入) +- handler_threads: キューから取り出し処理 +- device_manager.th_monitoring: デバイス監視 +- model.mic_print_transcript / speaker_print_transcript: 音声 -> 翻訳結果送出のループ +- websocket_server_thread: WebSocket サーバの asyncio ループを別スレッドで実行 + +拡張性・互換性設計 +- 依存性は try/except でガードして optional 機能として扱う(例: faster-whisper が無くても import は成功する)。 +- 翻訳エンジンは backend 名で抽象化され、Translator クラスにより統一インターフェースを提供。 + +運用上の考慮 +- 大きなモデルファイル(Whisper, CTranslate2)をダウンロードする仕組みを持ち、進捗を GUI に報告する。 +- GPU 計算タイプは utils.getBestComputeType で選択し、不適切な設定を検出した場合はフォールバック。 diff --git a/src-python/docs/詳細設計書.md b/src-python/docs/詳細設計書.md new file mode 100644 index 00000000..11b539ee --- /dev/null +++ b/src-python/docs/詳細設計書.md @@ -0,0 +1,66 @@ +# 詳細設計書 + +この文書は主要クラス・関数の詳細、データ構造、例外ケース、スレッドの振る舞いを記載する。 + +目次 +- Model +- Controller +- Main (mainloop) +- DeviceManager +- Utils +- モデルの重みダウンロードと整合性 + +## Model +- シングルトン: `model = Model()` +- 遅延初期化: `init()` と `ensure_initialized()` を備え、init は重いリソース(Overlay, Translator, Watchdog, OSC ハンドラ等)を構築する。 +- 主な責務 + - 
翻訳/文字起こし関連の起動停止ラッパ + - Overlay/OSChandler/WebSocket の操作 + - キーワード検出(flashtext)と重複検出 + - VRAM エラー検出とフォールバック +- 重要属性(抜粋) + - `translator` : Translator インスタンス + - `overlay` / `overlay_image` : Overlay 系 + - `mic_*`, `speaker_*` : 録音、トランスクリプタ、energy recorder + - `watchdog` : Watchdog + - `osc_handler`, `websocket_server` +- スレッド制御 + - threadFnc を用いて周期処理を回す。stop/pause/resume が可能。 + +## Controller +- GUI からの要求を受け、Model を操作して結果を run() コールバックへ返す。 +- 各種設定変更 (/set/ や /get/ エンドポイント) を実装。 +- 翻訳/文字起こし/オーバーレイ連携ロジックを持ち、メッセージ整形(messageFormatter)や OSC の送信を行う。 +- ダウンロード作業は別スレッドで行い、進捗を run_mapping を通して通知。 + +## Main (mainloop.Main) +- stdin を readline() で受け取り JSON を parse、endpoint と data をキューへ投入。 +- worker_count 個の handler スレッドが queue を取り出し `_call_handler` を実行。 +- endpoint ロック正規化: `/set/enable/...` と `/set/disable/...` は同じ正規化キー `/lock/set/...` を共有して排他制御。 +- エラーレスポンスの標準化と再試行ロジック(status==423 は再キュー化)。 + +## DeviceManager +- シングルトン。初期化は軽量で、`init()` により内部構造をセット、実デバイスは `update()` で取得。 +- Windows 環境では COM イベント (pycaw/MMNotificationClient) を用いた検出か、PyAudio によるポーリングでデバイス一覧を構成。 +- コールバック設計: 変更検出時に Controller のコールバックを呼び出して UI 更新を促す。 + +## Utils +- `validateDictStructure(data, structure)` : JSON 構造検証。 +- `getComputeDeviceList()` / `getBestComputeType()` : CPU/CUDA を列挙し、推奨 compute_type を返す。 +- `setupLogger()` / `printLog()` / `printResponse()` / `errorLogging()` : ログ、標準出力の整形、エラー記録。 +- ネットワーク/ソケット/IP アドレス検査ユーティリティ。 + +## モデル重みダウンロード +- `models.translation.translation_utils` と `models.transcription.transcription_whisper` にダウンロード/チェック関数があり、チェックサムやファイル存在を検証する。 +- GUI からの要求は Controller により非同期スレッドで実行され、進捗コールバックが run_mapping を介してフロントエンドに渡る。 + +## エッジケース / 例外処理 +- 外部 API のレート制限や認証エラーは呼び出し元に 400 系のレスポンスを返し、必要であればフォールバック実装(CTranslate2 への切替)を行う。 +- 大きなモデル実行時の VRAM エラーは検出し、当該機能を無効化してユーザへ通知する。 +- 音声デバイスが存在しない場合は NoDevice を返し、UI 側で扱う。 + +## テスト観点 +- メッセージ受信/送信のエンドツーエンド: stdin -> handler -> Controller -> Model -> printResponse の流れ。 +- デバイス挙動: DeviceManager.update() がデバイス一覧を取得できるか(PyAudio 経由)。 +- モデルダウンロード: 
ダウンロード成功・失敗、チェックサム検証。 +- ログ/エラー: errorLogging() による例外トレースが error.log に記録されるか。 diff --git a/src-python/mypy.ini b/src-python/mypy.ini deleted file mode 100644 index d9f53b5e..00000000 --- a/src-python/mypy.ini +++ /dev/null @@ -1,32 +0,0 @@ -[mypy] -# Temporarily ignore missing type stubs for third-party libraries to focus on -# type errors inside the project. We'll tighten this later. -ignore_missing_imports = True -python_version = 3.11 -show_error_codes = True - -# Per-module ignores can be added later for specific noisy modules. - -[mypy-tests.*] -ignore_errors = True - -# Temporarily ignore entire implementation areas that produce many non-actionable -# mypy errors (third-party untyped libs or large unannotated modules). We'll -# progressively remove these ignores as we annotate the codebase. -[mypy-models.transliteration.*] -ignore_errors = True - -[mypy-models.overlay.*] -ignore_errors = True - -[mypy-models.osc.*] -ignore_errors = True - -[mypy-models.transcription.*] -ignore_errors = True - -[mypy-models.translation.*] -ignore_errors = True - -[mypy-device_manager] -ignore_errors = True diff --git a/src-python/scripts/cleanup_docs_placeholders.py b/src-python/scripts/cleanup_docs_placeholders.py deleted file mode 100644 index 9da108fd..00000000 --- a/src-python/scripts/cleanup_docs_placeholders.py +++ /dev/null @@ -1,16 +0,0 @@ -from pathlib import Path -p=Path(__file__).resolve().parents[1]/'docs'/'api.md' -text=p.read_text(encoding='utf-8') -lines=[] -for line in text.splitlines(): - stripped=line.strip() - # Remove exact umbrella placeholder tokens or standalone list entries - if stripped in ('- /set/enable', '- /set/disable', '- /get/data/', '/set/enable', '/set/disable', '/get/data/'): - continue - # Remove lines that are just '/get/data' or '/set/data' or '/run/' etc - if stripped in ('/get/data', '/set/data', '/run/', '/get', '/set', '/run'): - continue - lines.append(line) -new='\n'.join(lines) -p.write_text(new,encoding='utf-8') -print('cleaned') 
diff --git a/src-python/scripts/find_doc_tokens.py b/src-python/scripts/find_doc_tokens.py deleted file mode 100644 index 7b9fae1a..00000000 --- a/src-python/scripts/find_doc_tokens.py +++ /dev/null @@ -1,21 +0,0 @@ -from pathlib import Path - -ROOT = Path(__file__).resolve().parents[1] -DOC_DIR = ROOT / 'docs' - -tokens = [ - 'transcription_mic', - 'transcription_speaker', - 'selected_translation_compute_device', - '/run/transcription_mic', - '/run/transcription_speaker', -] - -for p in DOC_DIR.rglob('*.md'): - text = p.read_text(encoding='utf-8') - for i, line in enumerate(text.splitlines(), start=1): - for t in tokens: - if t in line: - print(f"{p}:{i}:{line.strip()}") - -print('done') diff --git a/src-python/scripts/print_mapping.py b/src-python/scripts/print_mapping.py deleted file mode 100644 index 8b66e177..00000000 --- a/src-python/scripts/print_mapping.py +++ /dev/null @@ -1,28 +0,0 @@ -from pathlib import Path -import re -ROOT = Path(__file__).resolve().parents[1] -MAINLOOP = ROOT / 'mainloop.py' -text = MAINLOOP.read_text(encoding='utf-8') -run_mapping = {} -mapping = {} -for mm in re.finditer(r"[\'\"]([^\'\"]+)[\'\"]\s*:\s*[\'\"](/run/[a-zA-Z0-9_\-\\/]+)[\'\"]", text): - run_mapping[mm.group(1)] = mm.group(2) -for mm in re.finditer(r"[\'\"](/(?:get|set)/[a-zA-Z0-9_\-\\/]+)[\'\"]", text): - mapping[mm.group(1)] = True -print('run_mapping entries:', len(run_mapping)) -print('sample run_mapping keys:', sorted(run_mapping.items())[:10]) -print('\nmapping endpoints count:', len(mapping)) -# show any endpoints that are exactly '/get/data/' -print('\ncontains /get/data/?', '/get/data/' in mapping) -if '/get/data/' in mapping: - print('Found /get/data/ literal in mainloop.py text') -# show ones containing '/get/data' -has_get_data = [k for k in mapping.keys() if '/get/data' in k] -print('\nendpoints containing /get/data:', len(has_get_data)) -if has_get_data: - for k in sorted(has_get_data)[:30]: - print(' -', k) -# print first 20 mapping endpoints 
-print('\nFirst 40 endpoints:') -for k in sorted(mapping.keys())[:40]: - print(' -', k) diff --git a/src-python/scripts/verify_docs_vs_code.py b/src-python/scripts/verify_docs_vs_code.py deleted file mode 100644 index 7aa47c40..00000000 --- a/src-python/scripts/verify_docs_vs_code.py +++ /dev/null @@ -1,161 +0,0 @@ -import importlib.util -import re -from pathlib import Path - -ROOT = Path(__file__).resolve().parents[1] -MAINLOOP = ROOT / 'mainloop.py' -CONTROLLER = ROOT / 'controller.py' -DOC_API = ROOT / 'docs' / 'api.md' -DOC_RUN = ROOT / 'docs' / 'run_events_payloads.md' - - -def extract_mapping_from_mainloop(): - """ - Import mainloop.py and read 'mapping' and 'run_mapping' objects directly. - This executes the module in an isolated module object; mainloop has some - initialization but exposing these dicts is acceptable for verification. - """ - run_mapping = {} - mapping = {} - try: - spec = importlib.util.spec_from_file_location('project_mainloop', str(MAINLOOP)) - module = importlib.util.module_from_spec(spec) - loader = spec.loader - if loader is None: - raise RuntimeError('Could not load mainloop module') - loader.exec_module(module) - mapping = getattr(module, 'mapping', {}) or {} - run_mapping = getattr(module, 'run_mapping', {}) or {} - return mapping, run_mapping - except Exception as e: - print('Error importing mainloop.py', e) - - # Fallback: simple regex-based extraction from mainloop.py text - try: - text = MAINLOOP.read_text(encoding='utf-8') - # run_mapping entries like: "transcription_mic": "/run/transcription_send_mic_message", - for mm in re.finditer(r"[\'\"]([^\'\"]+)[\'\"]\s*:\s*[\'\"](/run/[a-zA-Z0-9_\-\/]+)[\'\"]", text): - run_mapping[mm.group(1)] = mm.group(2) - # mapping endpoints: any '/get/...' or '/set/...' 
literal in file - for mm in re.finditer(r"[\'\"](/(?:get|set)/[a-zA-Z0-9_\-\/]+)[\'\"]", text): - mapping[mm.group(1)] = True - except Exception as e: - print('Error parsing mainloop.py via fallback', e) - - return mapping, run_mapping - - -def extract_run_events_from_controller(): - code = CONTROLLER.read_text(encoding='utf-8') - # find self.run( ... , self.run_mapping["key"], ... ) and direct self.run(..., - run_keys = set() - # pattern for self.run(..., self.run_mapping["xxx"], ...) - pattern = re.compile(r"self\.run\([^\)]*self\.run_mapping\[\s*[\'\"]([^\'\"]+)[\'\"]\s*\]", re.M) - for m in pattern.finditer(code): - run_keys.add(m.group(1)) - # also find self.run(..., "/run/xxx", ...) - pattern2 = re.compile(r"self\.run\([^\)]*\"(/run/[^\'\"]+)\"", re.M) - for m in pattern2.finditer(code): - run_keys.add(m.group(1)) - return run_keys - - -def extract_endpoints_from_docs(): - api = DOC_API.read_text(encoding='utf-8') - run = DOC_RUN.read_text(encoding='utf-8') if DOC_RUN.exists() else '' - endpoints = set() - run_events = set() - # conservative extraction: match endpoints that start with /get/ /set/ /run/ - pattern = re.compile(r"(/(?:get|set|run)(?:/[a-zA-Z0-9_\-]+)+)") - for m in pattern.finditer(api): - token = m.group(1) - # drop umbrella placeholders and tokens that end with '/' - if token in ('/get', '/set', '/run', '/get/data', '/set/data'): - continue - if token.endswith('/'): - continue - if token.startswith('/run/'): - run_events.add(token) - else: - endpoints.add(token) - for m in pattern.finditer(run): - token = m.group(1) - if token in ('/get', '/set', '/run', '/get/data', '/set/data'): - continue - if token.endswith('/'): - continue - if token.startswith('/run/'): - run_events.add(token) - else: - endpoints.add(token) - return endpoints, run_events - - -def main(): - mapping, run_mapping = extract_mapping_from_mainloop() - code_endpoints = set(mapping.keys()) - code_run_events = set(run_mapping.values()) - # normalize run events: run_mapping values 
likely like '/run/…' - controller_run_keys = extract_run_events_from_controller() - - doc_endpoints, doc_run_events = extract_endpoints_from_docs() - - report = [] - report.append('=== Summary ===') - report.append(f'Code endpoints (/get,/set,/run): {len(code_endpoints)}') - report.append(f'Code run_mapping entries: {len(code_run_events)}') - report.append(f'Controller-run keys found by scan: {len(controller_run_keys)}') - report.append(f'Documented endpoints found in docs/api.md: {len(doc_endpoints)}') - report.append(f'Documented run events found in docs: {len(doc_run_events)}') - - # endpoints present in code but not in docs - missing_in_docs = code_endpoints - doc_endpoints - extra_in_docs = doc_endpoints - code_endpoints - - report.append('\n=== Endpoints present in code but NOT documented ===') - if missing_in_docs: - for e in sorted(missing_in_docs): - report.append(' - ' + e) - else: - report.append(' - None') - - report.append('\n=== Endpoints documented but NOT in code ===') - if extra_in_docs: - for e in sorted(extra_in_docs): - report.append(' - ' + e) - else: - report.append(' - None') - - report.append('\n=== Run events present in code (run_mapping) but NOT documented ===') - missing_run_in_docs = code_run_events - doc_run_events - if missing_run_in_docs: - for e in sorted(missing_run_in_docs): - report.append(' - ' + e) - else: - report.append(' - None') - - report.append('\n=== Run keys emitted in controller (self.run mapping keys) but NOT in run_mapping values ===') - # controller_run_keys are keys like 'connected_network' or '/run/connected_network' - # normalize controller keys to values: if key starts with '/run/' keep, else map via run_mapping if possible - normalized = set() - for k in controller_run_keys: - if k.startswith('/run/'): - normalized.add(k) - else: - if k in run_mapping: - normalized.add(run_mapping[k]) - else: - normalized.add(k) - # compare normalized with code_run_events - extra_controller_keys = normalized - code_run_events - 
if extra_controller_keys: - for e in sorted(extra_controller_keys): - report.append(' - ' + e) - else: - report.append(' - None') - - out = '\n'.join(report) - print(out) - -if __name__ == '__main__': - main() diff --git a/src-python/scripts/verify_docs_vs_code_runtime.py b/src-python/scripts/verify_docs_vs_code_runtime.py deleted file mode 100644 index 187575f3..00000000 --- a/src-python/scripts/verify_docs_vs_code_runtime.py +++ /dev/null @@ -1,126 +0,0 @@ -import re -import json -import sys -from pathlib import Path - -ROOT = Path(__file__).resolve().parents[1] -DOC_API = ROOT / 'docs' / 'api.md' -DOC_RUN = ROOT / 'docs' / 'run_events_payloads.md' - -# Ensure project root is importable so `import mainloop` works when this script is -# executed from the scripts/ folder. -sys.path.insert(0, str(ROOT)) - - -def main(): - # Delayed imports to avoid module-level import ordering issues (E402 in linters) - import mainloop - import controller as controller_module - - mapping_keys = set(mainloop.mapping.keys()) - run_mapping_values = set(mainloop.run_mapping.values()) - - # extract controller emitted run keys by source scan - controller_src = Path(controller_module.__file__).read_text(encoding='utf-8') - controller_run_keys = set() - for m in re.finditer(r"self\.run\([^\)]*self\.run_mapping\[\s*[\'\"]([^\'\"]+)[\'\"]\s*\]", controller_src): - controller_run_keys.add(m.group(1)) - for m in re.finditer(r"self\.run\([^\)]*\"(/run/[a-zA-Z0-9_\-/]+)\"", controller_src): - controller_run_keys.add(m.group(1)) - # read docs and extract endpoints conservatively (only full endpoints starting with /get/ /set/ /run/) - api_text = DOC_API.read_text(encoding='utf-8') - run_text = DOC_RUN.read_text(encoding='utf-8') if DOC_RUN.exists() else '' - - # include delete endpoints as well (e.g. 
/delete/data/deepl_auth_key) - endpoint_pattern = re.compile(r"(/(?:get|set|run|delete)[A-Za-z0-9_\-/]*)") - - doc_endpoints = set(m.group(1) for m in endpoint_pattern.finditer(api_text + '\n' + run_text)) - - # Remove umbrella placeholder artifacts that sometimes appear due to - # comma-separated lists or pattern fragments in the markdown. These are - # not concrete endpoints and should not be treated as documented endpoints - # for parity checking. - umbrella_tokens = { - '/get', '/set', '/run', '/get/data', '/set/data', '/set/enable', '/set/disable' - } - # Remove exact umbrella tokens and any accidental entries that end with a - # trailing slash (these are artifacts of pattern matching in markdown). - doc_endpoints = {e for e in doc_endpoints if e not in umbrella_tokens and not e.endswith('/')} - - # Compare - missing_in_docs = mapping_keys - doc_endpoints - # A documented endpoint is valid if it corresponds to either an incoming mapping (mapping_keys) - # or an outgoing run event (run_mapping_values). Treat extra_in_docs as anything documented - # that is neither in mapping_keys nor in run_mapping_values. 
- extra_in_docs = doc_endpoints - (mapping_keys | run_mapping_values) - - missing_run_in_docs = run_mapping_values - doc_endpoints - - # Normalize controller keys to run_mapping values - normalized = set() - for k in controller_run_keys: - if k.startswith('/run/'): - normalized.add(k) - else: - if k in mainloop.run_mapping: - normalized.add(mainloop.run_mapping[k]) - else: - normalized.add(k) - - extra_controller_keys = normalized - run_mapping_values - - report = [] - report.append('=== Runtime verification report ===') - report.append(f'Code mapping endpoints: {len(mapping_keys)}') - report.append(f'Code run_mapping entries: {len(run_mapping_values)}') - report.append(f'Controller emitted run keys: {len(controller_run_keys)}') - report.append(f'Documented endpoints (docs): {len(doc_endpoints)}') - - report.append('\n--- Endpoints present in code but NOT documented ---') - if missing_in_docs: - for e in sorted(missing_in_docs): - report.append(' - ' + e) - else: - report.append(' - None') - - report.append('\n--- Endpoints documented but NOT in code ---') - if extra_in_docs: - for e in sorted(extra_in_docs): - report.append(' - ' + e) - else: - report.append(' - None') - - report.append('\n--- Run events present in code (run_mapping) but NOT documented ---') - if missing_run_in_docs: - for e in sorted(missing_run_in_docs): - report.append(' - ' + e) - else: - report.append(' - None') - - report.append('\n--- Run keys emitted in controller (normalized) but NOT in run_mapping values ---') - if extra_controller_keys: - for e in sorted(extra_controller_keys): - report.append(' - ' + e) - else: - report.append(' - None') - - print('\n'.join(report)) - - # Also output JSON for downstream processing - out = { - 'mapping_keys': sorted(mapping_keys), - 'run_mapping_values': sorted(run_mapping_values), - 'controller_run_keys': sorted(controller_run_keys), - 'doc_endpoints': sorted(doc_endpoints), - 'missing_in_docs': sorted(missing_in_docs), - 'extra_in_docs': 
sorted(extra_in_docs), - 'missing_run_in_docs': sorted(missing_run_in_docs), - 'extra_controller_keys': sorted(extra_controller_keys), - } - print('\nJSON_OUTPUT_START') - print(json.dumps(out)) - print('JSON_OUTPUT_END') - - -if __name__ == '__main__': - main() diff --git a/src-python/tests/test_osc_imports.py b/src-python/tests/test_osc_imports.py deleted file mode 100644 index 1df699c3..00000000 --- a/src-python/tests/test_osc_imports.py +++ /dev/null @@ -1,6 +0,0 @@ -def test_import_osc_module(): - try: - import importlib - importlib.import_module('models.osc.osc') - except Exception as e: - raise AssertionError(f"Failed importing models.osc.osc: {e}") diff --git a/src-python/tests/test_overlay_imports.py b/src-python/tests/test_overlay_imports.py deleted file mode 100644 index b90389e7..00000000 --- a/src-python/tests/test_overlay_imports.py +++ /dev/null @@ -1,30 +0,0 @@ -import sys -import time -from PIL import Image - -sys.path.append(r"d:\WORKSPACE\WORK\VRChatProject\VRCT\src-python") - -from models.overlay import overlay_image, overlay_utils - - -def test_overlay_image_create(): - oi = overlay_image.OverlayImage() - img = oi.createOverlayImageSmallLog("hello", "English", [], []) - assert isinstance(img, Image.Image) - - -def test_utils_transform(): - import numpy as np - base = np.array([ - [1, 0, 0, 0], - [0, 1, 0, 0], - [0, 0, 1, 0] - ]) - res = overlay_utils.transform_matrix(base, (0, 0, 0), (0, 0, 0)) - assert res.shape == (3, 4) - - -if __name__ == '__main__': - test_overlay_image_create() - test_utils_transform() - print('tests passed') From d1aef28c7a892f749fcf35174f21d46a3d9cf3d4 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Tue, 14 Oct 2025 07:28:03 +0900 Subject: [PATCH 92/92] Add comprehensive detailed design documents --- src-python/docs/details/backend_test.md | 95 ++ src-python/docs/details/config.md | 392 +++++++ src-python/docs/details/controller.md | 349 ++++++ 
src-python/docs/details/mainloop.md | 275 +++++ src-python/docs/details/model.md | 292 ++++++ src-python/docs/details/osc.md | 602 +++++++++++ src-python/docs/details/overlay.md | 754 +++++++++++++ .../docs/details/transcription_languages.md | 229 ++++ .../docs/details/transcription_recorder.md | 325 ++++++ .../docs/details/transcription_transcriber.md | 325 ++++++ .../docs/details/transcription_whisper.md | 373 +++++++ .../docs/details/translation_languages.md | 342 ++++++ .../docs/details/translation_translator.md | 406 +++++++ src-python/docs/details/translation_utils.md | 438 ++++++++ .../details/transliteration_context_rules.md | 397 +++++++ .../transliteration_kana_to_hepburn.md | 465 ++++++++ .../details/transliteration_transliterator.md | 659 ++++++++++++ src-python/docs/details/utils.md | 213 ++++ src-python/docs/details/watchdog.md | 670 ++++++++++++ src-python/docs/details/websocket_server.md | 989 ++++++++++++++++++ 20 files changed, 8590 insertions(+) create mode 100644 src-python/docs/details/backend_test.md create mode 100644 src-python/docs/details/config.md create mode 100644 src-python/docs/details/controller.md create mode 100644 src-python/docs/details/mainloop.md create mode 100644 src-python/docs/details/model.md create mode 100644 src-python/docs/details/osc.md create mode 100644 src-python/docs/details/overlay.md create mode 100644 src-python/docs/details/transcription_languages.md create mode 100644 src-python/docs/details/transcription_recorder.md create mode 100644 src-python/docs/details/transcription_transcriber.md create mode 100644 src-python/docs/details/transcription_whisper.md create mode 100644 src-python/docs/details/translation_languages.md create mode 100644 src-python/docs/details/translation_translator.md create mode 100644 src-python/docs/details/translation_utils.md create mode 100644 src-python/docs/details/transliteration_context_rules.md create mode 100644 src-python/docs/details/transliteration_kana_to_hepburn.md create 
mode 100644 src-python/docs/details/transliteration_transliterator.md create mode 100644 src-python/docs/details/utils.md create mode 100644 src-python/docs/details/watchdog.md create mode 100644 src-python/docs/details/websocket_server.md diff --git a/src-python/docs/details/backend_test.md b/src-python/docs/details/backend_test.md new file mode 100644 index 00000000..af0413c0 --- /dev/null +++ b/src-python/docs/details/backend_test.md @@ -0,0 +1,95 @@ +# backend_test.py - APIエンドポイントテストモジュール + +## 概要 +VRCTアプリケーションのAPIエンドポイントを包括的にテストするためのモジュールです。メインループの各種機能をランダムアクセスでテストし、システムの安定性と堅牢性を検証します。 + +## 主要機能 + +### Color クラス +- ANSIエスケープシーケンスを使用したコンソール出力色彩管理 +- テスト結果の視覚的表示(成功・失敗・スキップ等) + +### TestMainloop クラス +- APIエンドポイントの包括的テスト実行 +- ランダムアクセステスト +- テスト結果の記録・分析 +- VRCTメインループとの統合テスト + +## 主要メソッド + +### テスト実行メソッド +- `test_endpoints_on_off_all()`: ON/OFF系エンドポイントの全テスト +- `test_set_data_endpoints_all()`: データ設定系エンドポイントの全テスト +- `test_run_endpoints_all()`: 実行系エンドポイントの全テスト +- `test_endpoints_all_random()`: 全エンドポイントのランダムアクセステスト + +### 特定機能テスト +- `test_translate_all_language_pairs()`: 全言語ペアでの翻訳テスト +- `test_endpoints_on_off_continuous()`: ON/OFF連続切り替えテスト +- `test_endpoints_specific_random()`: 特定エンドポイントのランダムテスト + +### 結果分析 +- `generate_summary()`: テスト結果のサマリー生成 +- `record_test_result()`: テスト結果の記録 + +## 使用方法 + +### 基本的な使い方 +```python +# テストインスタンスを作成 +test = TestMainloop() + +# 各種テストを実行 +test.test_endpoints_on_off_all() +test.test_set_data_endpoints_all() +test.test_run_endpoints_all() + +# テスト結果のサマリー表示 +test.generate_summary() +``` + +### ランダムテストの実行 +```python +# 全エンドポイントのランダムアクセステスト +test.test_endpoints_all_random() + +# 特定エンドポイントのランダムテスト +test.test_endpoints_specific_random() +``` + +## 依存関係 +- `mainloop`: VRCTメインループモジュール +- `random`: ランダムテストデータ生成 +- `time`: テスト間隔制御 + +## テスト対象エンドポイント + +### 制御系 +- `/set/enable/*`: 機能有効化 +- `/set/disable/*`: 機能無効化 + +### データ設定系 +- `/set/data/*`: 各種設定データの更新 + +### 実行系 +- `/run/*`: 各種機能の実行 + +### データ削除系 +- `/delete/data/*`: データの削除 + +## 注意事項 +- 
テスト実行前に`config.json`を削除して初期化 +- 重いAIモデルを使用するテストは実行時間に注意 +- ランダムテストは指定回数(デフォルト1000-10000回)実行される +- テスト終了時は自動的にすべての機能を無効化する + +## エラーハンドリング +- 各テストは独立して実行され、一つの失敗が全体に影響しない +- 期待されるステータスコードと実際の結果を比較 +- VRAM不足等のリソースエラーも適切にハンドリング + +## テスト結果の分類 +- **PASS**: 期待されるステータスコードと一致 +- **ERROR**: 期待されるステータスコードと不一致 +- **SKIP**: テスト実行不可(401ステータス) +- **Invalid**: 無効なエンドポイント(404ステータス) \ No newline at end of file diff --git a/src-python/docs/details/config.md b/src-python/docs/details/config.md new file mode 100644 index 00000000..fd619d77 --- /dev/null +++ b/src-python/docs/details/config.md @@ -0,0 +1,392 @@ +# config.py - 設定管理モジュール + +## 概要 + +VRCTアプリケーションの全設定を一元管理するモジュールです。シングルトンパターンを採用し、アプリケーション全体で統一された設定アクセスを提供します。JSON設定ファイルの読み書き、設定の永続化、デバウンス機能付き保存機能を提供します。 + +## 主要機能 + +### シングルトン設計 +- アプリケーション全体で単一の設定インスタンス +- スレッドセーフな設定アクセス +- 遅延初期化による軽量インポート + +### 設定永続化 +- JSON形式での設定ファイル管理 +- デバウンス機能付き自動保存 +- 設定変更の即座反映 + +### 動的設定管理 +- 実行時設定変更対応 +- デバイス情報の動的取得 +- 言語・エンジン設定の自動更新 + +### 型安全な設定アクセス +- プロパティベースのアクセス制御 +- 読み取り専用・読み書き可能設定の分離 +- デコレータによるシリアライゼーション管理 + +## クラス構造 + +### Config クラス +```python +class Config: + _instance = None # シングルトンインスタンス + _config_data: Dict[str, Any] # 設定データ + _timer: Optional[threading.Timer] # デバウンスタイマー + _debounce_time: int = 2 # デバウンス時間(秒) +``` + +## 設定カテゴリ + +### 読み取り専用設定 + +```python +@property +def VERSION(self) -> str +``` +- アプリケーションバージョン + +```python +@property +def PATH_LOCAL(self) -> str +``` +- ローカルディレクトリパス + +```python +@property +def PATH_CONFIG(self) -> str +``` +- 設定ファイルパス + +### UI・表示設定 + +```python +@property +def UI_LANGUAGE(self) -> str +``` +- UIの表示言語 + +```python +@property +def TRANSPARENCY(self) -> int +``` +- ウィンドウの透明度(0-100) + +```python +@property +def UI_SCALING(self) -> int +``` +- UIのスケーリング(50-200%) + +```python +@property +def FONT_FAMILY(self) -> str +``` +- 使用フォントファミリー + +### 翻訳設定 + +```python +@property +def ENABLE_TRANSLATION(self) -> bool +``` +- 翻訳機能の有効・無効 + +```python +@property +def SELECTED_TRANSLATION_ENGINES(self) -> 
Dict[str, str] +``` +- 選択されている翻訳エンジン + +```python +@property +def SELECTED_YOUR_LANGUAGES(self) -> Dict[str, Dict[str, Any]] +``` +- 送信言語設定 + +```python +@property +def SELECTED_TARGET_LANGUAGES(self) -> Dict[str, Dict[str, Any]] +``` +- 受信言語設定 + +### 音声認識設定 + +```python +@property +def ENABLE_TRANSCRIPTION_SEND(self) -> bool +``` +- 送信音声認識の有効・無効 + +```python +@property +def SELECTED_TRANSCRIPTION_ENGINE(self) -> str +``` +- 音声認識エンジン + +```python +@property +def SELECTED_MIC_DEVICE(self) -> str +``` +- 選択されたマイクデバイス + +```python +@property +def MIC_THRESHOLD(self) -> int +``` +- マイク音声しきい値 + +```python +@property +def MIC_RECORD_TIMEOUT(self) -> int +``` +- マイク録音タイムアウト(秒) + +### VR設定 + +```python +@property +def OVERLAY_SMALL_LOG(self) -> bool +``` +- 小型ログオーバーレイの有効・無効 + +```python +@property +def OVERLAY_SMALL_LOG_SETTINGS(self) -> Dict[str, Any] +``` +- 小型オーバーレイの詳細設定 + +```python +@property +def OVERLAY_LARGE_LOG_SETTINGS(self) -> Dict[str, Any] +``` +- 大型オーバーレイの詳細設定 + +### 通信設定 + +```python +@property +def OSC_IP_ADDRESS(self) -> str +``` +- OSC通信IPアドレス + +```python +@property +def OSC_PORT(self) -> int +``` +- OSC通信ポート + +```python +@property +def WEBSOCKET_HOST(self) -> str +``` +- WebSocketサーバーホスト + +```python +@property +def WEBSOCKET_PORT(self) -> int +``` +- WebSocketサーバーポート + +### 計算デバイス設定 + +```python +@property +def SELECTED_TRANSLATION_COMPUTE_DEVICE(self) -> Dict[str, Any] +``` +- 翻訳用計算デバイス + +```python +@property +def SELECTED_TRANSCRIPTION_COMPUTE_DEVICE(self) -> Dict[str, Any] +``` +- 音声認識用計算デバイス + +## 主要メソッド + +### 設定保存 + +```python +saveConfig(key: str, value: Any, immediate_save: bool = False) -> None +``` +- 設定値の保存(デバウンス付き) +- immediate_save=Trueで即座保存 + +```python +saveConfigToFile() -> None +``` +- 設定ファイルへの直接保存 + +### 初期化・設定読み込み + +```python +init_config() -> None +``` +- 設定の初期化 +- デフォルト値の設定 + +```python +load_config() -> None +``` +- 設定ファイルからの読み込み +- 存在しない場合はデフォルト設定を作成 + +## デコレータ機能 + +### @json_serializable +```python 
+@json_serializable("setting_name") +@property +def SETTING_NAME(self) -> Any: +``` +- 設定のJSONシリアライゼーション対象指定 +- 自動的にconfig.jsonに保存される設定を定義 + +## 使用方法 + +### 基本的な使い方 + +```python +from config import config + +# 設定値の取得 +version = config.VERSION +ui_language = config.UI_LANGUAGE +translation_enabled = config.ENABLE_TRANSLATION + +# 設定値の変更 +config.UI_LANGUAGE = "ja" +config.TRANSPARENCY = 80 +config.MIC_THRESHOLD = 1500 +``` + +### 複雑な設定の変更 + +```python +# 翻訳エンジンの設定 +engines = config.SELECTED_TRANSLATION_ENGINES +engines["1"] = "DeepL" +config.SELECTED_TRANSLATION_ENGINES = engines + +# オーバーレイ設定の変更 +overlay_settings = config.OVERLAY_SMALL_LOG_SETTINGS +overlay_settings["x_pos"] = 0.5 +overlay_settings["opacity"] = 0.8 +config.OVERLAY_SMALL_LOG_SETTINGS = overlay_settings +``` + +### 即座保存 + +```python +# 重要な設定変更時の即座保存 +config.saveConfig("ENABLE_TRANSLATION", True, immediate_save=True) +``` + +## 設定ファイル形式 + +設定は`config.json`ファイルにJSON形式で保存されます: + +```json +{ + "UI_LANGUAGE": "ja", + "TRANSPARENCY": 85, + "UI_SCALING": 100, + "ENABLE_TRANSLATION": true, + "SELECTED_TRANSLATION_ENGINES": { + "1": "DeepL", + "2": "Google", + "3": "CTranslate2" + }, + "OVERLAY_SMALL_LOG_SETTINGS": { + "x_pos": 0.0, + "y_pos": -0.4, + "z_pos": 1.0, + "opacity": 1.0, + "ui_scaling": 1.0, + "display_duration": 5, + "fadeout_duration": 1 + } +} +``` + +## デフォルト設定 + +### UI設定 +- UI言語: "en"(英語) +- 透明度: 85% +- UIスケーリング: 100% +- フォント: "Noto Sans JP" + +### 翻訳設定 +- 翻訳機能: 無効 +- デフォルトエンジン: "Google" +- 送信言語: English(US) +- 受信言語: 日本語 + +### 音声認識設定 +- 送信音声認識: 無効 +- 受信音声認識: 無効 +- 音声認識エンジン: "Google" +- マイクしきい値: 300 + +### VR設定 +- 小型オーバーレイ: 無効 +- 大型オーバーレイ: 無効 +- オーバーレイ位置: HMD正面 + +### 通信設定 +- OSC IP: "127.0.0.1" +- OSC ポート: 9000 +- WebSocket ホスト: "127.0.0.1" +- WebSocket ポート: 8765 + +## 依存関係 + +### 必須依存関係 +- `json`: 設定ファイルのシリアライゼーション +- `threading`: デバウンス機能 +- `typing`: 型注釈 + +### オプション依存関係 +- `device_manager`: デバイス情報取得 +- `torch`: CUDA計算デバイス情報 +- 各種モデルモジュール: 言語・エンジン情報 + +## エラーハンドリング + +- 設定ファイル読み込みエラーの適切な処理 
+- 不正な設定値の検証・補正 +- オプション依存関係の欠如に対するフォールバック +- ファイル書き込みエラーの処理 + +## パフォーマンス特性 + +### デバウンス機能 +- 設定変更から2秒後に自動保存 +- 連続する変更の統合 +- I/O負荷の軽減 + +### 遅延初期化 +- 重い依存関係の遅延読み込み +- インポート時間の短縮 + +### メモリ効率 +- 設定データのシングルトン管理 +- 不要な複製の防止 + +## 注意事項 + +- 設定変更は即座にメモリに反映される +- ファイル保存はデバウンス機能により遅延される +- 重要な設定はimmediate_save=Trueを使用 +- オプション依存関係の欠如時はデフォルト値を使用 +- 不正な設定値は自動的に補正される +- 設定ファイルが破損した場合は新規作成される + +## セキュリティ考慮事項 + +- 設定ファイルの適切な権限管理 +- 外部入力値の検証 +- APIキー等の機密情報の適切な取り扱い +- パスインジェクション攻撃の防止 \ No newline at end of file diff --git a/src-python/docs/details/controller.md b/src-python/docs/details/controller.md new file mode 100644 index 00000000..ca1fefda --- /dev/null +++ b/src-python/docs/details/controller.md @@ -0,0 +1,349 @@ +# controller.py - VRCTコントローラーモジュール + +## 概要 + +VRCTアプリケーションのビジネスロジックを制御するコントローラークラスです。UI層とモデル層の間に位置し、ユーザーの入力を適切な処理に変換し、結果を UI に返す役割を担います。全ての機能制御、設定管理、状態管理を一元的に行います。 + +## 主要機能 + +### 機能制御 +- 翻訳機能の有効化・無効化 +- 音声認識機能の制御 +- VRオーバーレイの管理 +- WebSocketサーバーの制御 + +### 設定管理 +- アプリケーション設定の取得・更新 +- デバイス設定の管理 +- 言語・エンジン設定の制御 + +### 状態管理 +- システム状態の監視 +- エラー状態の管理 +- 初期化プロセスの制御 + +### 通信制御 +- OSC通信の管理 +- WebSocket通信の制御 +- 外部アプリケーション連携 + +## クラス構造 + +### Controller クラス +```python +class Controller: + def __init__(self) -> None +``` + +中核となるコントローラークラス + +### 内部ヘルパークラス + +#### DownloadCTranslate2 クラス +```python +class DownloadCTranslate2: + def progressBar(self, progress) -> None + def downloaded(self) -> None +``` +- 翻訳モデルのダウンロード進捗管理 + +#### DownloadWhisper クラス +```python +class DownloadWhisper: + def progressBar(self, progress) -> None + def downloaded(self) -> None +``` +- 音声認識モデルのダウンロード進捗管理 + +## 主要メソッド + +### 初期化・設定 + +```python +init() -> None +``` +- コントローラーの初期化 +- 各コンポーネントの起動 +- 初期設定の適用 + +```python +setInitMapping(init_mapping: dict) -> None +setRunMapping(run_mapping: dict) -> None +setRun(run: Callable) -> None +``` +- エンドポイント・コールバック設定 + +### 翻訳機能制御 + +```python +setEnableTranslation(data) -> dict +setDisableTranslation(data) -> dict +``` +- 翻訳機能の有効化・無効化 + +```python 
+setSelectedTranslationEngines(data) -> dict +getSelectedTranslationEngines(data) -> dict +``` +- 翻訳エンジンの選択・取得 + +```python +setSelectedYourLanguages(data) -> dict +setSelectedTargetLanguages(data) -> dict +``` +- 送信・受信言語の設定 + +```python +sendMessageBox(data) -> dict +``` +- メッセージの翻訳・送信処理 + +### 音声認識機能制御 + +```python +setEnableTranscriptionSend(data) -> dict +setEnableTranscriptionReceive(data) -> dict +``` +- 音声認識機能の有効化 + +```python +setSelectedTranscriptionEngine(data) -> dict +getSelectedTranscriptionEngine(data) -> dict +``` +- 音声認識エンジンの選択・取得 + +```python +setSelectedMicDevice(data) -> dict +setSelectedSpeakerDevice(data) -> dict +``` +- 音声デバイスの選択 + +```python +setMicThreshold(data) -> dict +setSpeakerThreshold(data) -> dict +``` +- 音声しきい値の設定 + +### VRオーバーレイ制御 + +```python +setEnableOverlaySmallLog(data) -> dict +setEnableOverlayLargeLog(data) -> dict +``` +- VRオーバーレイの有効化 + +```python +setOverlaySmallLogSettings(data) -> dict +setOverlayLargeLogSettings(data) -> dict +``` +- オーバーレイ設定の更新 + +### WebSocket制御 + +```python +setEnableWebSocketServer(data) -> dict +setDisableWebSocketServer(data) -> dict +``` +- WebSocketサーバーの制御 + +```python +setWebSocketHost(data) -> dict +setWebSocketPort(data) -> dict +``` +- WebSocket接続設定 + +### システム管理 + +```python +updateSoftware(data) -> dict +updateCudaSoftware(data) -> dict +``` +- ソフトウェアアップデート + +```python +downloadCtranslate2Weight(data) -> dict +downloadWhisperWeight(data) -> dict +``` +- AIモデルのダウンロード + +```python +feedWatchdog(data) -> dict +``` +- ウォッチドッグの生存シグナル送信 + +## 使用方法 + +### 基本的な使い方 + +```python +from controller import Controller + +# コントローラーの初期化 +controller = Controller() +controller.init() + +# 翻訳機能の有効化 +result = controller.setEnableTranslation(None) +print(f"翻訳機能: {result}") + +# メッセージ送信 +message_data = {"id": "123", "message": "Hello World"} +result = controller.sendMessageBox(message_data) +``` + +### エンドポイント設定 + +```python +# マッピング設定 +mapping = { + "/set/enable/translation": controller.setEnableTranslation, + 
"/get/data/version": controller.getVersion, +} + +# 実行関数の設定 +def run_callback(status, endpoint, result): + print(f"Status: {status}, Endpoint: {endpoint}, Result: {result}") + +controller.setRun(run_callback) +``` + +### 音声認識の設定 + +```python +# マイクデバイスの選択 +host_data = "DirectSound" +result = controller.setSelectedMicHost(host_data) + +device_data = "マイク (USB Audio Device)" +result = controller.setSelectedMicDevice(device_data) + +# 音声認識の開始 +result = controller.setEnableTranscriptionSend(None) +``` + +## レスポンス形式 + +全てのメソッドは統一されたレスポンス形式を返します: + +```python +{ + "status": int, # HTTPステータスコード(200, 400, 500等) + "result": any # 処理結果(成功時)または エラーメッセージ(失敗時) +} +``` + +### 成功レスポンス例 +```python +{ + "status": 200, + "result": "翻訳機能が有効化されました" +} +``` + +### エラーレスポンス例 +```python +{ + "status": 400, + "result": "Invalid device selection" +} +``` + +## 状態管理 + +### システム状態 +- 各機能の有効・無効状態 +- デバイスの接続状態 +- ネットワーク接続状態 + +### エラー状態 +- デバイスエラー +- 翻訳エンジンエラー +- VRAMオーバーフローエラー + +### 初期化状態 +- 段階的な初期化プロセス +- 依存関係の解決状態 + +## イベント処理 + +### 音声認識イベント + +```python +micMessage(result: dict) -> None +``` +- マイク音声認識結果の処理 +- 翻訳・フィルタリング・送信 + +```python +speakerMessage(result: dict) -> None +``` +- スピーカー音声認識結果の処理 + +### ダウンロードイベント +- 進捗通知 +- 完了通知 +- エラー通知 + +### デバイス変更イベント +- マイク・スピーカーの選択変更 +- 計算デバイスの変更 + +## 依存関係 + +### 直接依存 +- `config`: 設定管理 +- `model`: コアモデル機能 +- `device_manager`: デバイス管理 +- `utils`: ユーティリティ機能 + +### 間接依存 +- 各種モデルモジュール(翻訳、音声認識等) +- VRオーバーレイモジュール +- 通信モジュール + +## エラーハンドリング + +### VRAM不足エラー +- 自動的にCTranslate2への切り替え +- ユーザーへの適切な通知 + +### デバイスエラー +- デバイス接続状態の監視 +- 自動復旧機能 + +### ネットワークエラー +- 接続状態の定期確認 +- オフライン機能への切り替え + +### 設定エラー +- 設定値の妥当性チェック +- デフォルト値への復帰 + +## パフォーマンス考慮事項 + +### 遅延初期化 +- 必要な時点での機能初期化 +- メモリ使用量の最適化 + +### 非同期処理 +- バックグラウンドでの重い処理 +- UI の応答性維持 + +### キャッシュ機能 +- 設定値のキャッシュ +- 翻訳結果のキャッシュ + +## 注意事項 + +- すべてのメソッドは例外安全である +- 設定変更は即座に config に反映される +- 重い処理は別スレッドで実行される +- VR機能は適切な環境でのみ動作する +- ネットワーク機能はオフライン時に制限される + +## セキュリティ考慮事項 + +- 外部入力の適切な検証 +- APIキーの安全な管理 +- ファイルアクセスの制限 +- 
ネットワーク通信の暗号化(該当する場合) \ No newline at end of file diff --git a/src-python/docs/details/mainloop.md b/src-python/docs/details/mainloop.md new file mode 100644 index 00000000..22a086ed --- /dev/null +++ b/src-python/docs/details/mainloop.md @@ -0,0 +1,275 @@ +# mainloop.py - VRCTメインループモジュール + +## 概要 + +VRCTアプリケーションのメインイベントループを管理するモジュールです。標準入力からのJSONリクエストを処理し、適切なコントローラーメソッドを呼び出してレスポンスを返す、アプリケーションの中枢的な役割を担います。 + +## 主要機能 + +### リクエスト処理システム +- JSON形式の標準入力からのリクエスト受信 +- エンドポイントベースのルーティング +- 非同期・並列処理対応 + +### エンドポイント管理 +- RESTライクなエンドポイント構造 +- 機能別のエンドポイント分類 +- 排他制御によるスレッドセーフティ + +### 初期化システム +- アプリケーション設定の初期化 +- コンポーネント間の依存関係解決 +- 段階的な機能有効化 + +## クラス構造 + +### Main クラス +```python +class Main: + def __init__(self, controller_instance: Controller, mapping_data: dict, worker_count: int = 3) +``` + +- メインループの制御 +- ワーカースレッドプール管理 +- エンドポイント排他制御 + +## エンドポイント分類 + +### 機能制御系 +``` +/set/enable/* - 各機能の有効化 +/set/disable/* - 各機能の無効化 +``` + +### データ操作系 +``` +/get/data/* - 設定データの取得 +/set/data/* - 設定データの更新 +/delete/data/* - データの削除 +``` + +### 実行系 +``` +/run/* - 各種処理の実行 +``` + +## 主要エンドポイント + +### 翻訳機能 +- `/set/enable/translation`: 翻訳機能の有効化 +- `/set/disable/translation`: 翻訳機能の無効化 +- `/set/data/selected_translation_engines`: 翻訳エンジンの選択 +- `/run/send_message_box`: メッセージ送信 + +### 音声認識機能 +- `/set/enable/transcription_send`: 送信音声認識の有効化 +- `/set/enable/transcription_receive`: 受信音声認識の有効化 +- `/set/data/selected_transcription_engine`: 音声認識エンジン選択 + +### VR機能 +- `/set/data/overlay_small_log_settings`: 小型オーバーレイ設定 +- `/set/data/overlay_large_log_settings`: 大型オーバーレイ設定 + +### WebSocket機能 +- `/set/enable/websocket_server`: WebSocketサーバー有効化 +- `/set/data/websocket_host`: サーバーホスト設定 +- `/set/data/websocket_port`: サーバーポート設定 + +### システム管理 +- `/run/update_software`: ソフトウェアアップデート +- `/run/download_ctranslate2_weight`: 翻訳モデルダウンロード +- `/run/download_whisper_weight`: 音声認識モデルダウンロード + +## 主要メソッド + +### リクエスト処理 + +```python +receiver() -> None +``` +- 標準入力からのJSONリクエスト受信 +- パースエラーの適切な処理 + +```python 
+handleRequest(endpoint: str, data: Any = None) -> tuple +``` +- エンドポイント処理の実行 +- ステータスコードと結果の返却 + +```python +handler() -> None +``` +- ワーカースレッドのメイン処理 +- キューからのリクエスト取得・処理 + +### スレッド管理 + +```python +startReceiver() -> None +``` +- レシーバースレッドの起動 + +```python +startHandler() -> None +``` +- ハンドラースレッドプールの起動 + +```python +start() -> None +``` +- 全スレッドの起動 + +```python +stop(wait: float = 2.0) -> None +``` +- 全スレッドの安全な停止 + +## 使用方法 + +### 基本的な使い方 + +```python +from mainloop import main_instance + +# メインループの開始 +main_instance.start() + +# ウォッチドッグコールバックの設定 +main_instance.controller.setWatchdogCallback(main_instance.stop) + +# コントローラーの初期化 +main_instance.controller.init() +``` + +### 直接リクエスト処理 + +```python +# エンドポイントの直接呼び出し +result, status = main_instance.handleRequest("/get/data/version", None) +print(f"バージョン: {result}") + +# 翻訳機能の有効化 +result, status = main_instance.handleRequest("/set/enable/translation", None) +``` + +### 標準入力からの処理 + +```json +{ + "endpoint": "/run/send_message_box", + "data": "eyJpZCI6ICIxMjMiLCAibWVzc2FnZSI6ICJIZWxsbyBXb3JsZCJ9" +} +``` + +## リクエスト形式 + +### 入力形式 +```json +{ + "endpoint": "string", // 必須:処理対象のエンドポイント + "data": "string|null" // オプション:Base64エンコード済みデータ +} +``` + +### 出力形式 +```json +{ + "status": 200, // HTTPステータスコード + "endpoint": "string", // 処理されたエンドポイント + "result": "any" // 処理結果 +} +``` + +## ステータスコード + +- `200`: 成功 +- `400`: 不正なリクエスト +- `404`: 存在しないエンドポイント +- `423`: ロック中(機能が無効化されている) +- `500`: 内部エラー + +## 排他制御 + +### ロック機能 +- enable/disableペアは同一ロックキーを共有 +- 同一機能の同時実行を防止 +- デッドロックを回避する設計 + +### ロックキー正規化 +```python +/set/enable/translation -> /lock/set/translation +/set/disable/translation -> /lock/set/translation +``` + +## 初期化プロセス + +### 段階的初期化 +1. コントローラーの初期化 +2. デバイスマネージャーの初期化 +3. モデルの初期化 +4. 
各機能の段階的有効化 + +### 初期化mapping +- `/get/data/*`エンドポイントから初期化設定を自動抽出 +- システム起動時の設定復元 + +## ログ機能 + +### プロセスログ +- 全リクエスト・レスポンスの記録 +- JSON形式での構造化ログ + +### エラーログ +- 例外の詳細記録 +- スタックトレースの保存 + +## 依存関係 + +### 直接依存 +- `controller`: ビジネスロジック制御 +- `utils`: ユーティリティ機能(ログ、エンコード等) + +### 間接依存 +- `config`: 設定管理 +- `model`: コアモデル機能 +- `device_manager`: デバイス管理 + +## 設定項目 + +### ワーカー数 +```python +DEFAULT_WORKER_COUNT = 3 # 並列処理スレッド数 +``` + +### タイムアウト +- キュー待機タイムアウト: 0.5秒 +- スレッド停止待機: 2.0秒 +- 処理安定化待機: 0.2秒 + +## エラーハンドリング + +- JSONパースエラーの適切な処理 +- エンドポイント実行エラーのキャッチ +- スレッドセーフなエラーログ記録 +- グレースフルシャットダウン + +## パフォーマンス特性 + +### スループット +- 複数ワーカーによる並列処理 +- ノンブロッキングI/O + +### レイテンシ +- キューイング遅延の最小化 +- 排他制御による一時的な遅延あり + +### メモリ使用量 +- リクエストキューのサイズ制限なし(要注意) +- スレッドプールによる固定オーバーヘッド + +## 注意事項 + +- 標準入力をブロッキングで読み取るため、パイプ経由での使用を想定 +- エンドポイント名の大文字小文字は区別される +- Base64データは自動的にデコードされる +- 長時間のブロッキング処理は他のリクエストに影響する可能性 \ No newline at end of file diff --git a/src-python/docs/details/model.md b/src-python/docs/details/model.md new file mode 100644 index 00000000..9880730c --- /dev/null +++ b/src-python/docs/details/model.md @@ -0,0 +1,292 @@ +# model.py - VRCTコアモデルクラス + +## 概要 + +VRCTアプリケーションの中核となるModelクラスを定義するモジュールです。音声認識、翻訳、VRオーバーレイ、OSC通信、WebSocketサーバーなどの主要機能を統合管理し、システム全体の動作を制御します。 + +## 主要機能 + +### シングルトンパターン +- アプリケーション全体で単一のModelインスタンスを保証 +- 遅延初期化による軽量なインポート + +### 音声認識機能 +- マイク音声のリアルタイム文字起こし +- スピーカー出力の音声認識 +- エネルギーレベル監視 +- 複数言語対応 + +### 翻訳機能 +- 複数の翻訳エンジン対応(DeepL、Google、CTranslate2等) +- 言語自動検出 +- バッチ翻訳処理 + +### VRオーバーレイ +- OpenVR統合 +- 小型・大型ログオーバーレイ +- 動的配置・透明度制御 + +### OSC通信 +- VRChatとのOSC通信 +- タイピング状態の同期 +- ミュート状態の監視 + +### WebSocketサーバー +- 外部アプリケーションとの通信 +- リアルタイムメッセージ配信 + +## クラス構造 + +### threadFnc クラス +```python +class threadFnc(Thread): + def __init__(self, fnc, end_fnc=None, daemon: bool = True, *args, **kwargs) +``` + +- 関数を繰り返し実行するスレッドラッパー +- 一時停止・再開機能 +- エラー保護機能 + +### Model クラス +```python +class Model: + def __new__(cls) # シングルトンパターン + def init(self) # 重い初期化処理 + def ensure_initialized(self) # 
遅延初期化 +``` + +## 主要メソッド + +### 初期化・管理 + +```python +init() -> None +``` +- 全コンポーネントの初期化 +- 重い処理のため明示的に呼び出し + +```python +ensure_initialized() -> None +``` +- 必要時の自動初期化 +- 安全な遅延初期化 + +### 翻訳機能 + +```python +getInputTranslate(message, source_language=None) -> Tuple[List[str], List[bool]] +``` +- 入力メッセージの多言語翻訳 +- 成功フラグも同時に返却 + +```python +getOutputTranslate(message, source_language=None) -> Tuple[List[str], List[bool]] +``` +- 出力メッセージの翻訳(逆方向) + +```python +authenticationTranslatorDeepLAuthKey(auth_key) -> bool +``` +- DeepL APIキーの認証 + +### 音声認識機能 + +```python +startMicTranscript(fnc: Callable) -> None +``` +- マイク音声認識の開始 +- コールバック関数で結果を通知 + +```python +startSpeakerTranscript(fnc: Callable) -> None +``` +- スピーカー音声認識の開始 + +```python +pauseMicTranscript() -> None +resumeMicTranscript() -> None +``` +- 音声認識の一時停止・再開 + +```python +startCheckMicEnergy(fnc: Callable) -> None +startCheckSpeakerEnergy(fnc: Callable) -> None +``` +- 音声エネルギーレベルの監視 + +### VRオーバーレイ機能 + +```python +createOverlayImageSmallLog(message, your_language, translation, target_language) -> Image +``` +- 小型ログオーバーレイ画像の生成 + +```python +createOverlayImageLargeLog(message_type, message, your_language, translation, target_language) -> Image +``` +- 大型ログオーバーレイ画像の生成 + +```python +updateOverlaySmallLogSettings() -> None +updateOverlayLargeLogSettings() -> None +``` +- オーバーレイ設定の更新 + +### OSC通信機能 + +```python +oscSendMessage(message: str) -> None +``` +- VRChatへのメッセージ送信 + +```python +oscStartSendTyping() -> None +oscStopSendTyping() -> None +``` +- タイピング状態の通知 + +```python +setMuteSelfStatus() -> None +``` +- VRChatミュート状態の取得 + +### WebSocket機能 + +```python +startWebSocketServer(host: str, port: int) -> None +``` +- WebSocketサーバーの起動 + +```python +websocketSendMessage(message_dict: dict) -> bool +``` +- 全クライアントへのメッセージ送信 + +```python +checkWebSocketServerAlive() -> bool +``` +- サーバー稼働状態の確認 + +### ファイルダウンロード機能 + +```python +downloadCTranslate2ModelWeight(weight_type, callback=None, end_callback=None) +``` +- 翻訳モデルのダウンロード + 
+```python +downloadWhisperModelWeight(weight_type, callback=None, end_callback=None) +``` +- 音声認識モデルのダウンロード + +### ウォッチドッグ機能 + +```python +startWatchdog() -> None +feedWatchdog() -> None +setWatchdogCallback(callback: Callable) -> None +``` +- システム監視とタイムアウト処理 + +## 使用方法 + +### 基本的な使い方 + +```python +from model import model + +# 明示的な初期化(推奨) +model.init() + +# または自動初期化 +model.ensure_initialized() + +# 翻訳機能の使用 +translations, success_flags = model.getInputTranslate("Hello World") + +# 音声認識の開始 +def on_transcript_result(result): + print(f"認識結果: {result}") + +model.startMicTranscript(on_transcript_result) +``` + +### VRオーバーレイの使用 + +```python +# オーバーレイの開始 +model.startOverlay() + +# 画像の作成と更新 +img = model.createOverlayImageSmallLog( + message="Hello", + your_language="English", + translation=["こんにちは"], + target_language={"1": {"language": "Japanese", "enable": True}} +) +model.updateOverlaySmallLog(img) +``` + +### WebSocketサーバーの使用 + +```python +# サーバー起動 +model.startWebSocketServer("127.0.0.1", 8765) + +# メッセージ送信 +message = {"type": "translation", "text": "Hello", "translation": "こんにちは"} +success = model.websocketSendMessage(message) +``` + +## 依存関係 + +### 必須モジュール +- `controller`: アプリケーション制御 +- `config`: 設定管理 +- `device_manager`: デバイス管理 + +### 音声・翻訳関連 +- `models.transcription.*`: 音声認識 +- `models.translation.*`: 翻訳機能 +- `models.transliteration.*`: 音写変換 + +### VR・通信関連 +- `models.overlay.*`: VRオーバーレイ +- `models.osc.*`: OSC通信 +- `models.websocket.*`: WebSocket通信 + +### ユーティリティ +- `models.watchdog.*`: 監視機能 +- `utils`: 共通ユーティリティ +- `flashtext`: キーワードフィルタリング + +## 設定依存関係 + +多くの機能がconfigモジュールの設定に依存: + +- 音声認識設定(しきい値、タイムアウト等) +- 翻訳設定(エンジン選択、言語設定等) +- VR設定(オーバーレイ位置、透明度等) +- OSC設定(IPアドレス、ポート等) + +## エラーハンドリング + +- 初期化エラーの適切な処理 +- VRAM不足エラーの検出と対応 +- ネットワークエラーの回復機能 +- スレッドセーフティの保証 + +## 注意事項 + +- 重い初期化処理のため、明示的な初期化を推奨 +- OpenVR環境が必要(VRオーバーレイ使用時) +- CUDA環境推奨(高速な音声認識・翻訳) +- WebSocketサーバーは非同期で動作 +- 音声デバイスのアクセス権限が必要 + +## パフォーマンス考慮事項 + +- 遅延初期化によるメモリ使用量の最適化 +- スレッドプールによる並行処理 +- モデルの重複読み込み防止 
+- キューイングによる非同期処理 \ No newline at end of file diff --git a/src-python/docs/details/osc.md b/src-python/docs/details/osc.md new file mode 100644 index 00000000..72ffefa3 --- /dev/null +++ b/src-python/docs/details/osc.md @@ -0,0 +1,602 @@ +# osc.py - OSC通信・OSCQueryプロトコル管理 + +## 概要 + +VRChatとの高度なOSC(Open Sound Control)通信を管理する包括的なシステムです。基本的なOSCメッセージ送信に加え、OSCQueryプロトコルによる双方向通信、パラメータ監視、自動サービス発見機能を提供します。 + +## 主要機能 + +### OSC通信機能 +- VRChatチャットボックスへのメッセージ送信 +- タイピング状態の制御 +- パラメータ値の動的取得 + +### OSCQuery対応 +- 自動サービス発見・接続 +- リアルタイムパラメータ監視 +- 双方向エンドポイント公開 + +### 堅牢性機能 +- 防御的プログラミング設計 +- 欠損ライブラリの優雅な処理 +- 自動エラー復旧機構 + +## クラス構造 + +### OSCHandler クラス + +```python +class OSCHandler: + def __init__(self, ip_address: str = "127.0.0.1", port: int = 9000) -> None: + self.is_osc_query_enabled: bool + self.osc_ip_address: str + self.osc_port: int + self.udp_client: udp_client.SimpleUDPClient + self.osc_server: Optional[osc_server.ThreadingOSCUDPServer] + self.osc_query_service: Optional[OSCQueryService] + self.browser: Optional[OSCQueryBrowser] +``` + +OSC通信の中核管理クラス + +#### 属性 +- **is_osc_query_enabled**: OSCQuery機能の有効性フラグ +- **osc_ip_address**: 送信先IPアドレス +- **osc_port**: UDP通信ポート +- **udp_client**: OSC送信クライアント +- **osc_server**: ローカルOSCサーバー +- **osc_query_service**: OSCQueryサービスインスタンス +- **browser**: OSCQueryブラウザー + +## 主要メソッド + +### メッセージ送信 + +```python +def sendMessage(self, message: str = "", notification: bool = True) -> None +``` + +VRChatチャットボックスにメッセージを送信 + +#### パラメータ +- **message**: 送信するテキストメッセージ +- **notification**: 通知フラグ(音・表示の有無) + +```python +def sendTyping(self, flag: bool = False) -> None +``` + +タイピング状態をVRChatに送信 + +#### パラメータ +- **flag**: タイピング中フラグ + +### パラメータ監視 + +```python +def getOSCParameterMuteSelf(self) -> Optional[bool] +``` + +VRChatのMuteSelfパラメータ値を取得 + +#### 戻り値 +- **Optional[bool]**: ミュート状態(取得失敗時はNone) + +```python +def getOSCParameterValue(self, address: str) -> Any +``` + +任意のOSCパラメータ値を取得 + +#### パラメータ +- **address**: OSCアドレス(例:"/avatar/parameters/MuteSelf") +
+#### 戻り値 +- **Any**: パラメータ値(取得失敗時はNone) + +### 設定変更 + +```python +def setOscIpAddress(self, ip_address: str) -> None +``` + +送信先IPアドレスを変更し、サービスを再初期化 + +#### パラメータ +- **ip_address**: 新しいIPアドレス + +```python +def setOscPort(self, port: int) -> None +``` + +送信ポートを変更し、サービスを再初期化 + +#### パラメータ +- **port**: 新しいUDPポート番号 + +## 使用方法 + +### 基本的なメッセージ送信 + +```python +from models.osc.osc import OSCHandler + +# OSCハンドラーの初期化 +osc = OSCHandler(ip_address="127.0.0.1", port=9000) + +# チャットボックスにメッセージを送信 +osc.sendMessage("こんにちは、VRChat!", notification=True) + +# タイピング状態の制御 +osc.sendTyping(True) # タイピング開始 +# ... 実際のタイピング処理 ... +osc.sendTyping(False) # タイピング終了 + +# 再度メッセージ送信 +osc.sendMessage("翻訳完了しました", notification=False) +``` + +### リモートVRChatへの接続 + +```python +# リモートVRChatインスタンスへの接続 +remote_osc = OSCHandler(ip_address="192.168.1.100", port=9000) + +# OSCQuery機能は自動的に無効化される +print(f"OSCQuery有効: {remote_osc.getIsOscQueryEnabled()}") # False + +# 基本的なメッセージ送信は利用可能 +remote_osc.sendMessage("リモートからの翻訳結果", notification=True) +``` + +### パラメータ監視(ローカル接続時のみ) + +```python +# ローカル接続でのパラメータ監視 +local_osc = OSCHandler(ip_address="127.0.0.1", port=9000) + +if local_osc.getIsOscQueryEnabled(): + # MuteSelfパラメータの監視 + mute_status = local_osc.getOSCParameterMuteSelf() + + if mute_status is not None: + if mute_status: + print("ユーザーはミュート中です") + else: + print("ユーザーはミュート解除中です") + else: + print("MuteSelfパラメータの取得に失敗しました") + + # カスタムパラメータの監視 + custom_value = local_osc.getOSCParameterValue("/avatar/parameters/CustomParam") + if custom_value is not None: + print(f"カスタムパラメータ値: {custom_value}") +``` + +### 双方向OSC通信の設定 + +```python +def handle_mute_change(address, *args): + """ミュート状態変更のハンドラー""" + print(f"ミュート状態が変更されました: {args}") + +def handle_typing_change(address, *args): + """タイピング状態変更のハンドラー""" + print(f"タイピング状態: {args}") + +def handle_chatbox_input(address, *args): + """チャットボックス入力のハンドラー""" + print(f"チャットボックス入力: {args}") + +# OSCパラメータハンドラーの設定 +osc_handlers = { + "/avatar/parameters/MuteSelf": handle_mute_change, + 
"/chatbox/typing": handle_typing_change, + "/chatbox/input": handle_chatbox_input +} + +osc = OSCHandler() +osc.setDictFilterAndTarget(osc_handlers) + +# OSCサーバー開始(OSCQuery自動公開) +osc.receiveOscParameters() + +print("OSC受信サーバーが開始されました") +print("VRChatからのパラメータ変更を監視中...") + +# メッセージ送信テスト +import time +time.sleep(2) +osc.sendMessage("双方向通信テスト", notification=True) + +# 長時間実行 +time.sleep(30) + +# クリーンアップ +osc.oscServerStop() +``` + +### 動的設定変更 + +```python +# 実行時のIP・ポート変更 +osc = OSCHandler(ip_address="127.0.0.1", port=9000) + +# 初期設定でローカル接続 +osc.sendMessage("ローカル接続テスト") + +print("リモート接続に切り替え中...") +osc.setOscIpAddress("192.168.1.150") # 自動的にOSCQueryが無効化 +osc.sendMessage("リモート接続テスト") + +print("ポート変更...") +osc.setOscPort(9001) +osc.sendMessage("新しいポートでのテスト") + +print("ローカル接続に戻る...") +osc.setOscIpAddress("127.0.0.1") # OSCQueryが再度有効化 +osc.sendMessage("ローカル接続復帰テスト") +``` + +## OSCQuery詳細機能 + +### 自動サービス発見 + +```python +class VRChatMonitor: + """VRChatサービス監視クラス""" + + def __init__(self): + self.osc = OSCHandler() + self.monitoring = False + + def start_monitoring(self): + """VRChatパラメータの継続監視開始""" + + if not self.osc.getIsOscQueryEnabled(): + print("OSCQuery機能が無効です(ローカル接続のみサポート)") + return + + # OSCハンドラー設定 + handlers = { + "/avatar/parameters/MuteSelf": self.on_mute_change, + "/avatar/parameters/Voice": self.on_voice_change, + "/avatar/parameters/Viseme": self.on_viseme_change, + "/avatar/parameters/GestureLeft": self.on_gesture_left, + "/avatar/parameters/GestureRight": self.on_gesture_right + } + + self.osc.setDictFilterAndTarget(handlers) + self.osc.receiveOscParameters() + + self.monitoring = True + print("VRChatパラメータ監視を開始しました") + + def on_mute_change(self, address, *args): + print(f"ミュート状態変更: {args[0] if args else 'Unknown'}") + + def on_voice_change(self, address, *args): + print(f"音声レベル: {args[0] if args else 'Unknown'}") + + def on_viseme_change(self, address, *args): + print(f"口形変化: {args[0] if args else 'Unknown'}") + + def on_gesture_left(self, address, *args): + 
print(f"左手ジェスチャー: {args[0] if args else 'Unknown'}") + + def on_gesture_right(self, address, *args): + print(f"右手ジェスチャー: {args[0] if args else 'Unknown'}") + + def stop_monitoring(self): + """監視停止""" + self.osc.oscServerStop() + self.monitoring = False + print("VRChatパラメータ監視を停止しました") + +# 使用例 +monitor = VRChatMonitor() +monitor.start_monitoring() + +# 監視中に他の処理を実行 +time.sleep(60) # 1分間監視 + +monitor.stop_monitoring() +``` + +### リアルタイムパラメータ追跡 + +```python +class ParameterTracker: + """パラメータ値の追跡・履歴管理""" + + def __init__(self, osc_handler): + self.osc = osc_handler + self.parameter_history = {} + self.tracking_active = False + + def track_parameter(self, address, interval=0.1): + """指定されたパラメータを定期監視""" + + import threading + + def monitoring_loop(): + while self.tracking_active: + try: + value = self.osc.getOSCParameterValue(address) + if value is not None: + timestamp = time.time() + + if address not in self.parameter_history: + self.parameter_history[address] = [] + + # 値が変更された場合のみ記録 + if (not self.parameter_history[address] or + self.parameter_history[address][-1][1] != value): + + self.parameter_history[address].append((timestamp, value)) + print(f"{address}: {value} (時刻: {timestamp:.2f})") + + # 履歴サイズ制限(最新100件まで) + if len(self.parameter_history[address]) > 100: + self.parameter_history[address] = self.parameter_history[address][-100:] + + time.sleep(interval) + + except Exception as e: + print(f"パラメータ追跡エラー: {e}") + time.sleep(interval) + + self.tracking_active = True + thread = threading.Thread(target=monitoring_loop, daemon=True) + thread.start() + + def stop_tracking(self): + """追跡停止""" + self.tracking_active = False + + def get_parameter_history(self, address): + """パラメータの履歴取得""" + return self.parameter_history.get(address, []) + + def get_latest_value(self, address): + """最新パラメータ値取得""" + history = self.get_parameter_history(address) + return history[-1][1] if history else None + +# 使用例 +osc = OSCHandler() +tracker = ParameterTracker(osc) + +# MuteSelfパラメータの追跡開始 
+tracker.track_parameter("/avatar/parameters/MuteSelf", interval=0.5) + +# しばらく監視 +time.sleep(30) + +# 結果確認 +mute_history = tracker.get_parameter_history("/avatar/parameters/MuteSelf") +print(f"MuteSelf変更履歴: {len(mute_history)}件") + +for timestamp, value in mute_history[-5:]: # 最新5件表示 + print(f" {time.ctime(timestamp)}: {value}") + +tracker.stop_tracking() +``` + +## エラーハンドリング・復旧機構 + +### 堅牢な接続管理 + +```python +class RobustOSCHandler: + """堅牢性を高めたOSCハンドラー""" + + def __init__(self, ip_address="127.0.0.1", port=9000): + self.osc = OSCHandler(ip_address, port) + self.connection_retries = 3 + self.retry_delay = 1.0 + + def safe_send_message(self, message, notification=True, max_retries=None): + """安全なメッセージ送信(リトライ機構付き)""" + + retries = max_retries or self.connection_retries + + for attempt in range(retries): + try: + self.osc.sendMessage(message, notification) + return True + + except Exception as e: + print(f"送信試行 {attempt + 1}/{retries} 失敗: {e}") + + if attempt < retries - 1: + time.sleep(self.retry_delay * (attempt + 1)) # 線形バックオフ(試行ごとに待機時間を延長) + + # 接続再初期化を試行 + try: + self.osc.udp_client = udp_client.SimpleUDPClient( + self.osc.osc_ip_address, + self.osc.osc_port + ) + except Exception as reconnect_error: + print(f"再接続失敗: {reconnect_error}") + + print(f"メッセージ送信に失敗しました: '{message}'") + return False + + def safe_get_parameter(self, address, timeout=5.0): + """安全なパラメータ取得(タイムアウト付き)""" + + if not self.osc.getIsOscQueryEnabled(): + return None + + import threading + import queue + + result_queue = queue.Queue() + + def parameter_getter(): + try: + value = self.osc.getOSCParameterValue(address) + result_queue.put(value) + except Exception as e: + result_queue.put(e) + + # タイムアウト付きでパラメータ取得 + thread = threading.Thread(target=parameter_getter, daemon=True) + thread.start() + + try: + result = result_queue.get(timeout=timeout) + if isinstance(result, Exception): + raise result + return result + + except queue.Empty: + print(f"パラメータ取得タイムアウト: {address}") + return None + +# 使用例 +robust_osc =
RobustOSCHandler() + +# 堅牢な送信 +success = robust_osc.safe_send_message("堅牢性テスト", notification=True) +print(f"送信成功: {success}") + +# 安全なパラメータ取得 +mute_value = robust_osc.safe_get_parameter("/avatar/parameters/MuteSelf", timeout=3.0) +print(f"MuteSelf値: {mute_value}") +``` + +## パフォーマンス最適化 + +### 効率的な通信管理 + +```python +class OptimizedOSCHandler: + """パフォーマンス最適化OSCハンドラー""" + + def __init__(self, ip_address="127.0.0.1", port=9000): + self.osc = OSCHandler(ip_address, port) + self.message_queue = [] + self.batch_size = 10 + self.batch_interval = 0.1 + self.last_batch_time = 0 + + def queue_message(self, message, notification=True): + """メッセージをキューに追加(バッチ送信用)""" + + self.message_queue.append((message, notification)) + + # バッチサイズまたは時間間隔でフラッシュ + current_time = time.time() + + if (len(self.message_queue) >= self.batch_size or + current_time - self.last_batch_time >= self.batch_interval): + self.flush_messages() + + def flush_messages(self): + """キューされたメッセージを一括送信""" + + if not self.message_queue: + return + + # 最新のメッセージのみ送信(重複排除) + if len(self.message_queue) > 1: + # 最後のメッセージを優先 + last_message, last_notification = self.message_queue[-1] + self.osc.sendMessage(last_message, last_notification) + else: + message, notification = self.message_queue[0] + self.osc.sendMessage(message, notification) + + # キューをクリア + self.message_queue.clear() + self.last_batch_time = time.time() + + def send_immediate(self, message, notification=True): + """即座にメッセージ送信(キューをバイパス)""" + self.flush_messages() # 既存キューを先にフラッシュ + self.osc.sendMessage(message, notification) + +# 使用例 +optimized_osc = OptimizedOSCHandler() + +# 複数のメッセージを効率的に送信 +for i in range(20): + optimized_osc.queue_message(f"バッチメッセージ {i}") + time.sleep(0.05) # 短い間隔 + +# 残りのメッセージをフラッシュ +optimized_osc.flush_messages() + +# 即座に送信が必要な重要メッセージ +optimized_osc.send_immediate("緊急メッセージ", notification=True) +``` + +## 依存関係・要件 + +### 必須依存関係 +- `pythonosc`: 基本OSC通信ライブラリ +- `threading`: 並行処理制御 +- `time`: 時間管理機能 + +### オプション依存関係 +- `tinyoscquery`: 
OSCQuery機能(ローカル接続時のみ) +- `utils`: エラーログ機能(フォールバック処理あり) + +### システム要件 +```python +# 最小システム要件 +requirements = { + "python_version": "3.7+", + "network": "UDP通信対応", + "vrchat_version": "OSCサポート版(2022年8月以降)", + "local_ports": "空きUDP/TCPポート(OSCQuery使用時)" +} + +# 推奨環境 +recommended = { + "network_latency": "< 10ms(ローカル接続)", + "cpu_usage": "OSCQuery使用時は追加CPU負荷", + "memory": "tinyoscquery使用時は追加メモリ" +} +``` + +## 注意事項・制限 + +### OSCQuery制限 +- ローカルホスト(127.0.0.1/localhost)接続時のみ利用可能 +- tinyoscqueryライブラリが必要 +- ファイアウォール設定によっては動作しない可能性 + +### 通信制限 +- UDPプロトコルのため送達保証なし +- VRChatのOSC受信制限(レート制限あり) +- ネットワーク環境による遅延・パケットロス + +### プラットフォーム依存 +```python +# 既知の制限事項 +limitations = { + "windows": "Windowsファイアウォールの設定が必要な場合あり", + "macos": "セキュリティ設定によるポート制限の可能性", + "linux": "一部のLinuxディストリビューションでの互換性問題", + "vrchat_platform": "PC版VRChatのみOSCサポート" +} +``` + +## 関連モジュール + +- `config.py`: OSC設定管理 +- `controller.py`: OSC機能制御インターフェース +- `model.py`: OSC機能統合 +- `utils.py`: エラーログ・ネットワークユーティリティ + +## 将来の改善点 + +- より高度なOSCQueryパラメータ監視 +- カスタムOSCプロトコル拡張 +- パフォーマンス監視・分析機能 +- 自動再接続・復旧機構の改善 +- VRChatアバター固有パラメータ対応 \ No newline at end of file diff --git a/src-python/docs/details/overlay.md b/src-python/docs/details/overlay.md new file mode 100644 index 00000000..e28382a0 --- /dev/null +++ b/src-python/docs/details/overlay.md @@ -0,0 +1,754 @@ +# overlay - VRオーバーレイ統合システム + +## 概要 + +VRChat向けのOpenVRオーバーレイシステムです。翻訳結果や字幕をVR空間内に表示する機能を提供し、HMD・コントローラー追跡、フェード効果、多言語フォント対応を統合的に管理します。 + +## 主要コンポーネント + +### overlay.py - メインオーバーレイ管理 +- OpenVRオーバーレイの生成・配置・制御 +- HMD・左手・右手への追跡設定 +- フェードイン・フェードアウト効果 + +### overlay_image.py - 画像生成・描画 +- 多言語対応テキスト画像生成 +- メッセージログ・履歴表示 +- フォント・レイアウト管理 + +### overlay_utils.py - 数学的変換ユーティリティ +- 3D座標変換行列計算 +- オイラー角・回転行列変換 +- 同次座標系変換 + +## クラス構造 + +### Overlay クラス (overlay.py) + +```python +class Overlay: + def __init__(self, settings_dict: Dict[str, Dict[str, Any]]) -> None: + self.system: Optional[Any] = None # OpenVRシステム + self.overlay: Optional[Any] = None # オーバーレイインターフェース + self.handle:
Dict[str, Any] = {} # サイズ別ハンドル + self.settings: Dict[str, Dict[str, Any]] # サイズ別設定 + self.lastUpdate: Dict[str, float] = {} # 最終更新時刻 + self.fadeRatio: Dict[str, float] = {} # フェード比率 +``` + +VRオーバーレイの総合管理クラス + +#### 主要機能 +- OpenVRの初期化・管理 +- 複数サイズオーバーレイの同時管理 +- リアルタイムフェード効果処理 +- SteamVR接続状態監視 + +### OverlayImage クラス (overlay_image.py) + +```python +class OverlayImage: + LANGUAGES = { + "Default": "NotoSansJP-Regular.ttf", + "Japanese": "NotoSansJP-Regular.ttf", + "Korean": "NotoSansKR-Regular.ttf", + "Chinese Simplified": "NotoSansSC-Regular.ttf", + "Chinese Traditional": "NotoSansTC-Regular.ttf" + } + + def __init__(self, root_path: Optional[str] = None) -> None: + self.message_log: List[dict] = [] + self.root_path: str +``` + +テキスト画像生成・多言語フォント管理クラス + +#### 主要機能 +- 多言語フォント自動選択 +- メッセージ履歴管理 +- 動的画像生成・合成 +- UI要素のサイズ計算 + +## 主要メソッド + +### Overlay クラス + +#### 初期化・制御 + +```python +def startOverlay(self) -> None +``` + +オーバーレイシステム開始 + +```python +def shutdownOverlay(self) -> None +``` + +オーバーレイシステム終了・リソース解放 + +```python +def reStartOverlay(self) -> None +``` + +オーバーレイシステム再起動 + +#### 表示制御 + +```python +def showOverlay(self, image: Image, size: str) -> None +``` + +画像をオーバーレイに表示 + +#### パラメータ +- **image**: 表示するPIL画像 +- **size**: オーバーレイサイズ識別子 + +```python +def setOpacity(self, opacity: float, size: str) -> None +``` + +オーバーレイ透明度設定 + +#### パラメータ +- **opacity**: 透明度(0.0-1.0) +- **size**: 対象サイズ + +```python +def setTrackedDeviceRelative(self, tracker: str, size: str) -> None +``` + +追跡デバイスへのオーバーレイ配置 + +#### パラメータ +- **tracker**: 追跡デバイス("HMD", "LeftHand", "RightHand") +- **size**: オーバーレイサイズ + +### OverlayImage クラス + +#### 画像生成 + +```python +def createOverlayImage(self, message: str, language: str, ui_size: dict, + ui_settings: dict, message_log_settings: dict) -> Image +``` + +オーバーレイ用画像の生成 + +#### パラメータ +- **message**: 表示メッセージ +- **language**: 言語設定 +- **ui_size**: UIサイズ設定 +- **ui_settings**: UI表示設定 +- **message_log_settings**: ログ表示設定 + +#### 戻り値 +- **Image**: 生成されたPIL画像 + +#### 
履歴管理 + +```python +def addMessageLog(self, message: str, timestamp: datetime) -> None +``` + +メッセージログに新規追加 + +#### パラメータ +- **message**: 追加するメッセージ +- **timestamp**: タイムスタンプ + +```python +def clearMessageLog(self) -> None +``` + +メッセージログのクリア + +## 使用方法 + +### 基本的なオーバーレイ表示 + +```python +from models.overlay.overlay import Overlay +from models.overlay.overlay_image import OverlayImage +from PIL import Image + +# オーバーレイ設定 +settings = { + "small": { + "width": 0.3, + "height": 0.1, + "x_pos": 0.0, + "y_pos": -0.2, + "z_pos": 1.0, + "opacity": 0.8, + "display_duration": 3.0, + "fadeout_duration": 1.0 + }, + "large": { + "width": 0.5, + "height": 0.2, + "x_pos": 0.0, + "y_pos": -0.3, + "z_pos": 1.2, + "opacity": 0.9, + "display_duration": 5.0, + "fadeout_duration": 1.5 + } +} + +# オーバーレイシステム初期化 +overlay_system = Overlay(settings) +overlay_image = OverlayImage() + +# システム開始 +overlay_system.startOverlay() + +# 翻訳結果の表示 +translation_text = "Hello, world! / こんにちは、世界!" + +# 画像生成設定 +ui_size = OverlayImage.getUiSizeSmallLog() +ui_settings = { + "font_size": 20, + "text_color": (255, 255, 255, 255), + "background_color": (0, 0, 0, 180) +} +message_log_settings = { + "enabled": True, + "max_lines": 5 +} + +# 画像生成・表示 +overlay_img = overlay_image.createOverlayImage( + message=translation_text, + language="Japanese", + ui_size=ui_size, + ui_settings=ui_settings, + message_log_settings=message_log_settings +) + +# オーバーレイに表示 +overlay_system.showOverlay(overlay_img, "small") + +# システム終了 +import time +time.sleep(10) +overlay_system.shutdownOverlay() +``` + +### HMD・コントローラー追跡設定 + +```python +# HMDに固定表示 +overlay_system.setTrackedDeviceRelative("HMD", "large") + +# 左手コントローラーに追従 +overlay_system.setTrackedDeviceRelative("LeftHand", "small") + +# 右手コントローラーに追従 +overlay_system.setTrackedDeviceRelative("RightHand", "small") + +# 位置・回転の微調整(設定変更) +overlay_system.settings["small"]["x_pos"] = 0.1 +overlay_system.settings["small"]["y_pos"] = -0.1 +overlay_system.settings["small"]["z_pos"] = 0.8 
+overlay_system.settings["small"]["x_rotation"] = -30.0 +overlay_system.settings["small"]["y_rotation"] = 15.0 + +# 設定を適用 +overlay_system.setTrackedDeviceRelative("LeftHand", "small") +``` + +### フェード効果制御 + +```python +# フェード効果設定 +overlay_system.updateDisplayDuration(4.0, "large") # 4秒表示 +overlay_system.updateFadeoutDuration(2.0, "large") # 2秒でフェードアウト + +# 即座に透明度変更 +overlay_system.setOpacity(0.5, "large") # 50%透明度 + +# フェード効果を無効にして固定表示 +overlay_system.settings["small"]["fadeout_duration"] = 0 +overlay_system.setOpacity(1.0, "small") # 完全不透明で固定 +``` + +### 多言語対応表示 + +```python +# 日本語表示 +japanese_text = "これは日本語のテストです" +jp_image = overlay_image.createOverlayImage( + message=japanese_text, + language="Japanese", + ui_size=ui_size, + ui_settings=ui_settings, + message_log_settings=message_log_settings +) +overlay_system.showOverlay(jp_image, "large") + +# 韓国語表示 +korean_text = "이것은 한국어 테스트입니다" +kr_image = overlay_image.createOverlayImage( + message=korean_text, + language="Korean", + ui_size=ui_size, + ui_settings=ui_settings, + message_log_settings=message_log_settings +) +overlay_system.showOverlay(kr_image, "small") + +# 中国語(簡体字)表示 +chinese_text = "这是中文测试" +cn_image = overlay_image.createOverlayImage( + message=chinese_text, + language="Chinese Simplified", + ui_size=ui_size, + ui_settings=ui_settings, + message_log_settings=message_log_settings +) +overlay_system.showOverlay(cn_image, "large") +``` + +### メッセージログ機能 + +```python +from datetime import datetime + +# メッセージログの追加 +overlay_image.addMessageLog("最初のメッセージ", datetime.now()) +overlay_image.addMessageLog("翻訳結果: Hello -> こんにちは", datetime.now()) +overlay_image.addMessageLog("音声認識: こんにちは", datetime.now()) + +# ログ表示設定 +log_settings = { + "enabled": True, + "max_lines": 3, # 最大3行表示 + "show_timestamp": True, # タイムスタンプ表示 + "font_size": 16, + "text_color": (200, 200, 200, 255) +} + +# ログ付きオーバーレイ画像生成 +logged_image = overlay_image.createOverlayImage( + message="新しいメッセージ", + language="Japanese", + ui_size=ui_size, + 
ui_settings=ui_settings, + message_log_settings=log_settings +) + +overlay_system.showOverlay(logged_image, "large") + +# ログクリア +overlay_image.clearMessageLog() +``` + +## 座標系・変換システム + +### 基本座標設定 + +```python +# HMD基準座標(頭部固定表示) +def getHMDBaseMatrix() -> np.ndarray: + x_pos = 0.0 # 左右位置 + y_pos = -0.4 # 上下位置(下方向) + z_pos = 1.0 # 前後位置(前方向) + x_rotation = 0.0 # X軸回転 + y_rotation = 0.0 # Y軸回転 + z_rotation = 0.0 # Z軸回転 + +# 左手コントローラー基準座標 +def getLeftHandBaseMatrix() -> np.ndarray: + x_pos = 0.3 # 右側にオフセット + y_pos = 0.1 # 上方向にオフセット + z_pos = -0.31 # 手前にオフセット + x_rotation = -65.0 # 下向きに傾斜 + y_rotation = 165.0 # Y軸回転 + z_rotation = 115.0 # Z軸回転 + +# 右手コントローラー基準座標 +def getRightHandBaseMatrix() -> np.ndarray: + x_pos = -0.3 # 左側にオフセット + y_rotation = -165.0 # 左手と対称 + z_rotation = -115.0 # 左手と対称 +``` + +### 変換行列計算 (overlay_utils.py) + +```python +import numpy as np +from models.overlay.overlay_utils import * + +# 移動変換 +translation = (0.1, -0.2, 0.5) # x, y, z移動 +translation_matrix = calcTranslationMatrix(translation) + +# 回転変換(各軸独立) +x_rotation_matrix = calcRotationMatrixX(30.0) # X軸30度回転 +y_rotation_matrix = calcRotationMatrixY(45.0) # Y軸45度回転 +z_rotation_matrix = calcRotationMatrixZ(60.0) # Z軸60度回転 + +# オイラー角から回転行列生成 +euler_angles = (30.0, 45.0, 60.0) # X, Y, Z軸回転角度 +rotation_matrix = euler_to_rotation_matrix(euler_angles) + +# 基本行列への変換適用 +base_matrix = getHMDBaseMatrix() +translation = (0.05, -0.1, 0.2) +rotation = (10.0, -5.0, 0.0) +transformed_matrix = transform_matrix(base_matrix, translation, rotation) + +# 3x4行列を4x4同次座標に変換 +homogeneous_matrix = toHomogeneous(transformed_matrix) +``` + +### カスタム配置設定 + +```python +# カスタム位置でのオーバーレイ配置 +def createCustomOverlay(overlay_system, custom_pos, custom_rot, size): + """カスタム位置・回転でのオーバーレイ設定""" + + # 設定を動的に変更 + overlay_system.settings[size]["x_pos"] = custom_pos[0] + overlay_system.settings[size]["y_pos"] = custom_pos[1] + overlay_system.settings[size]["z_pos"] = custom_pos[2] + overlay_system.settings[size]["x_rotation"] = 
custom_rot[0] + overlay_system.settings[size]["y_rotation"] = custom_rot[1] + overlay_system.settings[size]["z_rotation"] = custom_rot[2] + + # 追跡デバイス設定を再適用 + overlay_system.setTrackedDeviceRelative("HMD", size) + +# 使用例:カスタム配置 +custom_position = (0.2, -0.3, 0.8) # やや右下前方 +custom_rotation = (-15.0, 10.0, 5.0) # 軽く傾斜 +createCustomOverlay(overlay_system, custom_position, custom_rotation, "large") +``` + +## 高度な機能 + +### 動的サイズ・レイアウト管理 + +```python +class AdaptiveOverlayManager: + """適応的オーバーレイ管理クラス""" + + def __init__(self, base_overlay_system, base_overlay_image): + self.overlay = base_overlay_system + self.image_gen = base_overlay_image + self.current_layout = "compact" + + def adaptLayoutToContent(self, message, language): + """コンテンツに応じたレイアウト自動調整""" + + # メッセージ長に応じてサイズ決定 + if len(message) < 50: + layout = "compact" + size_key = "small" + elif len(message) < 150: + layout = "standard" + size_key = "medium" + else: + layout = "expanded" + size_key = "large" + + # 言語に応じたフォントサイズ調整 + if language in ["Chinese Simplified", "Chinese Traditional"]: + font_scale = 1.1 # 中国語は少し大きめ + elif language == "Korean": + font_scale = 1.05 # 韓国語は微調整 + else: + font_scale = 1.0 # 日本語・その他 + + # UI設定の動的生成 + ui_size = self.getAdaptiveUiSize(layout) + ui_settings = { + "font_size": int(18 * font_scale), + "line_height": int(24 * font_scale), + "text_color": (255, 255, 255, 255), + "background_color": (0, 0, 0, 200), + "border_width": 2, + "border_color": (100, 150, 255, 255) + } + + return ui_size, ui_settings, size_key + + def getAdaptiveUiSize(self, layout): + """レイアウトに応じたUIサイズ取得""" + + layouts = { + "compact": { + "width": 400, + "height": 100, + "margin": 10, + "padding": 8 + }, + "standard": { + "width": 600, + "height": 150, + "margin": 15, + "padding": 12 + }, + "expanded": { + "width": 800, + "height": 200, + "margin": 20, + "padding": 16 + } + } + + return layouts.get(layout, layouts["standard"]) + +# 使用例 +adaptive_manager = AdaptiveOverlayManager(overlay_system, overlay_image) + 
+messages = [ + ("Hello!", "English"), + ("これは中程度の長さのメッセージです。翻訳結果を表示します。", "Japanese"), + ("这是一个很长的消息,用来测试自适应布局功能。当消息内容很长时,系统会自动选择更大的显示区域,并调整字体大小以确保良好的可读性。", "Chinese Simplified") +] + +for message, language in messages: + # 自動レイアウト調整 + ui_size, ui_settings, size_key = adaptive_manager.adaptLayoutToContent(message, language) + + # 画像生成・表示 + adaptive_image = overlay_image.createOverlayImage( + message=message, + language=language, + ui_size=ui_size, + ui_settings=ui_settings, + message_log_settings={"enabled": True, "max_lines": 3} + ) + + overlay_system.showOverlay(adaptive_image, size_key) + time.sleep(3) +``` + +### パフォーマンス監視・最適化 + +```python +class OverlayPerformanceMonitor: + """オーバーレイパフォーマンス監視クラス""" + + def __init__(self, overlay_system): + self.overlay = overlay_system + self.frame_times = [] + self.update_counts = {} + + def monitorFrameRate(self, duration=10.0): + """フレームレート監視""" + + start_time = time.monotonic() + frame_count = 0 + + while time.monotonic() - start_time < duration: + frame_start = time.monotonic() + + # フレーム処理(空の処理) + time.sleep(1/90) # 90Hz目標 + + frame_end = time.monotonic() + self.frame_times.append(frame_end - frame_start) + frame_count += 1 + + # 統計計算 + avg_frame_time = sum(self.frame_times) / len(self.frame_times) + avg_fps = 1.0 / avg_frame_time if avg_frame_time > 0 else 0 + + print(f"平均フレーム時間: {avg_frame_time*1000:.2f}ms") + print(f"平均FPS: {avg_fps:.1f}") + print(f"総フレーム数: {frame_count}") + + return avg_fps + + def optimizeSettings(self, target_fps=60): + """パフォーマンス目標に基づく設定最適化""" + + current_fps = self.monitorFrameRate(5.0) + + if current_fps < target_fps * 0.8: + print("パフォーマンス不足。設定を軽量化します...") + + # フェード処理間隔を延長 + for size in self.overlay.settings: + self.overlay.settings[size]["fadeout_duration"] *= 1.5 + + # 更新頻度を下げる + # (mainloopの sleep_time 調整は overlay.py 内で実装) + + elif current_fps > target_fps * 1.2: + print("パフォーマンスに余裕があります。品質を向上します...") + + # より滑らかなフェード + for size in self.overlay.settings: + 
self.overlay.settings[size]["fadeout_duration"] *= 0.8 + +# 使用例 +performance_monitor = OverlayPerformanceMonitor(overlay_system) +performance_monitor.monitorFrameRate(10.0) +performance_monitor.optimizeSettings(target_fps=60) +``` + +## エラーハンドリング・復旧 + +### 堅牢な接続管理 + +```python +class RobustOverlaySystem: + """堅牢性を高めたオーバーレイシステム""" + + def __init__(self, settings_dict): + self.base_overlay = Overlay(settings_dict) + self.connection_retries = 3 + self.auto_reconnect = True + + def safeStartOverlay(self, max_retries=None): + """安全なオーバーレイ開始(リトライ機構付き)""" + + retries = max_retries or self.connection_retries + + for attempt in range(retries): + try: + # SteamVR接続確認 + if not self.base_overlay.checkSteamvrRunning(): + print("SteamVRが起動していません。待機中...") + time.sleep(5) + continue + + # オーバーレイ開始 + self.base_overlay.startOverlay() + + # 初期化完了まで待機 + timeout = 10.0 + start_time = time.monotonic() + + while not self.base_overlay.initialized and time.monotonic() - start_time < timeout: + time.sleep(0.1) + + if self.base_overlay.initialized: + print("オーバーレイシステム開始完了") + return True + else: + print(f"初期化タイムアウト(試行 {attempt + 1}/{retries})") + + except Exception as e: + print(f"オーバーレイ開始エラー(試行 {attempt + 1}/{retries}): {e}") + + # 既存システムのクリーンアップ + try: + self.base_overlay.shutdownOverlay() + except Exception: + pass + + if attempt < retries - 1: + time.sleep(2 ** attempt) # 指数バックオフ + + print("オーバーレイシステムの開始に失敗しました") + return False + + def monitorConnection(self): + """接続監視・自動復旧""" + + while self.auto_reconnect: + try: + if self.base_overlay.initialized and not self.base_overlay.checkActive(): + print("OpenVR接続が切断されました。再接続を試行します...") + + self.base_overlay.shutdownOverlay() + time.sleep(2) + + if self.safeStartOverlay(): + print("オーバーレイシステムが復旧しました") + else: + print("復旧に失敗しました") + + time.sleep(1) + + except Exception as e: + print(f"接続監視エラー: {e}") + time.sleep(5) + +# 使用例 +robust_overlay = RobustOverlaySystem(settings) + +# 安全な開始 +if robust_overlay.safeStartOverlay(): + # 接続監視開始(別スレッド) + 
import threading + monitor_thread = threading.Thread(target=robust_overlay.monitorConnection, daemon=True) + monitor_thread.start() + + # 通常の操作 + overlay_img = overlay_image.createOverlayImage(...) + robust_overlay.base_overlay.showOverlay(overlay_img, "small") +``` + +## 依存関係・システム要件 + +### 必須依存関係 +- `openvr`: OpenVR Python バインディング +- `numpy`: 数値計算・行列演算 +- `PIL (Pillow)`: 画像処理・生成 +- `psutil`: プロセス監視 + +### システム要件 +```python +system_requirements = { + "steamvr": "SteamVR環境必須", + "openvr_runtime": "OpenVR Runtime", + "vr_headset": "対応VRヘッドセット(Oculus, Vive, Index等)", + "graphics": "VR対応GPU", + "python": "Python 3.7以上" +} + +performance_requirements = { + "cpu": "VR処理に十分なCPU性能", + "memory": "追加メモリ使用量 ~100-500MB", + "disk_space": "フォントファイル用容量 ~50MB" +} +``` + +### オプション依存関係 +- `utils.errorLogging`: エラーログ機能(フォールバック処理あり) + +## 注意事項・制限 + +### VR環境制限 +- SteamVRが起動していない場合は動作不可 +- VRヘッドセットが接続されていない場合は制限あり +- OpenVRドライバーの互換性に依存 + +### パフォーマンス制限 +- リアルタイム描画処理によるCPU・GPU負荷 +- フォントレンダリングによるメモリ使用量 +- 高解像度VRディスプレイでの描画負荷 + +### プラットフォーム制限 +```python +platform_limitations = { + "windows": "主要サポートプラットフォーム", + "linux": "SteamVR Linux版での制限あり", + "macos": "SteamVR macOS版サポート終了により制限", + "mobile_vr": "OpenVR非対応のため利用不可" +} +``` + +## 関連モジュール + +- `config.py`: オーバーレイ設定管理 +- `controller.py`: オーバーレイ制御インターフェース +- `model.py`: オーバーレイ機能統合 +- `utils.py`: エラーログ・ユーティリティ + +## 将来の改善点 + +- よりリッチなUI要素対応 +- アニメーション・エフェクト機能 +- カスタムフォント・テーマシステム +- パフォーマンス監視・自動最適化 +- 他のVRプラットフォーム対応検討 \ No newline at end of file diff --git a/src-python/docs/details/transcription_languages.md b/src-python/docs/details/transcription_languages.md new file mode 100644 index 00000000..1425593a --- /dev/null +++ b/src-python/docs/details/transcription_languages.md @@ -0,0 +1,229 @@ +# transcription_languages.py - 音声認識言語マッピング + +## 概要 + +音声認識エンジンが対応する言語コードのマッピングテーブルを提供するモジュールです。異なる音声認識エンジンの言語コード仕様の差異を吸収し、統一的なインターフェースを提供します。 + +## 主要機能 + +### 言語マッピングテーブル +- 表示用言語名から各エンジン固有の言語コードへの変換 +- 国・地域固有の言語バリエーション対応 +- 複数音声認識エンジンの統一的な言語管理 + +### 
対応エンジン +- Google Speech Recognition +- OpenAI Whisper(faster-whisper) +- その他の音声認識エンジン + +## データ構造 + +### transcription_lang +```python +transcription_lang: Dict[str, List[Dict[str, str]]] +``` + +言語とその地域バリエーションのマッピング + +```python +transcription_lang = { + "English": [ + {"country": "United States", "google_language_code": "en-US"}, + {"country": "United Kingdom", "google_language_code": "en-GB"}, + {"country": "Australia", "google_language_code": "en-AU"} + ], + "Japanese": [ + {"country": "Japan", "google_language_code": "ja-JP"} + ], + "Korean": [ + {"country": "South Korea", "google_language_code": "ko-KR"} + ] +} +``` + +## 使用方法 + +### 基本的な言語コード取得 + +```python +from models.transcription.transcription_languages import transcription_lang + +# 日本語の言語コード取得 +japanese_codes = transcription_lang.get("Japanese", []) +if japanese_codes: + code = japanese_codes[0]["google_language_code"] # "ja-JP" + +# 英語の地域別言語コード取得 +english_codes = transcription_lang.get("English", []) +for region in english_codes: + print(f"{region['country']}: {region['google_language_code']}") +``` + +### 利用可能言語の一覧取得 + +```python +# 対応言語の一覧 +supported_languages = list(transcription_lang.keys()) +print(f"対応言語: {supported_languages}") + +# 言語と国の組み合わせ一覧 +language_country_pairs = [] +for lang, countries in transcription_lang.items(): + for country_data in countries: + language_country_pairs.append({ + "language": lang, + "country": country_data["country"], + "code": country_data["google_language_code"] + }) +``` + +### 翻訳システムとの連携 + +```python +# 翻訳システムで対応している言語の確認 +from models.translation.translation_languages import translation_lang + +transcription_langs = list(transcription_lang.keys()) +translation_langs = [] +for engine in translation_lang.keys(): + translation_langs.extend(translation_lang[engine]["source"].keys()) + +# 音声認識と翻訳の両方で対応している言語 +supported_langs = list(filter(lambda x: x in transcription_langs, translation_langs)) +``` + +## 主要対応言語 + +### 西欧言語 +- **English**: US, UK, Australia, Canada, 
India, South Africa +- **Spanish**: Spain, Mexico, Argentina, Colombia +- **French**: France, Canada, Belgium +- **German**: Germany, Austria, Switzerland +- **Italian**: Italy +- **Portuguese**: Brazil, Portugal + +### アジア言語 +- **Japanese**: Japan +- **Korean**: South Korea +- **Chinese**: China (Simplified), Taiwan (Traditional), Hong Kong +- **Thai**: Thailand +- **Vietnamese**: Vietnam + +### その他の言語 +- **Russian**: Russia +- **Arabic**: Saudi Arabia, UAE, Egypt +- **Hindi**: India +- **Dutch**: Netherlands +- **Swedish**: Sweden +- **Norwegian**: Norway + +## エンジン別言語コード形式 + +### Google Speech Recognition +- RFC 5646準拠の言語タグ形式 +- 例: "ja-JP", "en-US", "zh-CN" + +### OpenAI Whisper +- ISO 639-1言語コード(2文字) +- 例: "ja", "en", "zh" + +### その他のエンジン +- エンジン固有の形式に対応 +- マッピングテーブルによる変換 + +## 地域対応 + +### 同一言語の地域別対応 +```python +# 英語の地域バリエーション +"English": [ + {"country": "United States", "google_language_code": "en-US"}, + {"country": "United Kingdom", "google_language_code": "en-GB"}, + {"country": "Australia", "google_language_code": "en-AU"}, + {"country": "Canada", "google_language_code": "en-CA"}, + {"country": "India", "google_language_code": "en-IN"} +] +``` + +### 方言・変種対応 +```python +# 中国語の簡体字・繁体字対応 +"Chinese Simplified": [ + {"country": "China", "google_language_code": "zh-CN"} +], +"Chinese Traditional": [ + {"country": "Taiwan", "google_language_code": "zh-TW"}, + {"country": "Hong Kong", "google_language_code": "zh-HK"} +] +``` + +## 統合利用 + +### VRCTでの利用例 + +```python +def get_supported_transcription_languages(): + """音声認識対応言語の取得""" + languages = [] + for language, countries in transcription_lang.items(): + for country_data in countries: + languages.append({ + "language": language, + "country": country_data["country"], + "display_name": f"{language} ({country_data['country']})", + "code": country_data["google_language_code"] + }) + return languages +``` + +### エラーハンドリング + +```python +def get_language_code(language: str, country: str = None) -> str: + 
"""安全な言語コード取得""" + try: + countries = transcription_lang.get(language, []) + if not countries: + return "en-US" # フォールバック + + if country: + for country_data in countries: + if country_data["country"] == country: + return country_data["google_language_code"] + + # 国指定なしまたは見つからない場合は最初の項目を返す + return countries[0]["google_language_code"] + except (KeyError, IndexError): + return "en-US" # エラー時のフォールバック +``` + +## 拡張性 + +### 新言語の追加 +```python +# 新しい言語の追加例 +transcription_lang["Turkish"] = [ + {"country": "Turkey", "google_language_code": "tr-TR"} +] +``` + +### 新エンジンへの対応 +```python +# 新しいエンジンのコードフィールドを追加 +transcription_lang["English"][0]["azure_language_code"] = "en-US" +transcription_lang["English"][0]["aws_language_code"] = "en-US" +``` + +## 注意事項 + +- 言語コードは各エンジンの仕様に依存 +- 新しいエンジン追加時は対応コードの追加が必要 +- 地域固有の音声認識精度差に注意 +- エンジンによってサポート言語が異なる場合がある + +## 関連モジュール + +- `transcription_transcriber.py`: 音声認識エンジン本体 +- `translation_languages.py`: 翻訳エンジン言語マッピング +- `config.py`: 言語設定管理 +- `controller.py`: 言語選択UI制御 \ No newline at end of file diff --git a/src-python/docs/details/transcription_recorder.md b/src-python/docs/details/transcription_recorder.md new file mode 100644 index 00000000..1ac6dddc --- /dev/null +++ b/src-python/docs/details/transcription_recorder.md @@ -0,0 +1,325 @@ +# transcription_recorder.py - 音声録音インターフェース + +## 概要 + +音声認識システムの入力となる音声データを録音するレコーダークラス群です。マイクとスピーカー出力の両方をサポートし、エネルギーレベル監視機能とともに音声データをキューに送信します。pyaudiowpatchライブラリを使用してWindowsの音声システムと統合します。 + +## 主要機能 + +### 音声録音機能 +- マイクからの音声録音 +- スピーカー出力の録音(ループバック) +- リアルタイム音声データキューイング + +### エネルギー監視 +- 音声エネルギーレベルの監視 +- 動的しきい値調整 +- 無音検出 + +### デバイス対応 +- 複数音声デバイスの対応 +- デバイス固有設定の管理 +- 自動デバイス選択 + +## クラス構造 + +### BaseRecorder クラス +```python +class BaseRecorder: + def __init__(self, source: Any, energy_threshold: int, dynamic_energy_threshold: bool, record_timeout: int) +``` + +基底レコーダークラス - 共通機能を提供 + +### SelectedMicRecorder クラス +```python +class SelectedMicRecorder(BaseRecorder): + def __init__(self, device: dict, 
energy_threshold: int, dynamic_energy_threshold: bool, record_timeout: int) +``` + +選択されたマイクデバイスからの録音 + +### SelectedSpeakerRecorder クラス +```python +class SelectedSpeakerRecorder(BaseRecorder): + def __init__(self, device: dict, energy_threshold: int, dynamic_energy_threshold: bool, record_timeout: int) +``` + +選択されたスピーカーデバイスからの録音(ループバック) + +### エネルギー監視クラス群 + +#### BaseEnergyRecorder クラス +```python +class BaseEnergyRecorder: + def __init__(self, source: Any) +``` + +エネルギーレベル監視の基底クラス + +#### SelectedMicEnergyRecorder クラス +```python +class SelectedMicEnergyRecorder(BaseEnergyRecorder): + def __init__(self, device: dict) +``` + +マイクエネルギーレベルの監視 + +#### SelectedSpeakerEnergyRecorder クラス +```python +class SelectedSpeakerEnergyRecorder(BaseEnergyRecorder): + def __init__(self, device: dict) +``` + +スピーカーエネルギーレベルの監視 + +### 統合録音クラス群 + +#### BaseEnergyAndAudioRecorder クラス +```python +class BaseEnergyAndAudioRecorder: + def __init__(self, source: Any, energy_threshold: int, dynamic_energy_threshold: bool, + phrase_time_limit: int, phrase_timeout: int, record_timeout: int) +``` + +音声録音とエネルギー監視を統合 + +#### SelectedMicEnergyAndAudioRecorder クラス +```python +class SelectedMicEnergyAndAudioRecorder(BaseEnergyAndAudioRecorder): + def __init__(self, device: dict, energy_threshold: int, dynamic_energy_threshold: bool, + phrase_time_limit: int, phrase_timeout: int = 1, record_timeout: int = 5) +``` + +マイクの音声録音とエネルギー監視を統合 + +#### SelectedSpeakerEnergyAndAudioRecorder クラス +```python +class SelectedSpeakerEnergyAndAudioRecorder(BaseEnergyAndAudioRecorder): + def __init__(self, device: dict, energy_threshold: int, dynamic_energy_threshold: bool, + phrase_time_limit: int, phrase_timeout: int = 1, record_timeout: int = 5) +``` + +スピーカーの音声録音とエネルギー監視を統合 + +## 主要メソッド + +### 録音制御 + +```python +adjustForNoise() -> None +``` +- 環境ノイズに合わせたしきい値調整 +- 録音開始前の較正 + +```python +recordIntoQueue(audio_queue: Queue) -> None +``` +- 音声データの継続的キューイング +- バックグラウンドスレッドでの実行 + +```python +pause() -> None +resume() -> 
None +stop() -> None +``` +- 録音の一時停止・再開・停止制御 + +### エネルギー監視 + +```python +recordIntoQueue(energy_queue: Queue) -> None +``` +- エネルギーレベルのキューイング +- リアルタイム監視データの提供 + +## 使用方法 + +### 基本的なマイク録音 + +```python +from queue import Queue +from models.transcription.transcription_recorder import SelectedMicRecorder + +# デバイス設定 +mic_device = { + "name": "マイク (USB Audio Device)", + "index": 0, + "channels": 1, + "sample_rate": 16000 +} + +# 録音設定 +energy_threshold = 300 +dynamic_threshold = True +record_timeout = 5 + +# レコーダー初期化 +recorder = SelectedMicRecorder( + device=mic_device, + energy_threshold=energy_threshold, + dynamic_energy_threshold=dynamic_threshold, + record_timeout=record_timeout +) + +# 音声キューの作成 +audio_queue = Queue() + +# 録音開始 +recorder.adjustForNoise() # ノイズ調整 +recorder.recordIntoQueue(audio_queue) + +# 音声データの取得 +while True: + if not audio_queue.empty(): + audio_data = audio_queue.get() + print(f"音声データ受信: {len(audio_data)} bytes") +``` + +### スピーカー録音(ループバック) + +```python +from models.transcription.transcription_recorder import SelectedSpeakerRecorder + +# スピーカーデバイス設定 +speaker_device = { + "name": "スピーカー (USB Audio Device)", + "index": 1, + "channels": 2, + "sample_rate": 44100 +} + +# スピーカーレコーダー +recorder = SelectedSpeakerRecorder( + device=speaker_device, + energy_threshold=500, + dynamic_energy_threshold=False, + record_timeout=3 +) + +audio_queue = Queue() +recorder.recordIntoQueue(audio_queue) +``` + +### エネルギー監視 + +```python +from models.transcription.transcription_recorder import SelectedMicEnergyRecorder + +# エネルギー監視のみ +energy_recorder = SelectedMicEnergyRecorder(mic_device) +energy_queue = Queue() + +energy_recorder.recordIntoQueue(energy_queue) + +# エネルギーレベルの取得 +while True: + if not energy_queue.empty(): + energy_level = energy_queue.get() + print(f"エネルギーレベル: {energy_level}") +``` + +### 統合録音(音声+エネルギー) + +```python +from models.transcription.transcription_recorder import SelectedMicEnergyAndAudioRecorder + +# 統合レコーダー +integrated_recorder = 
SelectedMicEnergyAndAudioRecorder( + device=mic_device, + energy_threshold=300, + dynamic_energy_threshold=True, + phrase_time_limit=5, # フレーズ制限時間 + phrase_timeout=1, # フレーズタイムアウト + record_timeout=5 # 録音タイムアウト +) + +audio_queue = Queue() +energy_queue = Queue() + +# 両方のキューに同時出力 +integrated_recorder.recordIntoQueue(audio_queue, energy_queue) +``` + +## 設定パラメータ + +### しきい値設定 +- **energy_threshold**: 音声検出のエネルギーしきい値 +- **dynamic_energy_threshold**: 動的しきい値調整の有効・無効 + +### タイムアウト設定 +- **record_timeout**: 録音継続時間の上限 +- **phrase_timeout**: フレーズ間の無音許容時間 +- **phrase_time_limit**: 単一フレーズの最大長 + +### デバイス設定 +- **name**: デバイス名 +- **index**: デバイスインデックス +- **channels**: チャンネル数(1=モノラル、2=ステレオ) +- **sample_rate**: サンプリングレート(Hz) + +## デバイス対応 + +### マイクデバイス +- USB マイク +- 内蔵マイク +- Bluetooth マイク +- 仮想マイクデバイス + +### スピーカーデバイス(ループバック) +- USB スピーカー/ヘッドフォン +- 内蔵スピーカー +- Bluetooth スピーカー +- 仮想音声デバイス + +## エラーハンドリング + +### デバイスエラー +- デバイス接続失敗の検出 +- 適切なエラーメッセージの提供 + +### 音声フォーマットエラー +- 非対応フォーマットの検出 +- 自動フォーマット変換 + +### メモリエラー +- キューオーバーフローの防止 +- メモリ使用量の最適化 + +## パフォーマンス特性 + +### レイテンシ +- 低レイテンシ録音(~10ms) +- リアルタイム処理最適化 + +### スループット +- 連続録音対応 +- 高サンプリングレート対応 + +### メモリ使用量 +- 効率的なバッファ管理 +- キューサイズの最適化 + +## 依存関係 + +### 必須依存関係 +- `speech_recognition`: 音声認識ライブラリ +- `pyaudiowpatch`: Windows音声システム統合 +- `queue`: データキューイング + +### オプション依存関係 +- `datetime`: タイムスタンプ機能 + +## 注意事項 + +- Windows専用(pyaudiowpatchによる制限) +- 適切な音声デバイスドライバーが必要 +- 排他制御による同時デバイスアクセス制限 +- 高サンプリングレート使用時のCPU使用率上昇 + +## 関連モジュール + +- `transcription_transcriber.py`: 音声認識エンジン +- `device_manager.py`: デバイス管理 +- `config.py`: 録音設定管理 +- `model.py`: 録音制御統合 \ No newline at end of file diff --git a/src-python/docs/details/transcription_transcriber.md b/src-python/docs/details/transcription_transcriber.md new file mode 100644 index 00000000..db5111f7 --- /dev/null +++ b/src-python/docs/details/transcription_transcriber.md @@ -0,0 +1,325 @@ +# transcription_transcriber.py - 音声文字起こしエンジン + +## 概要 + +音声データを文字テキストに変換する音声認識エンジンのメインクラスです。Google Speech 
RecognitionとOpenAI Whisper(faster-whisper)の両方をサポートし、オンライン・オフラインの音声認識を統合的に管理します。キューベースの非同期処理により、リアルタイム音声認識を実現します。 + +## 主要機能 + +### 音声認識エンジン +- Google Speech Recognition(オンライン) +- OpenAI Whisper(faster-whisper、オフライン) +- エンジン自動切り替え機能 + +### リアルタイム処理 +- 音声キューからの継続的データ処理 +- 非同期音声認識処理 +- 結果の即座通知 + +### 多言語対応 +- 複数言語の同時認識 +- 地域固有言語コードの対応 +- 自動言語検出 + +### 音声品質制御 +- 音声品質フィルタリング +- ノイズ除去機能 +- 信頼度スコア評価 + +## クラス構造 + +### AudioTranscriber クラス +```python +class AudioTranscriber: + def __init__(self, speaker: bool, source: Any, phrase_timeout: int, max_phrases: int, + transcription_engine: str, root: Optional[str] = None, + whisper_weight_type: Optional[str] = None, device: str = "cpu", + device_index: int = 0, compute_type: str = "auto") +``` + +音声認識の中核クラス + +#### 初期化パラメータ +- **speaker**: スピーカー音声かマイク音声か +- **source**: 音声ソース +- **phrase_timeout**: フレーズタイムアウト(秒) +- **max_phrases**: 最大フレーズ数 +- **transcription_engine**: 認識エンジン("Google"/"Whisper") +- **whisper_weight_type**: Whisperモデル種類 +- **device**: 計算デバイス("cpu"/"cuda") +- **device_index**: デバイスインデックス +- **compute_type**: 計算精度タイプ + +## 主要メソッド + +### 音声認識処理 + +```python +transcribeAudioQueue(audio_queue: Queue, languages: List[str], countries: List[str], + avg_logprob: float = -0.8, no_speech_prob: float = 0.6) -> bool +``` + +音声キューからの継続的音声認識 + +#### パラメータ +- **audio_queue**: 音声データキュー +- **languages**: 認識対象言語リスト +- **countries**: 地域コードリスト +- **avg_logprob**: Whisper平均対数確率しきい値 +- **no_speech_prob**: Whisper無音判定しきい値 + +### 結果管理 + +```python +getTranscript() -> dict +``` + +最新の認識結果を取得 + +```python +updateTranscript(result: dict) -> None +``` + +認識結果の更新と通知 + +```python +clearTranscriptData() -> None +``` + +認識データのクリア + +### 音声データ処理 + +```python +processMicData() -> AudioData +``` + +マイク音声データの前処理 + +```python +processSpeakerData() -> AudioData +``` + +スピーカー音声データの前処理 + +## 使用方法 + +### 基本的な音声認識 + +```python +from queue import Queue +from models.transcription.transcription_transcriber import AudioTranscriber + +# 音声認識の初期化 +transcriber = 
AudioTranscriber( + speaker=False, # マイク音声 + source=mic_source, # 音声ソース + phrase_timeout=3, # 3秒のフレーズタイムアウト + max_phrases=10, # 最大10フレーズ + transcription_engine="Google", # Google音声認識 + device="cpu" +) + +# 音声キューの準備 +audio_queue = Queue() + +# 認識対象言語の設定 +languages = ["Japanese", "English"] +countries = ["Japan", "United States"] + +# 音声認識の実行 +def transcription_loop(): + while True: + success = transcriber.transcribeAudioQueue( + audio_queue, languages, countries + ) + if success: + result = transcriber.getTranscript() + print(f"認識結果: {result['text']}") + print(f"言語: {result['language']}") + +# バックグラウンドで実行 +import threading +thread = threading.Thread(target=transcription_loop) +thread.daemon = True +thread.start() +``` + +### Whisperエンジンの使用 + +```python +# Whisper音声認識の初期化 +whisper_transcriber = AudioTranscriber( + speaker=True, # スピーカー音声 + source=speaker_source, + phrase_timeout=5, + max_phrases=5, + transcription_engine="Whisper", + whisper_weight_type="base", # Whisperモデル + device="cuda", # CUDA使用 + device_index=0, + compute_type="float16" # 半精度浮動小数点 +) + +# Whisper固有パラメータでの認識 +success = whisper_transcriber.transcribeAudioQueue( + audio_queue, languages, countries, + avg_logprob=-0.5, # より厳しい品質しきい値 + no_speech_prob=0.4 # より敏感な無音検出 +) +``` + +### コールバック処理 + +```python +def on_transcription_result(result): + """認識結果のコールバック処理""" + if result["text"]: + print(f"認識成功: {result['text']}") + print(f"言語: {result['language']}") + print(f"信頼度: {result.get('confidence', 'N/A')}") + else: + print("音声認識失敗") + +# 結果通知の設定 +transcriber.transcript_changed_event.set() # イベント設定 +``` + +### エラーハンドリング付きの使用 + +```python +def safe_transcription(transcriber, audio_queue, languages, countries): + """安全な音声認識処理""" + try: + success = transcriber.transcribeAudioQueue( + audio_queue, languages, countries + ) + + if success: + result = transcriber.getTranscript() + return result + else: + return {"text": False, "language": None, "error": "認識失敗"} + + except Exception as e: + print(f"音声認識エラー: {e}") 
+ return {"text": False, "language": None, "error": str(e)} +``` + +## 認識エンジン比較 + +### Google Speech Recognition + +#### 利点 +- 高い認識精度 +- 多言語対応 +- リアルタイム処理 +- ノイズ耐性 + +#### 制限 +- インターネット接続必須 +- API制限 +- プライバシー懸念 +- レイテンシ + +### OpenAI Whisper(faster-whisper) + +#### 利点 +- オフライン動作 +- プライバシー保護 +- 高精度 +- 多言語対応 + +#### 制限 +- 初回起動時間 +- メモリ使用量 +- CUDA推奨 +- モデルファイル必要 + +## 設定パラメータ + +### フレーズ制御 +- **phrase_timeout**: フレーズ間無音時間(秒) +- **max_phrases**: バッファ内最大フレーズ数 + +### Whisper品質設定 +- **avg_logprob**: 平均対数確率しきい値(-1.0〜0.0) +- **no_speech_prob**: 無音判定しきい値(0.0〜1.0) + +### 計算設定 +- **device**: "cpu" または "cuda" +- **compute_type**: "float32", "float16", "int8" など + +## 音声データフォーマット + +### 入力形式 +- サンプリングレート: 16kHz推奨 +- ビット深度: 16bit +- チャンネル: モノラル推奨 +- フォーマット: WAV、FLAC等 + +### 処理フロー +1. 音声キューからデータ取得 +2. 音声フォーマット正規化 +3. 音声認識エンジン実行 +4. 結果の後処理・フィルタリング +5. 最終結果の通知 + +## パフォーマンス最適化 + +### メモリ管理 +- 音声バッファの適切なサイズ設定 +- 不要な音声データの早期解放 +- Whisperモデルのメモリ効率化 + +### 計算最適化 +- CUDA使用による高速化 +- 適切な計算精度選択 +- バッチ処理の活用 + +### レイテンシ削減 +- 音声バッファサイズの最適化 +- エンジン切り替えの高速化 +- キャッシュ機能の活用 + +## エラーハンドリング + +### ネットワークエラー +- Google API接続失敗の検出 +- 自動Whisperエンジン切り替え + +### 音声品質エラー +- 低品質音声の検出・フィルタリング +- ノイズレベル監視 + +### リソースエラー +- VRAM不足の検出 +- メモリ不足時の対応 + +## 依存関係 + +### 必須依存関係 +- `speech_recognition`: Google音声認識 +- `faster_whisper`: Whisper音声認識 +- `pyaudiowpatch`: 音声入力 +- `pydub`: 音声処理 + +### オプション依存関係 +- `torch`: CUDA計算 +- `utils`: エラーログ機能 + +## 注意事項 + +- Google APIは使用制限あり +- Whisperは初回起動に時間要 +- CUDA使用時はVRAM消費に注意 +- 音声品質が認識精度に大きく影響 +- 多言語認識時は処理負荷増加 + +## 関連モジュール + +- `transcription_recorder.py`: 音声録音 +- `transcription_whisper.py`: Whisperモデル管理 +- `transcription_languages.py`: 言語コード管理 +- `config.py`: 認識設定管理 +- `model.py`: 音声認識統合制御 \ No newline at end of file diff --git a/src-python/docs/details/transcription_whisper.md b/src-python/docs/details/transcription_whisper.md new file mode 100644 index 00000000..70dcedc9 --- /dev/null +++ b/src-python/docs/details/transcription_whisper.md @@ -0,0 +1,373 @@ +# 
transcription_whisper.py - Whisperモデル管理 + +## 概要 + +OpenAI Whisper(faster-whisper)モデルのダウンロード、検証、読み込みを管理するユーティリティモジュールです。複数のモデルサイズをサポートし、Hugging Face Hubからの自動ダウンロード機能とファイル整合性チェック機能を提供します。 + +## 主要機能 + +### モデル管理 +- 複数Whisperモデルサイズの対応 +- Hugging Face Hubからの自動ダウンロード +- モデルファイルの整合性検証 + +### ダウンロード機能 +- 進捗表示付きダウンロード +- レジューム対応 +- エラーハンドリング + +### モデル読み込み +- 効率的なモデル初期化 +- CUDA対応 +- 計算タイプ最適化 + +## サポートモデル + +### 利用可能なモデル +```python +_MODELS = { + "tiny": "Systran/faster-whisper-tiny", # ~39MB + "base": "Systran/faster-whisper-base", # ~74MB + "small": "Systran/faster-whisper-small", # ~244MB + "medium": "Systran/faster-whisper-medium", # ~769MB + "large-v1": "Systran/faster-whisper-large-v1", # ~1.5GB + "large-v2": "Systran/faster-whisper-large-v2", # ~1.5GB + "large-v3": "Systran/faster-whisper-large-v3", # ~1.5GB + "large-v3-turbo-int8": "Zoont/faster-whisper-large-v3-turbo-int8-ct2", # ~794MB + "large-v3-turbo": "deepdml/faster-whisper-large-v3-turbo-ct2" # ~1.58GB +} +``` + +### モデル特性比較 + +#### tiny +- **サイズ**: ~39MB +- **精度**: 低 +- **速度**: 最高速 +- **用途**: リアルタイム処理、リソース制限環境 + +#### base +- **サイズ**: ~74MB +- **精度**: 中程度 +- **速度**: 高速 +- **用途**: 一般的な用途、バランス重視 + +#### small +- **サイズ**: ~244MB +- **精度**: 良好 +- **速度**: 中程度 +- **用途**: 品質重視、モバイル環境 + +#### medium +- **サイズ**: ~769MB +- **精度**: 高 +- **速度**: やや低速 +- **用途**: 高品質認識、デスクトップ環境 + +#### large系 +- **サイズ**: ~1.5GB +- **精度**: 最高 +- **速度**: 低速 +- **用途**: 最高品質、サーバー環境 + +## 主要関数 + +### ファイルダウンロード + +```python +downloadFile(url: str, path: str, func: Optional[Callable[[float], None]] = None) -> None +``` + +ファイルのストリームダウンロード + +#### パラメータ +- **url**: ダウンロードURL +- **path**: 保存先パス +- **func**: 進捗コールバック関数 + +### モデル検証 + +```python +checkWhisperWeight(root: str, weight_type: str) -> bool +``` + +Whisperモデルの利用可能性確認 + +#### パラメータ +- **root**: アプリケーションルートパス +- **weight_type**: モデルタイプ("tiny", "base"等) + +#### 戻り値 +- **bool**: モデルが利用可能かどうか + +### モデルダウンロード + +```python +downloadWhisperWeight(root: str, weight_type: str, + callback: 
Optional[Callable[[float], None]] = None, + end_callback: Optional[Callable[[], None]] = None) -> None +``` + +Whisperモデルのダウンロード + +#### パラメータ +- **root**: アプリケーションルートパス +- **weight_type**: ダウンロードするモデルタイプ +- **callback**: 進捗コールバック +- **end_callback**: 完了コールバック + +### モデル読み込み + +```python +getWhisperModel(root: str, weight_type: str, device: str = "cpu", + device_index: int = 0, compute_type: str = "auto") -> WhisperModel +``` + +Whisperモデルの初期化 + +#### パラメータ +- **root**: アプリケーションルートパス +- **weight_type**: 使用するモデルタイプ +- **device**: 計算デバイス("cpu"/"cuda") +- **device_index**: デバイスインデックス +- **compute_type**: 計算精度タイプ + +#### 戻り値 +- **WhisperModel**: 初期化されたWhisperモデルインスタンス + +## 使用方法 + +### モデルの確認とダウンロード + +```python +from models.transcription.transcription_whisper import checkWhisperWeight, downloadWhisperWeight + +root_path = "." +model_type = "base" + +# モデルの利用可能性確認 +if not checkWhisperWeight(root_path, model_type): + print(f"{model_type}モデルが見つかりません。ダウンロードします...") + + # 進捗コールバック + def progress_callback(progress): + print(f"ダウンロード進捗: {progress:.1%}") + + # 完了コールバック + def completion_callback(): + print("ダウンロード完了!") + + # モデルダウンロード + downloadWhisperWeight( + root=root_path, + weight_type=model_type, + callback=progress_callback, + end_callback=completion_callback + ) +else: + print(f"{model_type}モデルは利用可能です") +``` + +### モデルの読み込みと使用 + +```python +from models.transcription.transcription_whisper import getWhisperModel + +# CPUでのモデル読み込み +model = getWhisperModel( + root=".", + weight_type="base", + device="cpu" +) + +# CUDAでのモデル読み込み(GPU使用) +gpu_model = getWhisperModel( + root=".", + weight_type="small", + device="cuda", + device_index=0, + compute_type="float16" # 半精度で高速化 +) + +# 音声認識の実行 +audio_file = "audio.wav" +segments, info = model.transcribe(audio_file, language="ja") + +for segment in segments: + print(f"{segment.start:.1f}s - {segment.end:.1f}s: {segment.text}") +``` + +### エラーハンドリング付きの使用 + +```python +def safe_model_loading(root, weight_type, device="cpu"): + 
"""安全なモデル読み込み""" + try: + # モデル存在確認 + if not checkWhisperWeight(root, weight_type): + print(f"モデル {weight_type} をダウンロード中...") + downloadWhisperWeight(root, weight_type) + + # モデル読み込み + model = getWhisperModel(root, weight_type, device) + return model + + except Exception as e: + print(f"モデル読み込みエラー: {e}") + # フォールバック: より小さなモデルを試す + if weight_type != "tiny": + return safe_model_loading(root, "tiny", device) + return None +``` + +### 進捗表示付きダウンロード + +```python +import sys + +def download_with_progress(root, weight_type): + """進捗表示付きダウンロード""" + def show_progress(progress): + bar_length = 40 + filled_length = int(bar_length * progress) + bar = '█' * filled_length + '-' * (bar_length - filled_length) + sys.stdout.write(f'\r[{bar}] {progress:.1%}') + sys.stdout.flush() + + def download_complete(): + print("\nダウンロード完了!") + + print(f"Whisper {weight_type} モデルをダウンロード中...") + downloadWhisperWeight(root, weight_type, show_progress, download_complete) +``` + +## ディレクトリ構造 + +### モデルファイル配置 +``` +root/ +└── weights/ + └── whisper/ + ├── tiny/ + │ ├── config.json + │ ├── preprocessor_config.json + │ ├── model.bin + │ ├── tokenizer.json + │ └── vocabulary.txt + ├── base/ + └── small/ +``` + +### 必要ファイル +```python +_FILENAMES = [ + "config.json", # モデル設定 + "preprocessor_config.json", # 前処理設定 + "model.bin", # モデルウェイト + "tokenizer.json", # トークナイザー + "vocabulary.txt", # 語彙ファイル + "vocabulary.json" # 語彙ファイル(JSON形式) +] +``` + +## パフォーマンス考慮事項 + +### メモリ使用量 +- **tiny**: ~100MB RAM +- **base**: ~200MB RAM +- **small**: ~500MB RAM +- **medium**: ~1.5GB RAM +- **large**: ~3GB RAM + +### VRAM使用量(CUDA使用時) +- **tiny**: ~200MB VRAM +- **base**: ~300MB VRAM +- **small**: ~600MB VRAM +- **medium**: ~1.8GB VRAM +- **large**: ~3.5GB VRAM + +### 処理速度(目安) +- **tiny**: リアルタイム処理可能 +- **base**: 1x-2x リアルタイム +- **small**: 0.5x-1x リアルタイム +- **medium**: 0.2x-0.5x リアルタイム +- **large**: 0.1x-0.3x リアルタイム + +## 計算タイプ設定 + +### 利用可能な計算タイプ +- **float32**: 最高精度、低速 +- **float16**: 高精度、中速(CUDA推奨) +- **int8**: 中精度、高速 +- 
**int8_float16**: 混合精度、バランス + +### 推奨設定 +```python +# CPU使用時 +compute_type = "int8" # 速度重視 + +# CUDA使用時(RTX以上) +compute_type = "float16" # 精度と速度のバランス + +# CUDA使用時(VRAM制限) +compute_type = "int8_float16" # メモリ効率重視 +``` + +## エラーハンドリング + +### ダウンロードエラー +- ネットワーク接続失敗 +- ディスク容量不足 +- 権限不足 + +### モデル読み込みエラー +- VRAM不足 +- 破損したモデルファイル +- 非対応デバイス + +### 対応策 +```python +def robust_model_loading(root, preferred_type="base"): + """堅牢なモデル読み込み""" + model_priority = ["tiny", "base", "small", "medium"] + + # 優先モデルを先頭に配置 + if preferred_type in model_priority: + model_priority.remove(preferred_type) + model_priority.insert(0, preferred_type) + + for model_type in model_priority: + try: + if checkWhisperWeight(root, model_type): + return getWhisperModel(root, model_type) + except Exception as e: + print(f"{model_type} モデル読み込み失敗: {e}") + continue + + raise RuntimeError("利用可能なWhisperモデルがありません") +``` + +## 依存関係 + +### 必須依存関係 +- `faster_whisper`: Whisperエンジン +- `requests`: ファイルダウンロード +- `utils`: ユーティリティ機能 + +### オプション依存関係 +- `torch`: CUDA計算(GPU使用時) + +## 注意事項 + +- 初回モデル読み込み時はダウンロードに時間がかかる +- 大きなモデルほど高精度だが、メモリとVRAMを大量消費 +- CUDAを使用する場合は適切なGPUドライバーが必要 +- モデルファイルの整合性チェックが重要 +- ネットワーク環境によってダウンロード時間が大きく変動 + +## 関連モジュール + +- `transcription_transcriber.py`: Whisper音声認識エンジン +- `config.py`: Whisperモデル設定管理 +- `utils.py`: 計算デバイス管理 +- `model.py`: Whisper統合制御 \ No newline at end of file diff --git a/src-python/docs/details/translation_languages.md b/src-python/docs/details/translation_languages.md new file mode 100644 index 00000000..9943e3e6 --- /dev/null +++ b/src-python/docs/details/translation_languages.md @@ -0,0 +1,342 @@ +# translation_languages.py - 翻訳言語マッピング + +## 概要 + +翻訳エンジンが対応する言語コードのマッピングテーブルを提供するモジュールです。複数の翻訳エンジン(DeepL、Google、Bing、Papago等)の言語コード仕様の差異を吸収し、統一的な翻訳言語管理を実現します。 + +## 主要機能 + +### 多エンジン対応 +- DeepL(無料版・API版) +- Google Translate +- Microsoft Translator(Bing) +- Papago Translator +- その他のWeb翻訳サービス + +### 言語コード統合管理 +- 各エンジン固有の言語コード形式を統一 +- 送信元(source)と送信先(target)言語の分離管理 +- 
地域固有言語バリエーションの対応 + +## データ構造 + +### translation_lang +```python +translation_lang: Dict[str, Dict[str, Dict[str, str]]] = { + "エンジン名": { + "source": {"言語名": "言語コード", ...}, + "target": {"言語名": "言語コード", ...} + } +} +``` + +### DeepL翻訳エンジン(無料版) + +```python +translation_lang["DeepL"] = { + "source": { + "Arabic": "ar", "Bulgarian": "bg", "Czech": "cs", "Danish": "da", + "German": "de", "Greek": "el", "English": "en", "Spanish": "es", + "Estonian": "et", "Finnish": "fi", "French": "fr", "Irish": "ga", + "Croatian": "hr", "Hungarian": "hu", "Indonesian": "id", + "Icelandic": "is", "Italian": "it", "Japanese": "ja", + "Korean": "ko", "Lithuanian": "lt", "Latvian": "lv", + "Maltese": "mt", "Bokmal": "nb", "Dutch": "nl", + "Norwegian": "no", "Polish": "pl", "Portuguese": "pt", + "Romanian": "ro", "Russian": "ru", "Slovak": "sk", + "Slovenian": "sl", "Swedish": "sv", "Turkish": "tr", + "Ukrainian": "uk", "Chinese Simplified": "zh", + "Chinese Traditional": "zh" + }, + "target": {/* 同じマッピング */} +} +``` + +### DeepL API(有料版) + +```python +translation_lang["DeepL_API"] = { + "source": {/* 基本的にDeepLと同様 */}, + "target": { + "Japanese": "ja", + "English American": "en-US", # 地域別対応 + "English British": "en-GB", + "Portuguese Brazilian": "pt-BR", # ブラジル・ポルトガル語 + "Portuguese European": "pt-PT", # ヨーロッパ・ポルトガル語 + "Chinese Simplified": "zh", + "Chinese Traditional": "zh" + /* その他の言語 */ + } +} +``` + +## 主要対応言語 + +### 西欧言語 +- **English**: 英語(米国・英国バリエーション) +- **German**: ドイツ語 +- **French**: フランス語 +- **Spanish**: スペイン語 +- **Italian**: イタリア語 +- **Portuguese**: ポルトガル語(ブラジル・欧州) +- **Dutch**: オランダ語 +- **Swedish**: スウェーデン語 +- **Norwegian**: ノルウェー語 + +### 東欧・スラブ言語 +- **Russian**: ロシア語 +- **Polish**: ポーランド語 +- **Czech**: チェコ語 +- **Slovak**: スロバキア語 +- **Ukrainian**: ウクライナ語 +- **Bulgarian**: ブルガリア語 +- **Croatian**: クロアチア語 +- **Slovenian**: スロベニア語 + +### アジア言語 +- **Japanese**: 日本語 +- **Korean**: 韓国語 +- **Chinese Simplified**: 中国語(簡体字) +- **Chinese Traditional**: 中国語(繁体字) +- **Indonesian**: インドネシア語 
+ +### その他の言語 +- **Arabic**: アラビア語 +- **Turkish**: トルコ語 +- **Finnish**: フィンランド語 +- **Estonian**: エストニア語 +- **Latvian**: ラトビア語 +- **Lithuanian**: リトアニア語 +- **Maltese**: マルタ語 +- **Irish**: アイルランド語 + +## 使用方法 + +### 基本的な言語コード取得 + +```python +from models.translation.translation_languages import translation_lang + +# DeepLで日本語から英語への翻訳 +deepl_source = translation_lang["DeepL"]["source"]["Japanese"] # "ja" +deepl_target = translation_lang["DeepL"]["target"]["English"] # "en" + +# DeepL APIで地域固有の英語指定 +deepl_api_target = translation_lang["DeepL_API"]["target"]["English American"] # "en-US" +``` + +### 対応言語の確認 + +```python +def get_supported_languages(engine_name): + """指定エンジンの対応言語一覧取得""" + if engine_name in translation_lang: + engine_data = translation_lang[engine_name] + source_langs = list(engine_data["source"].keys()) + target_langs = list(engine_data["target"].keys()) + return { + "source": source_langs, + "target": target_langs, + "common": list(set(source_langs) & set(target_langs)) + } + return None + +# 使用例 +deepl_langs = get_supported_languages("DeepL") +print(f"DeepL対応言語数: {len(deepl_langs['common'])}") +``` + +### 言語コード変換 + +```python +def convert_language_code(language_name, from_engine, to_engine, direction="source"): + """エンジン間での言語コード変換""" + try: + # 元エンジンから言語名を確認 + from_codes = translation_lang[from_engine][direction] + to_codes = translation_lang[to_engine][direction] + + if language_name in from_codes and language_name in to_codes: + return to_codes[language_name] + return None + except KeyError: + return None + +# 使用例:DeepLからGoogle Translateへの変換 +google_code = convert_language_code("Japanese", "DeepL", "Google", "target") +``` + +### 翻訳システムでの統合利用 + +```python +class TranslationLanguageManager: + """翻訳言語管理クラス""" + + @staticmethod + def get_language_code(engine, language, direction="target"): + """安全な言語コード取得""" + try: + return translation_lang[engine][direction][language] + except KeyError: + return None + + @staticmethod + def is_language_supported(engine, 
language, direction="target"): + """言語サポート確認""" + try: + return language in translation_lang[engine][direction] + except KeyError: + return False + + @staticmethod + def get_compatible_engines(source_lang, target_lang): + """両言語をサポートするエンジン一覧""" + compatible = [] + for engine in translation_lang: + source_supported = TranslationLanguageManager.is_language_supported( + engine, source_lang, "source" + ) + target_supported = TranslationLanguageManager.is_language_supported( + engine, target_lang, "target" + ) + if source_supported and target_supported: + compatible.append(engine) + return compatible + +# 使用例 +manager = TranslationLanguageManager() + +# 日本語→英語をサポートするエンジン +engines = manager.get_compatible_engines("Japanese", "English") +print(f"対応エンジン: {engines}") + +# 特定エンジンでの言語コード取得 +ja_code = manager.get_language_code("DeepL", "Japanese", "source") +en_code = manager.get_language_code("DeepL", "English", "target") +``` + +## エンジン別特徴 + +### DeepL(無料版) +- **強み**: 高精度、自然な翻訳 +- **制限**: 月間使用量制限、API制限 +- **対応**: 26言語 + +### DeepL API(有料版) +- **強み**: DeepLの高精度、地域別言語対応 +- **制限**: 従量課金 +- **対応**: 地域固有言語バリエーション + +### Google Translate +- **強み**: 多言語対応、高速 +- **制限**: API制限、精度のばらつき +- **対応**: 100+言語 + +### Microsoft Translator +- **強み**: リアルタイム翻訳、音声対応 +- **制限**: APIキー必要 +- **対応**: 70+言語 + +## 地域バリエーション対応 + +### 英語の地域別対応 +```python +# DeepL APIでの英語バリエーション +"English American": "en-US", # アメリカ英語 +"English British": "en-GB", # イギリス英語 +``` + +### ポルトガル語の地域別対応 +```python +# ブラジル・ポルトガル語とヨーロッパ・ポルトガル語 +"Portuguese Brazilian": "pt-BR", +"Portuguese European": "pt-PT", +``` + +### 中国語の文字体系対応 +```python +# 簡体字・繁体字の区別 +"Chinese Simplified": "zh", # 簡体字(中国本土) +"Chinese Traditional": "zh", # 繁体字(台湾・香港) +``` + +## 拡張性 + +### 新エンジンの追加 +```python +# 新しい翻訳エンジンの追加例 +translation_lang["NewEngine"] = { + "source": { + "Japanese": "jp", + "English": "en", + "Korean": "kr" + }, + "target": { + "Japanese": "jp", + "English": "en", + "Korean": "kr" + } +} +``` + +### 新言語の追加 +```python +# 既存エンジンへの新言語追加 
+translation_lang["DeepL"]["source"]["Hindi"] = "hi" +translation_lang["DeepL"]["target"]["Hindi"] = "hi" +``` + +## エラーハンドリング + +### 安全な言語コード取得 +```python +def safe_get_language_code(engine, language, direction="target", fallback="en"): + """フォールバック機能付き言語コード取得""" + try: + return translation_lang[engine][direction][language] + except KeyError: + # フォールバック言語を返す + try: + return translation_lang[engine][direction].get("English", fallback) + except KeyError: + return fallback +``` + +### 言語サポート検証 +```python +def validate_translation_pair(engine, source_lang, target_lang): + """翻訳ペアの有効性検証""" + try: + engine_data = translation_lang[engine] + source_supported = source_lang in engine_data["source"] + target_supported = target_lang in engine_data["target"] + + return { + "valid": source_supported and target_supported, + "source_supported": source_supported, + "target_supported": target_supported + } + except KeyError: + return { + "valid": False, + "source_supported": False, + "target_supported": False, + "error": f"Unknown engine: {engine}" + } +``` + +## 注意事項 + +- エンジンによって言語コード形式が異なる +- 地域バリエーションはエンジンにより対応状況が異なる +- 新しい言語追加時は全エンジンでの対応状況を確認 +- API制限や課金体系はエンジンごとに異なる +- 一部の言語ペアは翻訳精度に差がある場合がある + +## 関連モジュール + +- `translation_translator.py`: 翻訳エンジン本体 +- `translation_utils.py`: 翻訳ユーティリティ +- `transcription_languages.py`: 音声認識言語マッピング +- `config.py`: 翻訳言語設定管理 +- `controller.py`: 言語選択UI制御 \ No newline at end of file diff --git a/src-python/docs/details/translation_translator.md b/src-python/docs/details/translation_translator.md new file mode 100644 index 00000000..970385ae --- /dev/null +++ b/src-python/docs/details/translation_translator.md @@ -0,0 +1,406 @@ +# translation_translator.py - 翻訳エンジン統合クラス + +## 概要 + +複数の翻訳エンジンを統合管理する高レベル翻訳インターフェースです。DeepL、Google、Bing、Papago、CTranslate2などの多様な翻訳サービスを統一的に扱い、エラー時の自動フォールバック機能と認証管理を提供します。 + +## 主要機能 + +### 多エンジン統合 +- DeepL(無料版・API版) +- Google Translate(Webスクレイピング) +- Microsoft Translator(Bing) +- Papago Translator +- CTranslate2(ローカル翻訳) + 
+### 統一インターフェース +- エンジン依存を隠蔽した単一の翻訳メソッド +- 自動エラーハンドリング・フォールバック +- 認証情報の統合管理 + +### オフライン翻訳対応 +- CTranslate2による完全オフライン翻訳 +- 複数モデルサイズ(small/large)対応 +- CUDA高速化サポート + +## クラス構造 + +### Translator クラス +```python +class Translator: + def __init__(self) -> None: + self.deepl_client: Optional[DeepLClient] = None + self.ctranslate2_translator: Any = None + self.ctranslate2_tokenizer: Any = None + self.is_loaded_ctranslate2_model: bool = False + self.is_changed_translator_parameters: bool = False + self.is_enable_translators: bool = ENABLE_TRANSLATORS +``` + +翻訳機能の中核クラス + +#### 属性 +- **deepl_client**: DeepL APIクライアント +- **ctranslate2_translator**: ローカル翻訳モデル +- **ctranslate2_tokenizer**: CTranslate2トークナイザー +- **is_loaded_ctranslate2_model**: ローカルモデル読み込み状態 +- **is_enable_translators**: Web翻訳サービス利用可能フラグ + +## 主要メソッド + +### 翻訳実行 + +```python +translate(translator_name: str, source_language: str, target_language: str, + target_country: str, message: str) -> Any +``` + +統一翻訳インターフェース + +#### パラメータ +- **translator_name**: 翻訳エンジン名("DeepL", "Google", "CTranslate2"等) +- **source_language**: 送信元言語 +- **target_language**: 送信先言語 +- **target_country**: 送信先国・地域 +- **message**: 翻訳対象テキスト + +#### 戻り値 +- **str**: 翻訳結果(成功時) +- **False**: 翻訳失敗時 + +### DeepL認証管理 + +```python +authenticationDeepLAuthKey(authkey: str) -> bool +``` + +DeepL APIキーの認証と設定 + +#### パラメータ +- **authkey**: DeepL APIキー + +#### 戻り値 +- **bool**: 認証成功可否 + +### CTranslate2管理 + +```python +changeCTranslate2Model(path: str, model_type: str, device: str = "cpu", + device_index: int = 0, compute_type: str = "auto") -> None +``` + +ローカル翻訳モデルの読み込み・変更 + +#### パラメータ +- **path**: モデルファイルのベースパス +- **model_type**: モデルサイズ("small"/"large") +- **device**: 計算デバイス("cpu"/"cuda") +- **device_index**: デバイスインデックス +- **compute_type**: 計算精度タイプ + +### 状態管理 + +```python +isLoadedCTranslate2Model() -> bool +``` + +CTranslate2モデルの読み込み状態確認 + +```python +isChangedTranslatorParameters() -> bool +setChangedTranslatorParameters(is_changed: bool) -> None +``` + 
+翻訳設定変更フラグの管理 + +## 使用方法 + +### 基本的な翻訳 + +```python +from models.translation.translation_translator import Translator + +# 翻訳器の初期化 +translator = Translator() + +# Google翻訳の使用 +result = translator.translate( + translator_name="Google", + source_language="Japanese", + target_language="English", + target_country="United States", + message="こんにちは、世界!" +) + +if result != False: + print(f"翻訳結果: {result}") # "Hello, world!" +else: + print("翻訳に失敗しました") +``` + +### DeepL API使用 + +```python +# DeepL APIキーの設定 +api_key = "your-deepl-api-key" +auth_success = translator.authenticationDeepLAuthKey(api_key) + +if auth_success: + print("DeepL API認証成功") + + # DeepL APIで翻訳 + result = translator.translate( + translator_name="DeepL_API", + source_language="English", + target_language="Japanese", + target_country="Japan", + message="Hello, world!" + ) + print(f"DeepL翻訳: {result}") +else: + print("DeepL API認証失敗") +``` + +### ローカル翻訳(CTranslate2)の使用 + +```python +# ローカルモデルの読み込み +translator.changeCTranslate2Model( + path=".", # アプリケーションルート + model_type="small", # smallモデル使用 + device="cuda", # GPU使用 + device_index=0, + compute_type="float16" # 半精度で高速化 +) + +# モデル読み込み確認 +if translator.isLoadedCTranslate2Model(): + print("CTranslate2モデル読み込み完了") + + # ローカル翻訳実行 + result = translator.translate( + translator_name="CTranslate2", + source_language="Japanese", + target_language="English", + target_country="United States", + message="機械翻訳のテストです" + ) + print(f"ローカル翻訳: {result}") +else: + print("CTranslate2モデル読み込み失敗") +``` + +### エラーハンドリング付きの翻訳 + +```python +def safe_translate(translator, message, source_lang="Japanese", target_lang="English"): + """安全な翻訳処理""" + # 翻訳エンジンの優先順位 + engines = ["DeepL_API", "DeepL", "Google", "CTranslate2"] + + for engine in engines: + try: + result = translator.translate( + translator_name=engine, + source_language=source_lang, + target_language=target_lang, + target_country="United States", + message=message + ) + + if result != False: + print(f"{engine}で翻訳成功: {result}") + 
return result + else: + print(f"{engine}翻訳失敗、次のエンジンを試行") + + except Exception as e: + print(f"{engine}でエラー: {e}") + continue + + print("全ての翻訳エンジンで失敗") + return None + +# 使用例 +result = safe_translate(translator, "こんにちは") +``` + +### 翻訳設定の管理 + +```python +# 設定変更フラグの確認 +if translator.isChangedTranslatorParameters(): + print("翻訳設定が変更されています") + + # 設定変更の適用(例:モデル再読み込み) + translator.changeCTranslate2Model(".", "small", "cpu") + + # フラグのリセット + translator.setChangedTranslatorParameters(False) +``` + +## 翻訳エンジン比較 + +### DeepL API(有料) +- **精度**: 最高レベル +- **速度**: 高速 +- **制限**: API使用料、月間制限 +- **対応**: 26言語、地域別対応 + +### DeepL(無料) +- **精度**: 高品質 +- **速度**: 中程度 +- **制限**: 月間使用量制限、文字数制限 +- **対応**: 26言語 + +### Google Translate +- **精度**: 良好 +- **速度**: 高速 +- **制限**: アクセス頻度制限 +- **対応**: 100+言語 + +### CTranslate2(ローカル) +- **精度**: 中〜高(モデル依存) +- **速度**: 高速(GPU使用時) +- **制限**: なし(オフライン) +- **対応**: 主要言語ペア + +### その他(Bing, Papago等) +- **精度**: 中程度 +- **速度**: 中程度 +- **制限**: サービス依存 +- **対応**: サービス固有 + +## CTranslate2詳細 + +### 対応モデル +```python +ctranslate2_weights = { + "small": { + "url": "m2m100_418m.zip", + "directory_name": "m2m100_418m", + "tokenizer": "facebook/m2m100_418M" + }, + "large": { + "url": "m2m100_12b.zip", + "directory_name": "m2m100_12b", + "tokenizer": "facebook/m2m100_1.2b" + } +} +``` + +### パフォーマンス特性 + +#### small モデル +- **サイズ**: ~400MB +- **メモリ**: ~1GB RAM +- **VRAM**: ~500MB(CUDA使用時) +- **速度**: 高速 +- **精度**: 良好 + +#### large モデル +- **サイズ**: ~4.8GB +- **メモリ**: ~6GB RAM +- **VRAM**: ~3GB(CUDA使用時) +- **速度**: 中程度 +- **精度**: 高品質 + +### 計算タイプ設定 +```python +# CPU使用時 +compute_type = "int8" # 速度重視 + +# CUDA使用時 +compute_type = "float16" # バランス重視 +compute_type = "int8_float16" # メモリ効率重視 +``` + +## エラーハンドリング + +### ネットワークエラー +- 接続タイムアウト +- API制限超過 +- サービス一時停止 + +### 認証エラー +- 無効なAPIキー +- 期限切れアカウント +- 使用量上限到達 + +### モデルエラー +- ファイル破損 +- VRAM不足 +- 非対応言語ペア + +### 対応策 +```python +def robust_translation(translator, message, source_lang, target_lang): + """堅牢な翻訳処理""" + # オンライン翻訳を先に試行 + 
online_engines = ["DeepL_API", "DeepL", "Google"] + + for engine in online_engines: + try: + result = translator.translate(engine, source_lang, target_lang, "", message) + if result != False: + return result + except Exception as e: + print(f"{engine}エラー: {e}") + continue + + # オンライン翻訳が全て失敗した場合、ローカル翻訳にフォールバック + try: + if not translator.isLoadedCTranslate2Model(): + translator.changeCTranslate2Model(".", "small", "cpu") + + result = translator.translate("CTranslate2", source_lang, target_lang, "", message) + if result != False: + return result + except Exception as e: + print(f"ローカル翻訳エラー: {e}") + + return "翻訳に失敗しました" +``` + +## 依存関係 + +### 必須依存関係 +- `translation_languages`: 言語コード管理 +- `translation_utils`: CTranslate2ユーティリティ +- `utils`: エラーログ、計算デバイス管理 + +### オプション依存関係 +- `deepl`: DeepL APIライブラリ +- `translators`: Web翻訳サービスライブラリ +- `ctranslate2`: ローカル翻訳エンジン +- `transformers`: トークナイザー + +## 設定要件 + +### 環境変数 +- `DEEPL_AUTH_KEY`: DeepL APIキー(オプション) + +### ファイル配置 +``` +root/ +└── weights/ + └── ctranslate2/ + ├── m2m100_418m/ # smallモデル + └── m2m100_12b/ # largeモデル +``` + +## 注意事項 + +- Web翻訳サービスは利用制限に注意 +- CTranslate2の初回読み込みは時間がかかる +- GPU使用時はVRAM消費量に注意 +- API認証情報の適切な管理が必要 +- 長文翻訳時は分割処理を推奨 + +## 関連モジュール + +- `translation_languages.py`: 言語コードマッピング +- `translation_utils.py`: CTranslate2ユーティリティ +- `config.py`: 翻訳設定管理 +- `model.py`: 翻訳機能統合 +- `controller.py`: 翻訳制御インターフェース \ No newline at end of file diff --git a/src-python/docs/details/translation_utils.md b/src-python/docs/details/translation_utils.md new file mode 100644 index 00000000..9eab4d2b --- /dev/null +++ b/src-python/docs/details/translation_utils.md @@ -0,0 +1,438 @@ +# translation_utils.py - CTranslate2モデル管理ユーティリティ + +## 概要 + +CTranslate2によるローカル機械翻訳モデルの自動ダウンロード、展開、管理を行うユーティリティモジュールです。複数のモデルサイズ(small/large)とプラットフォーム(CPU/CUDA)に対応し、モデルファイルの完全性チェックと自動修復機能を提供します。 + +## 主要機能 + +### モデル自動管理 +- CTranslate2モデルの自動ダウンロード +- ZIP形式モデルの展開・配置 +- モデルファイルの完全性検証 +- 破損モデルの自動再取得 + +### マルチプラットフォーム対応 +- CPU版・CUDA版の両対応 +- 複数モデルサイズの管理 +- 
プラットフォーム別最適化 + +## 定数・設定 + +### モデル定義 + +```python +# CTranslate2重みファイル情報 +ctranslate2_weights = { + "small": { + "url": "m2m100_418m.zip", + "directory_name": "m2m100_418m", + "tokenizer": "facebook/m2m100_418M" + }, + "large": { + "url": "m2m100_12b.zip", + "directory_name": "m2m100_12b", + "tokenizer": "facebook/m2m100_1.2b" + } +} +``` + +### 設定パラメータ +- **BASE_WEIGHTS_URL**: モデル配布ベースURL +- **LOCAL_WEIGHTS_DIR**: ローカル保存ディレクトリ +- **CHUNK_SIZE**: ダウンロード時のチャンクサイズ + +## 主要関数 + +### モデルダウンロード + +```python +def downloadCTranslate2Model(model_type: str, device: str = "cpu") -> bool: + """CTranslate2モデルの自動ダウンロード""" +``` + +指定されたモデルタイプとデバイス用のモデルをダウンロード + +#### パラメータ +- **model_type**: モデルサイズ("small"/"large") +- **device**: 計算デバイス("cpu"/"cuda") + +#### 戻り値 +- **bool**: ダウンロード成功可否 + +### モデル存在確認 + +```python +def checkCTranslate2ModelExists(model_type: str, device: str = "cpu") -> bool: + """モデルファイルの存在確認""" +``` + +指定されたモデルがローカルに存在するかチェック + +#### パラメータ +- **model_type**: 確認対象モデルタイプ +- **device**: 対象デバイス + +#### 戻り値 +- **bool**: モデル存在可否 + +### モデル完全性検証 + +```python +def validateCTranslate2Model(model_type: str, device: str = "cpu") -> bool: + """モデルファイルの完全性検証""" +``` + +ダウンロード済みモデルの整合性を確認 + +#### パラメータ +- **model_type**: 検証対象モデル +- **device**: 対象デバイス + +#### 戻り値 +- **bool**: モデル正常性 + +## 使用方法 + +### 基本的なモデル管理 + +```python +from models.translation.translation_utils import * + +# smallモデル(CPU版)のダウンロード確認 +if not checkCTranslate2ModelExists("small", "cpu"): + print("smallモデルが見つかりません。ダウンロード中...") + success = downloadCTranslate2Model("small", "cpu") + + if success: + print("ダウンロード完了") + else: + print("ダウンロード失敗") +else: + print("smallモデルは既に存在します") +``` + +### GPU用モデルの準備 + +```python +# CUDA版largeモデルのセットアップ +model_type = "large" +device = "cuda" + +# 既存モデルの確認 +if checkCTranslate2ModelExists(model_type, device): + # モデルの完全性検証 + if validateCTranslate2Model(model_type, device): + print(f"{model_type}モデル({device}版)準備完了") + else: + print("モデルが破損しています。再ダウンロード中...") + # 破損モデルの再取得 + 
downloadCTranslate2Model(model_type, device) +else: + # 新規ダウンロード + print(f"{model_type}モデル({device}版)をダウンロード中...") + downloadCTranslate2Model(model_type, device) +``` + +### 自動モデル管理システム + +```python +def ensureModelReady(model_type="small", device="cpu", max_retries=3): + """モデルの準備を保証する関数""" + + for attempt in range(max_retries): + print(f"モデル準備 試行 {attempt + 1}/{max_retries}") + + # モデル存在確認 + if not checkCTranslate2ModelExists(model_type, device): + print("モデルが見つかりません。ダウンロード中...") + if not downloadCTranslate2Model(model_type, device): + print(f"ダウンロード失敗(試行 {attempt + 1})") + continue + + # モデル完全性確認 + if validateCTranslate2Model(model_type, device): + print("モデル準備完了") + return True + else: + print("モデルが破損しています。再取得中...") + # 破損ファイルの削除(実装依存) + # remove_corrupted_model(model_type, device) + continue + + print("モデル準備に失敗しました") + return False + +# 使用例 +if ensureModelReady("small", "cpu"): + print("翻訳システム初期化可能") +else: + print("翻訳システム初期化失敗") +``` + +### 複数モデルの一括管理 + +```python +def setupAllModels(): + """全モデルの一括セットアップ""" + + models = [ + ("small", "cpu"), + ("small", "cuda"), + ("large", "cpu"), + ("large", "cuda") + ] + + results = {} + + for model_type, device in models: + print(f"\n=== {model_type}モデル({device}版)セットアップ ===") + + # デバイス利用可能性チェック(CUDA版の場合) + if device == "cuda" and not torch.cuda.is_available(): + print("CUDA環境が利用できません。スキップします。") + results[(model_type, device)] = False + continue + + # モデル準備 + success = ensureModelReady(model_type, device) + results[(model_type, device)] = success + + if success: + print(f"✓ {model_type}({device}版)準備完了") + else: + print(f"✗ {model_type}({device}版)準備失敗") + + # 結果サマリー + print("\n=== セットアップ結果 ===") + for (model_type, device), success in results.items(): + status = "成功" if success else "失敗" + print(f"{model_type}({device}版): {status}") + + return results + +# 全モデルセットアップの実行 +setupAllModels() +``` + +## モデル仕様 + +### smallモデル(m2m100_418m) + +```python +model_info = { + "name": "m2m100_418m", + "size": "~400MB", + "parameters": 
"418M", + "languages": "100言語", + "tokenizer": "facebook/m2m100_418M", + "memory_requirements": { + "cpu": "~1GB RAM", + "cuda": "~500MB VRAM" + }, + "performance": { + "speed": "高速", + "quality": "良好" + } +} +``` + +#### 特徴 +- 高速処理に適している +- メモリ使用量が少ない +- リアルタイム翻訳に最適 +- 100言語ペア対応 + +### largeモデル(m2m100_12b) + +```python +model_info = { + "name": "m2m100_12b", + "size": "~4.8GB", + "parameters": "1.2B", + "languages": "100言語", + "tokenizer": "facebook/m2m100_1.2b", + "memory_requirements": { + "cpu": "~6GB RAM", + "cuda": "~3GB VRAM" + }, + "performance": { + "speed": "中程度", + "quality": "高品質" + } +} +``` + +#### 特徴 +- 高品質翻訳が可能 +- 大容量メモリが必要 +- バッチ処理に適している +- 複雑な文章に対応 + +## ファイル構造 + +### ディレクトリレイアウト +``` +weights/ +└── ctranslate2/ + ├── m2m100_418m/ # smallモデル(CPU版) + │ ├── model.bin + │ ├── vocabulary.txt + │ ├── config.json + │ └── shared_vocabulary.txt + ├── m2m100_418m_cuda/ # smallモデル(CUDA版) + │ └── [同様のファイル構成] + ├── m2m100_12b/ # largeモデル(CPU版) + │ └── [同様のファイル構成] + └── m2m100_12b_cuda/ # largeモデル(CUDA版) + └── [同様のファイル構成] +``` + +### 必須ファイル +- `model.bin`: 変換済みモデルウェイト +- `vocabulary.txt`: 語彙ファイル +- `config.json`: モデル設定ファイル +- `shared_vocabulary.txt`: 共有語彙ファイル + +## ダウンロード処理 + +### ネットワーク処理 + +```python +def downloadWithProgress(url: str, destination: str) -> bool: + """進捗表示付きダウンロード""" + try: + response = requests.get(url, stream=True) + response.raise_for_status() + + total_size = int(response.headers.get('content-length', 0)) + + with open(destination, 'wb') as file: + downloaded = 0 + for chunk in response.iter_content(chunk_size=CHUNK_SIZE): + if chunk: + file.write(chunk) + downloaded += len(chunk) + + # 進捗表示 + if total_size > 0: + progress = (downloaded / total_size) * 100 + print(f"\rダウンロード進捗: {progress:.1f}%", end="") + + print(f"\nダウンロード完了: {destination}") + return True + + except Exception as e: + print(f"\nダウンロードエラー: {e}") + return False +``` + +### 展開処理 + +```python +def extractZipModel(zip_path: str, extract_to: str) -> bool: + """ZIPファイルの展開""" + 
try: + with zipfile.ZipFile(zip_path, 'r') as zip_ref: + # 展開先ディレクトリの作成 + os.makedirs(extract_to, exist_ok=True) + + # ファイル展開 + zip_ref.extractall(extract_to) + + print(f"展開完了: {extract_to}") + + # 元のZIPファイルを削除(オプション) + os.remove(zip_path) + print(f"一時ファイル削除: {zip_path}") + + return True + + except Exception as e: + print(f"展開エラー: {e}") + return False +``` + +## エラーハンドリング + +### ネットワークエラー +- 接続タイムアウト +- ダウンロード中断 +- サーバーエラー + +### ファイルシステムエラー +- 容量不足 +- 権限エラー +- ファイル破損 + +### リトライ機構 + +```python +def downloadWithRetry(url: str, destination: str, max_retries: int = 3) -> bool: + """リトライ付きダウンロード""" + + for attempt in range(max_retries): + print(f"ダウンロード試行 {attempt + 1}/{max_retries}") + + try: + if downloadWithProgress(url, destination): + return True + except Exception as e: + print(f"試行 {attempt + 1} 失敗: {e}") + + # 一時ファイルのクリーンアップ + if os.path.exists(destination): + os.remove(destination) + + # 最後の試行でない場合は少し待機 + if attempt < max_retries - 1: + time.sleep(2 ** attempt) # 指数バックオフ + + print("全ての試行が失敗しました") + return False +``` + +## パフォーマンス最適化 + +### ダウンロード最適化 +- チャンク単位での分割ダウンロード +- 進捗表示による体験向上 +- 自動リトライによる信頼性確保 + +### ストレージ最適化 +- 一時ファイルの自動削除 +- 重複ファイルの検出・排除 +- 容量効率的なファイル管理 + +### メモリ最適化 +- ストリーミングダウンロード +- 大容量ファイル対応 +- メモリ使用量の制御 + +## 依存関係 + +### 必須依存関係 +- `requests`: HTTPダウンロード +- `zipfile`: アーカイブ展開 +- `os`: ファイルシステム操作 +- `pathlib`: パス操作 + +### オプション依存関係 +- `tqdm`: 進捗バー表示(実装による) +- `hashlib`: ファイル整合性検証(実装による) + +## 注意事項 + +- 初回ダウンロードは時間がかかる(モデルサイズ依存) +- 十分なストレージ容量を確保 +- ネットワーク環境によってダウンロード速度が変動 +- CUDA版は対応GPU環境が必要 +- モデルファイルのバックアップ推奨 + +## 関連モジュール + +- `translation_translator.py`: モデル利用クラス +- `translation_languages.py`: 言語コード管理 +- `config.py`: 設定管理 +- `utils.py`: 共通ユーティリティ +- `device_manager.py`: デバイス管理 \ No newline at end of file diff --git a/src-python/docs/details/transliteration_context_rules.md b/src-python/docs/details/transliteration_context_rules.md new file mode 100644 index 00000000..f98531c8 --- /dev/null +++ 
b/src-python/docs/details/transliteration_context_rules.md @@ -0,0 +1,397 @@ +# transliteration_context_rules.py - 文脈的転写ルールエンジン + +## 概要 + +トークン化された結果に対して文脈依存の転写ルールを適用するコンパクトなルールエンジンです。隣接するトークンの情報に基づいて読み(かな)を動的に修正し、より自然で正確な転写を実現します。 + +## 主要機能 + +### 文脈依存転写 +- 隣接トークン情報を利用した読み修正 +- 優先度ベースのルール適用順序 +- 正規表現・完全一致の両方に対応 + +### ルールエンジン +- 埋め込み型ルール定義(外部JSONファイル不要) +- 前方・後方の隣接トークン検査対応 +- インプレース変更による効率的処理 + +### 動的読み変更 +- 文脈に応じたかな読みの書き換え +- ひらがな・ヘボン式の自動クリア +- 呼び出し元での再計算トリガー + +## ルール定義構造 + +### DEFAULT_RULES + +```python +DEFAULT_RULES = { + "rules": [ + { + "name": "nan_next_tdna", # ルール名 + "target": "何", # 対象文字 + "match_mode": "equals", # マッチモード + "direction": "next", # 検査方向 + "kana_set": ["タ", "チ", "ツ"...], # 条件文字セット + "on_true": {"kana": "ナン"}, # 条件真時のアクション + "on_false": {"kana": "ナニ"} # 条件偽時のアクション + } + ] +} +``` + +### ルール要素 + +#### 基本設定 +- **name**: ルールの識別名 +- **target**: 適用対象となる文字・文字列 +- **priority**: 適用優先度(高い順に処理) +- **match_mode**: マッチングモード("equals"/"regex") + +#### 条件設定 +- **direction**: 隣接トークン検査方向("next"/"prev") +- **kana_set**: 条件判定用の文字セット +- **pattern**: 正規表現パターン(regex時) + +#### アクション設定 +- **on_true**: 条件成立時のアクション +- **on_false**: 条件不成立時のアクション +- **kana**: 設定する新しいかな読み + +## 主要関数 + +### apply_context_rules + +```python +def apply_context_rules(results: List[Dict[str, Any]], use_macron: bool = False) -> List[Dict[str, Any]] +``` + +文脈ルールをトークンリストに適用 + +#### パラメータ +- **results**: `Transliterator.split_kanji_okurigana`で生成されたトークン辞書のリスト +- **use_macron**: 互換性のためのパラメータ(ルール処理では未使用) + +#### 戻り値 +- **List[Dict[str, Any]]**: 修正されたトークンリスト(インプレース変更も実施) + +#### 必須キー +各トークン辞書は以下のキーを含む必要があります: +- **orig**: 元の文字・文字列 +- **kana**: かな読み +- **hira**: ひらがな表記 +- **hepburn**: ヘボン式ローマ字 + +## 使用方法 + +### 基本的な文脈ルール適用 + +```python +from models.transliteration.transliteration_context_rules import apply_context_rules + +# トークン化された結果(例) +results = [ + {"orig": "何", "kana": "ナニ", "hira": "なに", "hepburn": "nani"}, + {"orig": "度", "kana": "ド", "hira": "ど", "hepburn": "do"}, + {"orig": "も", 
"kana": "モ", "hira": "も", "hepburn": "mo"} +] + +# 文脈ルールの適用 +modified_results = apply_context_rules(results) + +# 結果確認 +for token in modified_results: + print(f"{token['orig']}: {token['kana']} -> {token['hira']} ({token['hepburn']})") + +# 期待される出力(「何度」の場合): +# 何: ナン -> (再計算必要) (再計算必要) +# 度: ド -> ど (do) +# も: モ -> も (mo) +``` + +### カスタムルールでの処理 + +```python +# 独自ルール定義の例 +custom_rules = { + "rules": [ + { + "name": "custom_rule_example", + "target": "今", + "match_mode": "equals", + "direction": "next", + "kana_set": ["バ", "ビ", "ブ", "ベ", "ボ"], + "priority": 100, + "on_true": {"kana": "イマ"}, + "on_false": {"kana": "コン"} + } + ] +} + +# 注意:現在の実装では DEFAULT_RULES が固定使用されています +# カスタムルールを使用するには関数の拡張が必要です +``` + +### 正規表現マッチングの例 + +```python +# 正規表現ルールの定義例 +regex_rule = { + "name": "kanji_pattern_rule", + "match_mode": "regex", + "pattern": r"^[一-龯]$", # 任意の漢字1文字 + "direction": "next", + "kana_set": ["ア", "イ", "ウ", "エ", "オ"], + "priority": 50, + "on_true": {"kana": "特殊読み"}, + "on_false": {"kana": "通常読み"} +} +``` + +### 転写パイプラインでの統合 + +```python +def complete_transliteration_pipeline(text): + """完全な転写パイプライン""" + + # 1. 初期分割・転写 + transliterator = Transliterator() + tokens = transliterator.split_kanji_okurigana(text) + + # 2. 文脈ルール適用 + tokens = apply_context_rules(tokens) + + # 3. 修正されたトークンの再計算 + for token in tokens: + if token.get("kana") and not token.get("hira"): + # ひらがな・ヘボン式の再計算 + token["hira"] = katakana_to_hiragana(token["kana"]) + token["hepburn"] = hiragana_to_hepburn(token["hira"]) + + return tokens + +# 使用例 +text = "何度でも挑戦する" +result = complete_transliteration_pipeline(text) + +for token in result: + print(f"{token['orig']} -> {token['kana']} -> {token['hira']} -> {token['hepburn']}") +``` + +## ルール処理ロジック + +### 処理フロー + +1. **ルール準備** + - 優先度の降順でソート + - 正規表現の事前コンパイル + +2. **トークン走査** + - 各トークンに対してルールを順次適用 + - 空の`orig`を持つトークンはスキップ + +3. **マッチング判定** + - `equals`: 完全一致判定 + - `regex`: 正規表現マッチ判定 + +4. 
**隣接トークン検査** + - `direction`に基づく隣接トークン特定 + - 空のトークンをスキップして有効トークンを検索 + +5. **条件評価** + - 隣接トークンの`kana`の先頭文字チェック + - `kana_set`との一致判定 + +6. **アクション実行** + - 条件に応じて`on_true`/`on_false`を選択 + - `kana`の書き換えと`hira`/`hepburn`のクリア + +### アルゴリズム詳細 + +```python +def process_token_with_rules(token_index, tokens, rules): + """単一トークンのルール処理アルゴリズム""" + + token = tokens[token_index] + orig = token.get("orig", "") + + # 空トークンはスキップ + if not orig: + return + + for rule in rules: # 優先度順 + # マッチング判定 + if not matches_rule(orig, rule): + continue + + # 隣接トークン検索 + neighbor = find_neighbor_token(token_index, tokens, rule["direction"]) + + if neighbor: + # 条件評価 + condition = evaluate_condition(neighbor, rule["kana_set"]) + + # アクション実行 + action = rule["on_true"] if condition else rule["on_false"] + apply_action(token, action) + + # 最初にマッチしたルールで処理終了 + break + +def find_neighbor_token(current_index, tokens, direction): + """隣接する有効トークンを検索""" + + if direction == "next": + for i in range(current_index + 1, len(tokens)): + if tokens[i].get("orig"): + return tokens[i] + elif direction == "prev": + for i in range(current_index - 1, -1, -1): + if tokens[i].get("orig"): + return tokens[i] + + return None +``` + +## 具体的なルール例 + +### 「何」の読み分けルール + +```python +{ + "name": "nan_next_tdna", + "target": "何", + "match_mode": "equals", + "direction": "next", + "kana_set": ["タ", "チ", "ツ", "テ", "ト", "ダ", "ヂ", "ヅ", "デ", "ド", "ナ", "ニ", "ヌ", "ネ", "ノ"], + "on_true": {"kana": "ナン"}, + "on_false": {"kana": "ナニ"} +} +``` + +#### 動作例 + +```python +# 「何度」の場合 +tokens = [ + {"orig": "何", "kana": "ナニ"}, # 初期状態 + {"orig": "度", "kana": "ド"} # 次のトークン +] + +# ルール適用後 +tokens = [ + {"orig": "何", "kana": "ナン"}, # 「ド」が kana_set に含まれるため "ナン" に変更 + {"orig": "度", "kana": "ド"} +] + +# 「何回」の場合 +tokens = [ + {"orig": "何", "kana": "ナニ"}, # 初期状態 + {"orig": "回", "kana": "カイ"} # 次のトークン +] + +# ルール適用後 +tokens = [ + {"orig": "何", "kana": "ナニ"}, # 「カイ」が kana_set に含まれないため "ナニ" のまま + {"orig": "回", "kana": "カイ"} +] +``` + +## エラーハンドリング + +### 
正規表現コンパイルエラー +```python +# 不正な正規表現の安全な処理 +for rule in rules: + if rule.get("match_mode") == "regex" and rule.get("pattern"): + try: + rule["_re"] = re.compile(rule["pattern"]) + except Exception as e: + print(f"正規表現コンパイルエラー: {rule['pattern']} - {e}") + rule["_re"] = None # 無効化 +``` + +### 不正なトークン構造 +```python +# 必須キーの存在確認 +def validate_token(token): + """トークンの妥当性検証""" + required_keys = ["orig", "kana", "hira", "hepburn"] + + for key in required_keys: + if key not in token: + print(f"警告: トークンに必須キー '{key}' が不足") + token[key] = "" # デフォルト値を設定 + + return token +``` + +## パフォーマンス考慮事項 + +### 効率的な処理 +- インプレース変更によるメモリ効率 +- 優先度ソートによる早期終了 +- 正規表現の事前コンパイル + +### スケーラビリティ +- 大量トークンでの線形処理時間 +- ルール数の増加に対する適切な対応 +- キャッシュ機能の追加可能性 + +## 拡張可能性 + +### ルール形式の拡張 +```python +# より複雑なルール例(将来的な拡張) +complex_rule = { + "name": "multi_condition_rule", + "target": "言", + "conditions": [ + {"direction": "prev", "kana_set": ["オ", "コ"]}, + {"direction": "next", "kana_set": ["ハ", "バ"]} + ], + "operator": "AND", # or "OR" + "actions": { + "all_true": {"kana": "ゴン"}, + "any_true": {"kana": "ゲン"}, + "all_false": {"kana": "イ"} + } +} +``` + +### 動的ルール追加 +```python +def add_runtime_rule(new_rule): + """実行時ルール追加(拡張版)""" + # ルールの検証 + if validate_rule_format(new_rule): + DEFAULT_RULES["rules"].append(new_rule) + return True + return False +``` + +## 依存関係 + +### 必須依存関係 +- `typing`: 型ヒント +- `re`: 正規表現処理 + +### 関連モジュール +- `transliteration_transliterator.py`: メイン転写クラス +- `transliteration_kana_to_hepburn.py`: かな→ヘボン式変換 + +## 注意事項 + +- ルール適用後は`hira`と`hepburn`が空文字列になるため、呼び出し元での再計算が必要 +- 現在のルールは日本語に特化している +- ルール適用順序は優先度に依存するため、適切な設定が重要 +- 正規表現ルールはパフォーマンスに影響する可能性がある + +## 将来の改善点 + +- 外部ルールファイルの読み込み対応 +- より複雑な条件式のサポート +- ルール適用ログ・デバッグ機能 +- 言語別ルールセットの対応 +- パフォーマンス最適化とキャッシュ機能 \ No newline at end of file diff --git a/src-python/docs/details/transliteration_kana_to_hepburn.md b/src-python/docs/details/transliteration_kana_to_hepburn.md new file mode 100644 index 00000000..39e6d042 --- /dev/null +++ 
b/src-python/docs/details/transliteration_kana_to_hepburn.md @@ -0,0 +1,465 @@ +# transliteration_kana_to_hepburn.py - カタカナ→ヘボン式変換 + +## 概要 + +カタカナ文字列を標準的なヘボン式ローマ字に変換するモジュールです。マクロン(長音記号)対応、外来語音の変換、促音・撥音処理など、日本語のローマ字表記に必要な機能を包括的に提供します。 + +## 主要機能 + +### 標準的なヘボン式変換 +- カタカナ文字の基本ローマ字変換 +- マクロン(ā ī ū ē ō)による長音表現 +- 連続母音表記の選択的対応 + +### 特殊音処理 +- 促音(ッ)の適切な子音重複処理 +- 撥音(ン)のm/n使い分け +- 長音符(ー)の前母音延長処理 + +### 外来語対応 +- シェ(she)、チェ(che)等の組み合わせ +- ヴ音(vu, va, vi, ve, vo)の変換 +- ファ行(fa, fi, fe, fo)の処理 + +## 主要関数 + +### katakana_to_hepburn + +```python +def katakana_to_hepburn(kata: str, use_macron: bool = True) -> str +``` + +カタカナ文字列をヘボン式ローマ字に変換 + +#### パラメータ +- **kata**: 変換対象のカタカナ文字列 +- **use_macron**: マクロン使用フラグ(True=ā ī ū ē ō、False=aa ii uu ee oo) + +#### 戻り値 +- **str**: ヘボン式ローマ字文字列(小文字) + +## 使用方法 + +### 基本的な変換 + +```python +from models.transliteration.transliteration_kana_to_hepburn import katakana_to_hepburn + +# 基本的なカタカナ変換 +result1 = katakana_to_hepburn("カタカナ") +print(result1) # "katakana" + +# 長音のマクロン表記 +result2 = katakana_to_hepburn("コンピューター", use_macron=True) +print(result2) # "konpyūtā" + +# 長音の連続母音表記 +result3 = katakana_to_hepburn("コンピューター", use_macron=False) +print(result3) # "konpyuutaa" +``` + +### 特殊音の処理 + +```python +# 促音(ッ)の処理 +result1 = katakana_to_hepburn("キャッチ") +print(result1) # "kyatchi" + +result2 = katakana_to_hepburn("マッチャ") +print(result2) # "matcha" + +# 撥音(ン)の処理 +result3 = katakana_to_hepburn("ホンバン") # ホン+バン +print(result3) # "homban" (n→m変換) + +result4 = katakana_to_hepburn("ホンテン") # ホン+テン +print(result4) # "honten" (nのまま) +``` + +### 外来語音の変換 + +```python +# 外来語特殊音 +result1 = katakana_to_hepburn("シェア") +print(result1) # "shea" + +result2 = katakana_to_hepburn("チェック") +print(result2) # "chekku" + +result3 = katakana_to_hepburn("ジェット") +print(result3) # "jetto" + +# ヴ音の処理 +result4 = katakana_to_hepburn("ヴァイオリン") +print(result4) # "vaiorin" + +result5 = katakana_to_hepburn("ヴィーナス") +print(result5) # "vīnasu" + +# ファ行の処理 +result6 = katakana_to_hepburn("ファイル") 
+print(result6) # "fairu" + +result7 = katakana_to_hepburn("フィルム") +print(result7) # "firumu" +``` + +### 長音処理の詳細 + +```python +# 長音符(ー)の処理 +result1 = katakana_to_hepburn("スーパー", use_macron=True) +print(result1) # "sūpā" + +result2 = katakana_to_hepburn("パーティー", use_macron=True) +print(result2) # "pātī" + +# ou → ō の変換(東京型) +result3 = katakana_to_hepburn("トウキョウ", use_macron=True) +print(result3) # "tōkyō" + +# 連続母音表記との比較 +result4 = katakana_to_hepburn("トウキョウ", use_macron=False) +print(result4) # "toukyou" +``` + +### 複雑な組み合わせ + +```python +# 拗音(ゃゅょ)の組み合わせ +test_cases = [ + ("キャンプ", "kyanpu"), + ("シュート", "shūto"), + ("チョコレート", "chokorēto"), + ("ギュウニュウ", "gyūnyū"), + ("リュックサック", "ryukkusakku"), + ("ピョンピョン", "pyonpyon") +] + +for kata, expected in test_cases: + result = katakana_to_hepburn(kata) + print(f"{kata} -> {result}") + # キャンプ -> kyanpu + # シュート -> shūto + # チョコレート -> chokorēto + # ギュウニュウ -> gyūnyū + # リュックサック -> ryukkusakku + # ピョンピョン -> pyonpyon +``` + +## 変換ルール詳細 + +### 基本音対応表 + +```python +base_mapping = { + # 清音 + 'ア':'a', 'イ':'i', 'ウ':'u', 'エ':'e', 'オ':'o', + 'カ':'ka', 'キ':'ki', 'ク':'ku', 'ケ':'ke', 'コ':'ko', + 'サ':'sa', 'シ':'shi', 'ス':'su', 'セ':'se', 'ソ':'so', + 'タ':'ta', 'チ':'chi', 'ツ':'tsu', 'テ':'te', 'ト':'to', + 'ナ':'na', 'ニ':'ni', 'ヌ':'nu', 'ネ':'ne', 'ノ':'no', + 'ハ':'ha', 'ヒ':'hi', 'フ':'fu', 'ヘ':'he', 'ホ':'ho', + 'マ':'ma', 'ミ':'mi', 'ム':'mu', 'メ':'me', 'モ':'mo', + 'ヤ':'ya', 'ユ':'yu', 'ヨ':'yo', + 'ラ':'ra', 'リ':'ri', 'ル':'ru', 'レ':'re', 'ロ':'ro', + 'ワ':'wa', 'ヲ':'wo', 'ン':'n', + + # 濁音・半濁音 + 'ガ':'ga', 'ギ':'gi', 'グ':'gu', 'ゲ':'ge', 'ゴ':'go', + 'ザ':'za', 'ジ':'ji', 'ズ':'zu', 'ゼ':'ze', 'ゾ':'zo', + 'ダ':'da', 'ヂ':'ji', 'ヅ':'zu', 'デ':'de', 'ド':'do', + 'バ':'ba', 'ビ':'bi', 'ブ':'bu', 'ベ':'be', 'ボ':'bo', + 'パ':'pa', 'ピ':'pi', 'プ':'pu', 'ペ':'pe', 'ポ':'po', + + # 特殊音 + 'ヴ':'vu' +} +``` + +### 拗音組み合わせ + +```python +digraphs_mapping = { + # キャ行 + ('キ','ャ'):'kya', ('キ','ュ'):'kyu', ('キ','ョ'):'kyo', + ('ギ','ャ'):'gya', ('ギ','ュ'):'gyu', ('ギ','ョ'):'gyo', + + # シャ行 + 
('シ','ャ'):'sha', ('シ','ュ'):'shu', ('シ','ョ'):'sho', + ('ジ','ャ'):'ja', ('ジ','ュ'):'ju', ('ジ','ョ'):'jo', + + # チャ行 + ('チ','ャ'):'cha', ('チ','ュ'):'chu', ('チ','ョ'):'cho', + + # その他の拗音 + ('ニ','ャ'):'nya', ('ヒ','ャ'):'hya', ('ビ','ャ'):'bya', + ('ピ','ャ'):'pya', ('ミ','ャ'):'mya', ('リ','ャ'):'rya', + + # 外来語音(ファ行等) + ('フ','ァ'):'fa', ('フ','ィ'):'fi', ('フ','ェ'):'fe', ('フ','ォ'):'fo', + ('シ','ェ'):'she', ('チ','ェ'):'che', ('テ','ィ'):'ti', + ('ツ','ァ'):'tsa', ('ツ','ィ'):'tsi', ('ツ','ェ'):'tse', ('ツ','ォ'):'tso', + + # ヴ音組み合わせ + ('ヴ','ァ'):'va', ('ヴ','ィ'):'vi', ('ヴ','ェ'):'ve', + ('ヴ','ォ'):'vo', ('ヴ','ュ'):'vyu' +} +``` + +### マクロン変換規則 + +```python +macron_rules = { + 'aa': 'ā', # カア → kā + 'ii': 'ī', # キイ → kī + 'uu': 'ū', # クウ → kū + 'ee': 'ē', # ケエ → kē + 'oo': 'ō', # コオ → kō + 'ou': 'ō' # コウ → kō(東京型長音) +} +``` + +## 特殊処理アルゴリズム + +### 促音(ッ)処理 + +```python +def handle_sokuon(current_pos, kata_string, result_list): + """促音の処理アルゴリズム""" + + # 次の音を確認 + if current_pos + 1 < len(kata_string): + next_kana = kata_string[current_pos + 1] + + # 次の音のローマ字を取得 + next_roman = get_next_roman(next_kana, kata_string[current_pos + 1:]) + + # 子音部分を抽出して重複 + consonant = extract_initial_consonant(next_roman) + if consonant: + result_list.append(consonant[0]) # 先頭子音を重複 + + # 促音自体は消費 + return current_pos + 1 + +# 例: +# マッチャ -> ma + tcha (ッ -> t重複) -> matcha +# キャッチ -> kya + tchi (ッ -> t重複) -> kyatchi +``` + +### 撥音(ン)処理 + +```python +def handle_hatsuon(roman_string): + """撥音のm/n使い分け処理""" + + # n の後に b/p/m が続く場合は m に変換 + import re + + # パターン: n + [bmp] -> m + [bmp] + result = re.sub(r'n(?=[bmp])', 'm', roman_string) + + return result + +# 例: +# ホンバン -> honban -> homban +# サンポ -> sanpo -> sampo +# コンマ -> konma -> komma +# but: ホンテン -> honten (変更なし) +``` + +### 長音符(ー)処理 + +```python +def handle_choonpu(roman_list): + """長音符の前母音延長処理""" + + result = [] + i = 0 + + while i < len(roman_list): + if roman_list[i] == '-': # 長音符マーカー + if i > 0: + prev_char = result[-1] # 直前の文字 + if prev_char in 'aiueo': + # 前が母音なら重複(後でマクロン処理) + 
result.append(prev_char) + # else: 子音の場合は無視 + else: + result.append(roman_list[i]) + i += 1 + + return result + +# 例: +# スー -> su + - -> suu -> sū (マクロン処理後) +# パーティー -> pa + - + ti + - -> paatii -> pātī +``` + +## 実装例・テストケース + +### 基本テストセット + +```python +def run_basic_tests(): + """基本変換テストセット""" + + test_cases = [ + # 基本音 + ("アイウエオ", "aiueo"), + ("カキクケコ", "kakikukeko"), + ("サシスセソ", "sashisuseso"), + + # 濁音・半濁音 + ("ガギグゲゴ", "gagigugego"), + ("ザジズゼゾ", "zajizuzezo"), + ("バビブベボ", "babibubebo"), + ("パピプペポ", "papipupepo"), + + # 特殊音 + ("シャシュショ", "shashusho"), + ("チャチュチョ", "chachucho"), + ("ジャジュジョ", "jajujo"), + + # 促音 + ("アッパ", "appa"), + ("イッキ", "ikki"), + ("エッサ", "essa"), + + # 撥音 + ("アンパン", "ampan"), + ("コンマ", "komma"), + ("ホンテン", "honten") + ] + + for kata, expected in test_cases: + result = katakana_to_hepburn(kata) + assert result == expected, f"Failed: {kata} -> {result} (expected {expected})" + print(f"✓ {kata} -> {result}") + +run_basic_tests() +``` + +### 外来語テストセット + +```python +def run_foreign_word_tests(): + """外来語変換テストセット""" + + foreign_tests = [ + # ファ行 + ("ファイル", "fairu"), + ("フィルム", "firumu"), + ("フェイス", "feisu"), + ("フォント", "fonto"), + + # シェ・チェ + ("シェア", "shea"), + ("シェル", "sheru"), + ("チェック", "chekku"), + ("チェイン", "chein"), + + # ヴ音 + ("ヴァイオリン", "vaiorin"), + ("ヴィーナス", "vīnasu"), + ("ヴェール", "vēru"), + ("ヴォーカル", "vōkaru"), + + # ティ・トゥ・ドゥ + ("ティー", "tī"), + ("パーティー", "pātī"), + ("トゥー", "tū"), + ("ドゥー", "dū") + ] + + for kata, expected in foreign_tests: + result = katakana_to_hepburn(kata, use_macron=True) + print(f"✓ {kata} -> {result}") + # 実際のexpectedとの比較は実装依存 + +run_foreign_word_tests() +``` + +### 長音テストセット + +```python +def run_long_vowel_tests(): + """長音処理テストセット""" + + long_vowel_tests = [ + # マクロンあり + ("コーヒー", "kōhī", True), + ("スーパー", "sūpā", True), + ("パーティー", "pātī", True), + ("トウキョウ", "tōkyō", True), # ou -> ō + + # マクロンなし + ("コーヒー", "koohii", False), + ("スーパー", "suupaa", False), + ("パーティー", "paatii", False), + ("トウキョウ", "toukyou", False) + ] + 
+ for kata, expected, use_macron in long_vowel_tests: + result = katakana_to_hepburn(kata, use_macron=use_macron) + print(f"✓ {kata} -> {result} (macron={use_macron})") + +run_long_vowel_tests() +``` + +## パフォーマンス考慮事項 + +### 効率的な処理 +- 単一パス処理による高速変換 +- 正規表現の最小限使用 +- 辞書ルックアップの最適化 + +### メモリ効率 +- 文字列連結の最適化 +- 不要な中間オブジェクトの削減 +- 大量テキスト処理への対応 + +## 制限事項・注意点 + +### 変換精度の制限 +- 文脈に依存する読み分けは非対応 +- 固有名詞の特殊読みは非対応 +- 方言・古語の特殊音は非対応 + +### ヘボン式の範囲 +- 標準的なヘボン式に準拠 +- 一部の外来語音は近似変換 +- 撥音の文脈依存ルールは簡略化 + +### 入力制限 +```python +# 適切な入力例 +good_inputs = ["カタカナ", "シャープ", "コンピューター"] + +# 問題のある入力例 +problematic_inputs = [ + "ひらがな", # ひらがな混在(処理されるがそのまま) + "English", # 英字混在(処理されるがそのまま) + "123数字", # 数字混在(処理されるがそのまま) + "", # 空文字列(空文字列を返す) +] + +# 混在入力の処理例 +mixed_result = katakana_to_hepburn("カタカナと英語English") +print(mixed_result) # "katakanaと英語english" +``` + +## 関連モジュール + +- `transliteration_transliterator.py`: メイン転写クラス +- `transliteration_context_rules.py`: 文脈依存ルール +- 外部のひらがな↔カタカナ変換モジュール(必要に応じて) + +## 将来の改善点 + +- 更なる外来語音への対応 +- 文脈依存の読み分け機能 +- パフォーマンス最適化 +- より詳細なヘボン式バリエーション対応 +- 音韻変化ルールの追加 \ No newline at end of file diff --git a/src-python/docs/details/transliteration_transliterator.md b/src-python/docs/details/transliteration_transliterator.md new file mode 100644 index 00000000..73dad537 --- /dev/null +++ b/src-python/docs/details/transliteration_transliterator.md @@ -0,0 +1,659 @@ +# transliteration_transliterator.py - 総合音写・転写システム + +## 概要 + +SudachiPyを利用した日本語のローマ字転写システムのメインクラスです。形態素解析、漢字・送り仮名の分離、文脈依存ルールの適用、ヘボン式変換を統合し、高精度な日本語ローマ字化を提供します。 + +## 主要機能 + +### 統合転写システム +- SudachiPyによる高精度形態素解析 +- 漢字・送り仮名の自動分離処理 +- 文脈依存読み変更ルールの適用 + +### 多層変換処理 +- カタカナ読み取得・分配 +- ひらがな自動変換 +- ヘボン式ローマ字生成 + +### 並行処理対応 +- スレッドセーフなトークナイザー利用 +- ロック機構による安全な並行実行 +- 高負荷環境での安定動作 + +## クラス構造 + +### Transliterator クラス +```python +class Transliterator: + def __init__(self) -> None: + self.tokenizer_obj: tokenizer.Tokenizer + self.mode: tokenizer.Tokenizer.SplitMode + self._tokenizer_lock: threading.Lock +``` + +日本語転写処理の中核クラス + 
+#### 属性 +- **tokenizer_obj**: SudachiPyトークナイザーインスタンス +- **mode**: 分割モード(SplitMode.C = 最長一致) +- **_tokenizer_lock**: 並行アクセス制御用ミューテックス + +## 主要メソッド + +### analyze + +```python +def analyze(self, text: str, use_macron: bool = False) -> List[Dict[str, Any]] +``` + +テキストを解析して転写情報を生成 + +#### パラメータ +- **text**: 解析対象の日本語テキスト +- **use_macron**: マクロン使用フラグ(長音表記方式) + +#### 戻り値 +- **List[Dict[str, Any]]**: トークン転写情報のリスト + +#### 出力辞書構造 +```python +{ + "orig": str, # 元の文字・文字列 + "kana": str, # カタカナ読み + "hira": str, # ひらがな読み + "hepburn": str # ヘボン式ローマ字 +} +``` + +### split_kanji_okurigana (静的メソッド) + +```python +@staticmethod +def split_kanji_okurigana(surface: str, reading_kana: str, use_macron: bool = True) -> List[Dict[str, str]] +``` + +単語の表層形と読みを漢字・送り仮名ブロックに分割 + +#### パラメータ +- **surface**: 表層形(漢字+ひらがな混在可能) +- **reading_kana**: 全体のカタカナ読み +- **use_macron**: ヘボン式変換でのマクロン使用 + +#### 戻り値 +- **List[Dict[str, str]]**: 分割された部分の転写情報 + +## 補助メソッド + +### is_kanji (静的メソッド) + +```python +@staticmethod +def is_kanji(ch: str) -> bool +``` + +文字が漢字かどうかを判定 + +#### パラメータ +- **ch**: 判定対象文字 + +#### 戻り値 +- **bool**: 漢字判定結果 + +### kata_to_hira (静的メソッド) + +```python +@staticmethod +def kata_to_hira(text: str) -> str +``` + +カタカナをひらがなに変換 + +#### パラメータ +- **text**: 変換対象のカタカナテキスト + +#### 戻り値 +- **str**: ひらがな変換結果 + +## 使用方法 + +### 基本的な転写処理 + +```python +from models.transliteration.transliteration_transliterator import Transliterator + +# 転写システムの初期化 +transliterator = Transliterator() + +# 基本的な文章の転写 +text = "向こうへ行く" +results = transliterator.analyze(text) + +for token in results: + print(f"{token['orig']} -> {token['kana']} -> {token['hira']} -> {token['hepburn']}") + +# 期待される出力例: +# 向こう -> ムコウ -> むこう -> mukou +# へ -> ヘ -> へ -> he +# 行く -> イク -> いく -> iku +``` + +### マクロン使用の長音処理 + +```python +# マクロンを使用した長音表記 +text = "東京に行く" +results_macron = transliterator.analyze(text, use_macron=True) +results_normal = transliterator.analyze(text, use_macron=False) + +print("=== マクロンあり ===") +for token in results_macron: 
+ print(f"{token['orig']} -> {token['hepburn']}") + +print("=== マクロンなし ===") +for token in results_normal: + print(f"{token['orig']} -> {token['hepburn']}") + +# 期待される出力: +# === マクロンあり === +# 東京 -> tōkyō +# に -> ni +# 行く -> iku + +# === マクロンなし === +# 東京 -> toukyou +# に -> ni +# 行く -> iku +``` + +### 複雑な文章の処理 + +```python +# 漢字・ひらがな・カタカナ混在文の処理 +complex_text = "パーティーで美しい花を見る" +results = transliterator.analyze(complex_text, use_macron=True) + +for token in results: + print(f"原文: '{token['orig']}'") + print(f" カナ: {token['kana']}") + print(f" ひら: {token['hira']}") + print(f" ローマ: {token['hepburn']}") + print() + +# 期待される出力: +# 原文: 'パーティー' +# カナ: パーティー +# ひら: ぱーてぃー +# ローマ: pātī +# +# 原文: 'で' +# カナ: デ +# ひら: で +# ローマ: de +# +# 原文: '美しい' +# カナ: ウツクシイ +# ひら: うつくしい +# ローマ: utsukushii +``` + +### 文脈依存ルールの効果確認 + +```python +# 文脈に依存する読み変更の例(「何」の読み分け) +test_cases = [ + "何が好き?", # 何 -> ナニ (後続が「ガ」) + "何度も挑戦", # 何 -> ナン (後続が「ド」) + "何色ありますか?" # 何 -> ナニ (後続が「イ」) +] + +for text in test_cases: + results = transliterator.analyze(text) + + print(f"入力: {text}") + + # 「何」トークンを探して読みを確認 + for token in results: + if token['orig'] == '何': + print(f"「何」の読み: {token['kana']} -> {token['hepburn']}") + break + print() + +# 期待される出力: +# 入力: 何が好き? +# 「何」の読み: ナニ -> nani +# +# 入力: 何度も挑戦 +# 「何」の読み: ナン -> nan +# +# 入力: 何色ありますか? +# 「何」の読み: ナニ -> nani +``` + +### 特殊文字・記号の処理 + +```python +# 記号・英数字混在テキストの処理 +mixed_text = "ID:12345、URL:https://example.com" +results = transliterator.analyze(mixed_text) + +for token in results: + print(f"'{token['orig']}' -> '{token['hepburn']}'") + +# 期待される出力: +# 'ID' -> 'ID' # 英字はそのまま +# ':' -> ':' # 記号はそのまま +# '12345' -> '12345' # 数字はそのまま +# '、' -> '、' # 区切り記号はそのまま +# 'URL' -> 'URL' # 英字はそのまま +``` + +## 内部処理フロー + +### 解析処理パイプライン + +```python +def analyze_pipeline_explained(self, text, use_macron=False): + """転写処理パイプラインの詳細説明""" + + # 1. SudachiPy形態素解析 + with self._tokenizer_lock: + tokens = self.tokenizer_obj.tokenize(text, self.mode) + + results = [] + + # 2.
各トークンの処理 + for token in tokens: + surface = token.surface() # 表層形 + reading = token.reading_form() # 読み(カタカナ) + pos = token.part_of_speech() # 品詞情報 + + # 3. 記号・空白の特別処理 + if pos and pos[0] in ["記号", "補助記号", "空白"]: + reading = surface # 記号は表層形をそのまま使用 + + # 4. 表層形と読みが同じ場合(カタカナ・記号等) + if surface == reading: + results.append({ + "orig": surface, + "kana": reading, + "hira": surface, # そのまま + "hepburn": surface # そのまま + }) + continue + + # 5. 単一文字の処理 + if len(surface) == 1: + results.append({ + "orig": surface, + "kana": reading, + "hira": self.kata_to_hira(reading), + "hepburn": katakana_to_hepburn(reading, use_macron) + }) + else: + # 6. 複数文字の漢字・送り仮名分離 + parts = self.split_kanji_okurigana(surface, reading, use_macron) + results.extend(parts) + + # 7. 文脈依存ルールの適用 + try: + results = apply_context_rules(results, use_macron) or results + except Exception: + pass # ルール適用失敗時は元の結果を使用 + + # 8. ルール適用後の再計算 + for entry in results: + kana = entry.get("kana", "") + if kana: + entry["hira"] = self.kata_to_hira(kana) + entry["hepburn"] = katakana_to_hepburn(kana, use_macron) + + return results +``` + +### 漢字・送り仮名分離アルゴリズム + +```python +def split_algorithm_explained(surface, reading_kana, use_macron=True): + """分離アルゴリズムの詳細説明""" + + # 1. 表層形のブロック分割 + blocks = [] + current_block = "" + prev_is_kanji = None + + for char in surface: + is_kanji = Transliterator.is_kanji(char) + + if prev_is_kanji is None or is_kanji == prev_is_kanji: + # 同じタイプの文字は同じブロックに + current_block += char + else: + # タイプが変わったら新しいブロック + blocks.append((prev_is_kanji, current_block)) + current_block = char + + prev_is_kanji = is_kanji + + if current_block: + blocks.append((prev_is_kanji, current_block)) + + # 例: "向こう" -> [(True, "向"), (False, "こう")] + # "行く" -> [(True, "行"), (False, "く")] + + # 2. 読みの分配 + kana_len = len(reading_kana) + + # 初期割当: 各ブロックの文字数に比例 + allocations = [len(block_text) for _, block_text in blocks] + allocated_total = sum(allocations) + remaining = kana_len - allocated_total + + # 3.
余った読みの分配(漢字ブロック優先) + if remaining > 0: + # まず漢字ブロックに分配 + for i, (is_kanji, _) in enumerate(blocks): + if remaining <= 0: + break + if is_kanji: + allocations[i] += 1 + remaining -= 1 + + # まだ余りがある場合は左から順に分配 + i = 0 + while remaining > 0 and len(blocks) > 0: + allocations[i] += 1 + remaining -= 1 + i = (i + 1) % len(blocks) + + # 4. 読みが不足している場合は右から削減 + if remaining < 0: + need_to_remove = -remaining + i = len(blocks) - 1 + + while need_to_remove > 0 and i >= 0: + can_remove = max(0, allocations[i] - 1) + remove_amount = min(can_remove, need_to_remove) + allocations[i] -= remove_amount + need_to_remove -= remove_amount + i -= 1 + + # 5. 最終的な読み分配 + pos = 0 + result = [] + + for (is_kanji, block_text), allocation in zip(blocks, allocations): + block_reading = reading_kana[pos:pos + allocation] + pos += allocation + + result.append({ + "orig": block_text, + "kana": block_reading, + "hira": Transliterator.kata_to_hira(block_reading), + "hepburn": katakana_to_hepburn(block_reading, use_macron) + }) + + return result +``` + +## 並行処理・スレッドセーフティ + +### ロック機構 + +```python +class ThreadSafeUsage: + """スレッドセーフな使用例""" + + def __init__(self): + self.transliterator = Transliterator() + + def process_texts_concurrently(self, texts): + """複数テキストの並行処理""" + import concurrent.futures + + def process_single(text): + return self.transliterator.analyze(text) + + with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor: + # 内部のロック機構により安全に並行実行 + futures = [executor.submit(process_single, text) for text in texts] + results = [f.result() for f in futures] + + return results + +# 使用例 +processor = ThreadSafeUsage() +texts = ["東京に行く", "大阪で食事", "名古屋を観光", "福岡に宿泊"] +results = processor.process_texts_concurrently(texts) + +for i, result in enumerate(results): + print(f"テキスト{i+1}: {texts[i]}") + for token in result: + print(f" {token['orig']} -> {token['hepburn']}") +``` + +### パフォーマンス考慮事項 + +```python +# 大量テキスト処理のベストプラクティス +def efficient_batch_processing(texts, batch_size=100): +
"""効率的なバッチ処理""" + + transliterator = Transliterator() + results = [] + + for i in range(0, len(texts), batch_size): + batch = texts[i:i + batch_size] + + batch_results = [] + for text in batch: + # 各テキストを個別に処理(ロック制御あり) + result = transliterator.analyze(text) + batch_results.append(result) + + results.extend(batch_results) + + # 1000件処理するごとに進捗を表示 + if len(results) % 1000 == 0: + print(f"処理済み: {len(results)} テキスト") + + return results +``` + +## エラーハンドリング + +### 例外処理 + +```python +def safe_analyze(text): + """安全な解析処理""" + + transliterator = Transliterator() + + try: + results = transliterator.analyze(text) + return results, None + + except RuntimeError as e: + if "Already borrowed" in str(e): + # SudachiPyの並行アクセスエラー + print("並行アクセスエラーが発生しました。リトライします。") + return None, "RETRY_NEEDED" + else: + print(f"実行時エラー: {e}") + return None, "RUNTIME_ERROR" + + except Exception as e: + print(f"予期しないエラー: {e}") + return None, "UNKNOWN_ERROR" + +# 使用例(リトライ機構付き) +def analyze_with_retry(text, max_retries=3): + """リトライ機構付き解析""" + + for attempt in range(max_retries): + results, error = safe_analyze(text) + + if results is not None: + return results + + if error == "RETRY_NEEDED": + print(f"リトライ {attempt + 1}/{max_retries}") + import time + time.sleep(0.1 * (attempt + 1)) # 線形バックオフ(待機時間を試行ごとに0.1秒ずつ延長) + continue + else: + break + + # 全てのリトライが失敗した場合のフォールバック + print("解析に失敗しました。フォールバック処理を実行します。") + return [{"orig": text, "kana": text, "hira": text, "hepburn": text}] +``` + +## 設定・カスタマイズ + +### SudachiPy設定 + +```python +# カスタムSudachiPy設定での初期化 +class CustomTransliterator(Transliterator): + def __init__(self, dict_type="full", split_mode="C"): + """カスタム設定での初期化""" + + # 辞書タイプの選択 + dict_types = { + "small": dictionary.Dictionary.create(dict_type="small"), + "core": dictionary.Dictionary.create(dict_type="core"), + "full": dictionary.Dictionary.create(dict_type="full") + } + + self.tokenizer_obj = dict_types.get(dict_type, dict_types["full"]) + + # 分割モードの選択 + split_modes = { + "A": tokenizer.Tokenizer.SplitMode.A, #
短い単位 + "B": tokenizer.Tokenizer.SplitMode.B, # 中間単位 + "C": tokenizer.Tokenizer.SplitMode.C # 長い単位(デフォルト) + } + + self.mode = split_modes.get(split_mode, split_modes["C"]) + self._tokenizer_lock = threading.Lock() + +# 使用例 +# 短い単位での分割を使用 +small_unit_transliterator = CustomTransliterator(dict_type="core", split_mode="A") + +text = "取り敢えず検索してみる" +results = small_unit_transliterator.analyze(text) + +for token in results: + print(f"{token['orig']} -> {token['hepburn']}") +``` + +## テスト・デバッグ + +### 包括的テストセット + +```python +def run_comprehensive_tests(): + """包括的な機能テスト""" + + transliterator = Transliterator() + + test_cases = [ + # 基本的な文章 + ("向こうへ行く", "向こう", "ムコウ"), + ("美しい花", "美しい", "ウツクシイ"), + + # 文脈依存 + ("何度も", "何", "ナン"), + ("何が", "何", "ナニ"), + + # 外来語 + ("パーティー", "パーティー", "パーティー"), + ("コンピューター", "コンピューター", "コンピューター"), + + # 漢字・送り仮名 + ("取り敢えず", "取り", "トリ"), + ("見知らぬ", "見知ら", "ミシラ"), + + # 記号・英数字 + ("ID:12345", "ID", "ID"), + ("SessionIDを取得", "SessionID", "SessionID") + ] + + for text, target_orig, expected_kana in test_cases: + results = transliterator.analyze(text) + + # 対象トークンを検索 + target_token = None + for token in results: + if token['orig'] == target_orig: + target_token = token + break + + if target_token: + actual_kana = target_token['kana'] + status = "✓" if actual_kana == expected_kana else "✗" + print(f"{status} {text}: {target_orig} -> {actual_kana} (期待値: {expected_kana})") + else: + print(f"✗ {text}: トークン '{target_orig}' が見つかりません") + +run_comprehensive_tests() +``` + +## 依存関係 + +### 必須依存関係 +- `sudachipy`: 形態素解析エンジン +- `threading`: 並行制御 +- `typing`: 型ヒント + +### 内部モジュール依存 +- `transliteration_kana_to_hepburn`: ヘボン式変換 +- `transliteration_context_rules`: 文脈依存ルール + +### システム要件 +- Python 3.7以上 +- SudachiPy辞書ファイル(自動ダウンロード) +- 十分なメモリ(辞書読み込み用) + +## 注意事項・制限 + +### 処理精度の制限 +- 形態素解析結果に依存 +- 未知語・固有名詞は読み推定 +- 文脈によっては不正確な分割 + +### パフォーマンス制限 +- 初回実行時の辞書読み込み時間 +- 大量テキスト処理時のメモリ使用量 +- 並行アクセス時のロック待機 + +### 出力形式の制限 +```python +# 現在サポートしていない機能 +unsupported_features = [ + 
"アクセント記号(音調)", + "方言・古語の特殊読み", + "人名・地名の特殊読み", + "外国語の音写(中国語・韓国語等)", + "カスタム読み辞書", + "品詞情報の出力" +] +``` + +## 関連モジュール + +- `transliteration_kana_to_hepburn.py`: ヘボン式変換処理 +- `transliteration_context_rules.py`: 文脈依存ルール適用 +- `config.py`: システム設定管理 +- `utils.py`: ユーティリティ関数 + +## 将来の改善点 + +- カスタム読み辞書対応 +- より高精度な文脈解析 +- 他言語音写システムとの統合 +- リアルタイム処理最適化 +- 分散処理対応 \ No newline at end of file diff --git a/src-python/docs/details/utils.md b/src-python/docs/details/utils.md new file mode 100644 index 00000000..e924bb3a --- /dev/null +++ b/src-python/docs/details/utils.md @@ -0,0 +1,213 @@ +# utils.py - ユーティリティ関数モジュール + +## 概要 + +VRCTアプリケーション全体で使用される汎用的なユーティリティ関数を提供するモジュールです。データ検証、ネットワーク接続確認、計算デバイス管理、ログ機能などの共通機能を集約しています。 + +## 主要機能 + +### データ検証機能 + +- 辞書構造の型安全な検証 +- IPアドレス形式の検証 +- WebSocketサーバーの可用性確認 + +### システム情報取得 + +- 利用可能な計算デバイス(CPU/CUDA)の一覧取得 +- 最適な計算タイプの自動選択 +- デバイス固有の制約対応 + +### ログ機能 + +- 構造化ログの出力 +- ローテーションログファイル管理 +- エラーログとプロセスログの分離 + +### ネットワーク機能 + +- インターネット接続状態の確認 +- Base64エンコード/デコード処理 + +## 主要関数 + +### データ検証 + +```python +validateDictStructure(data: dict, structure: dict) -> bool +``` + +- 辞書とその期待される構造が完全に一致するかを判別 +- 入れ子構造にも対応 +- 型安全性を保証 + +### ネットワーク関連 + +```python +isConnectedNetwork(url="http://www.google.com", timeout=3) -> bool +``` + +- 指定URLへの接続可能性をチェック +- タイムアウト設定可能 + +```python +isAvailableWebSocketServer(host: str, port: int) -> bool +``` + +- WebSocketサーバーのバインド可能性を確認 + +```python +isValidIpAddress(ip_address: str) -> bool +``` + +- IPv4/IPv6アドレスの有効性を検証 + +### 計算デバイス管理 + +```python +getComputeDeviceList() -> List[Dict[str, Any]] +``` + +- 利用可能なCPU/CUDA計算デバイスの一覧を取得 +- 各デバイスの計算タイプを含む詳細情報を提供 + +```python +getBestComputeType(device: str, device_index: int) -> str +``` + +- デバイスに最適な計算タイプを自動選択 +- GPU固有の制約を考慮(GTX、RTX、Tesla、A100、Quadro等) + +### ログ機能 + +```python +setupLogger(name: str, log_file: str, level: int = logging.INFO) -> logging.Logger +``` + +- ローテーション機能付きログの設定 +- 10MBサイズでのローテーション +- UTF-8エンコード対応 + +```python +printLog(log: str, data: Any = None) -> None 
+``` + +- 構造化プロセスログの出力 +- JSON形式での標準出力 + +```python +printResponse(status: int, endpoint: str, result: Any = None) -> None +``` + +- APIレスポンスの構造化出力 +- シリアライゼーションエラーの安全な処理 + +```python +errorLogging() -> None +``` + +- 例外トレースバックのログ記録 +- フォールバック機能付き + +### その他のユーティリティ + +```python +encodeBase64(data: str) -> Dict[str, Any] +``` + +- Base64エンコード済みJSON文字列のデコード +- エラー処理付き + +```python +removeLog() -> None +``` + +- プロセスログファイルの初期化 + +## 使用方法 + +### 基本的な使い方 + +```python +from utils import validateDictStructure, isConnectedNetwork, printLog + +# 辞書構造の検証 +expected_structure = {"name": str, "age": int} +data = {"name": "test", "age": 25} +is_valid = validateDictStructure(data, expected_structure) + +# ネットワーク接続確認 +is_connected = isConnectedNetwork() + +# ログ出力 +printLog("処理開始", {"user_id": 123}) +``` + +### 計算デバイス管理 + +```python +from utils import getComputeDeviceList, getBestComputeType + +# 利用可能デバイス一覧 +devices = getComputeDeviceList() + +# 最適な計算タイプ選択 +compute_type = getBestComputeType("cuda", 0) +``` + +### ログ設定 + +```python +from utils import setupLogger, errorLogging + +# カスタムログの設定 +logger = setupLogger("my_module", "my_module.log") +logger.info("処理完了") + +# エラーログ記録 +try: + # 何らかの処理 + pass +except Exception: + errorLogging() +``` + +## 依存関係 + +### 必須依存関係 + +- `json`: JSON処理 +- `logging`: ログ機能 +- `requests`: HTTP通信 +- `ipaddress`: IPアドレス検証 +- `socket`: ソケット通信 + +### オプション依存関係 + +- `torch`: CUDA計算デバイス情報取得 +- `ctranslate2`: 計算タイプ情報取得 + +## デバイス別計算タイプ制約 + +### GTXシリーズ +- サポート: `float32`のみ +- 理由: 古いアーキテクチャによる制約 + +### RTX/Tesla/A100/Quadroシリーズ +- サポート: フル機能 +- 優先順位: `int8_bfloat16` > `int8_float16` > `int8` > `bfloat16` > `float16` > `int8_float32` > `float32` + +### CPU +- サポート: 全計算タイプ(ハードウェア依存) + +## エラーハンドリング + +- すべての関数は例外安全性を考慮 +- オプション依存関係の欠如に対する適切なフォールバック +- ログ機能は多段階のフェールセーフ機構を持つ + +## 注意事項 + +- 計算デバイス情報取得は初回実行時にやや時間がかかる場合がある +- ログローテーションは10MBサイズで自動実行 +- ネットワーク接続確認はデフォルト3秒のタイムアウト設定 \ No newline at end of file diff --git a/src-python/docs/details/watchdog.md 
b/src-python/docs/details/watchdog.md new file mode 100644 index 00000000..e574b882 --- /dev/null +++ b/src-python/docs/details/watchdog.md @@ -0,0 +1,670 @@ +# watchdog.py - 軽量監視システム + +## 概要 + +タイムアウトベースの軽量監視(ウォッチドッグ)システムです。定期的な"餌やり"(feed)により正常動作を確認し、指定時間内に餌やりがない場合にコールバック関数を実行する単純で効果的な監視機構を提供します。 + +## 主要機能 + +### タイムアウト監視 +- 最後の餌やり時刻からの経過時間監視 +- 設定可能なタイムアウト閾値 +- タイムアウト時の自動コールバック実行 + +### 柔軟な実行モード +- 単発チェック(手動呼び出し) +- バックグラウンドスレッド実行 +- カスタムチェック間隔設定 + +### 防御的設計 +- コールバック例外の隔離処理 +- スレッドセーフな制御機構 +- 適切なリソース管理 + +## クラス構造 + +### Watchdog クラス + +```python +class Watchdog: + def __init__(self, timeout: int = 60, interval: int = 20) -> None: + self.timeout: int # タイムアウト秒数 + self.interval: int # チェック間隔秒数 + self.last_feed_time: float # 最後の餌やり時刻 + self.callback: Optional[Callable] # タイムアウト時コールバック + self._thread: Optional[Thread] # バックグラウンドスレッド + self._stop_event: Optional[Event] # 停止イベント +``` + +軽量ウォッチドッグの中核クラス + +#### パラメータ +- **timeout**: 餌やりなしでタイムアウトするまでの秒数 +- **interval**: 監視チェックの推奨間隔秒数 + +## 主要メソッド + +### 基本制御 + +```python +def feed(self) -> None +``` + +ウォッチドッグに餌やりを行い、タイマーをリセット + +```python +def setCallback(self, callback: Callable[[], None]) -> None +``` + +タイムアウト時に実行するコールバック関数を設定 + +#### パラメータ +- **callback**: 引数なしの呼び出し可能オブジェクト + +### 監視実行 + +```python +def start(self) -> None +``` + +単発のウォッチドッグチェックを実行し、間隔秒数だけスリープ + +```python +def start_in_thread(self, daemon: bool = True) -> None +``` + +バックグラウンドスレッドでウォッチドッグを開始 + +#### パラメータ +- **daemon**: デーモンスレッドとして実行するかのフラグ + +```python +def stop(self, timeout: Optional[float] = None) -> None +``` + +バックグラウンドスレッドを停止 + +#### パラメータ +- **timeout**: スレッド終了待機のタイムアウト秒数 + +## 使用方法 + +### 基本的な監視システム + +```python +from models.watchdog.watchdog import Watchdog +import time + +def on_timeout(): + """タイムアウト時の処理""" + print("警告: システムの応答がありません!") + # ログ出力、アラート送信、復旧処理等 + +# ウォッチドッグの初期化 +watchdog = Watchdog(timeout=30, interval=10) # 30秒でタイムアウト、10秒間隔 +watchdog.setCallback(on_timeout) + +# バックグラウンドで監視開始 +watchdog.start_in_thread(daemon=True) + 
+# メインプロセスのシミュレーション +for i in range(10): + print(f"処理中... {i}") + + # 正常な処理では定期的に餌やり + if i % 3 == 0: # 3回に1回餌やり + watchdog.feed() + print("ウォッチドッグに餌やりしました") + + time.sleep(5) + +# 監視停止 +watchdog.stop() +``` + +### 手動チェックモード + +```python +# 手動でウォッチドッグをチェック +def manual_monitoring_example(): + watchdog = Watchdog(timeout=60, interval=5) + + def system_failure_handler(): + print("システム障害を検出しました") + # 復旧処理、通知等 + + watchdog.setCallback(system_failure_handler) + + # メインループ内で定期チェック + while True: + # 何らかの重要な処理 + process_critical_work() + + # 処理が正常なら餌やり + if is_system_healthy(): + watchdog.feed() + + # 監視チェック実行(5秒間隔でスリープ) + watchdog.start() + +def process_critical_work(): + """重要な処理のシミュレーション""" + time.sleep(2) + +def is_system_healthy(): + """システム正常性チェックのシミュレーション""" + import random + return random.random() > 0.2 # 80%の確率で正常 + +# manual_monitoring_example() +``` + +### プロセス監視システム + +```python +class ProcessMonitor: + """外部プロセス監視システム""" + + def __init__(self, process_name, check_interval=30): + self.process_name = process_name + self.watchdog = Watchdog(timeout=60, interval=check_interval) + self.watchdog.setCallback(self.on_process_timeout) + self.monitoring = False + + def on_process_timeout(self): + """プロセス応答タイムアウト時の処理""" + print(f"警告: プロセス {self.process_name} が応答しません") + + # プロセス存在確認 + if self.is_process_running(): + print("プロセスは実行中ですが応答なし。再起動を試行します。") + self.restart_process() + else: + print("プロセスが停止しています。再起動します。") + self.start_process() + + def is_process_running(self): + """プロセス実行状態確認""" + import psutil + for proc in psutil.process_iter(['name']): + if proc.info['name'] == self.process_name: + return True + return False + + def start_process(self): + """プロセス起動""" + print(f"プロセス {self.process_name} を起動中...") + # 実際の起動処理 + + def restart_process(self): + """プロセス再起動""" + print(f"プロセス {self.process_name} を再起動中...") + # 実際の再起動処理 + + def feed_watchdog(self): + """外部から呼び出される餌やりメソッド""" + self.watchdog.feed() + + def start_monitoring(self): + """監視開始""" + self.monitoring = True + 
self.watchdog.start_in_thread(daemon=True) + print(f"プロセス {self.process_name} の監視を開始しました") + + def stop_monitoring(self): + """監視停止""" + self.monitoring = False + self.watchdog.stop() + print(f"プロセス {self.process_name} の監視を停止しました") + +# 使用例 +vrchat_monitor = ProcessMonitor("VRChat.exe", check_interval=15) +vrchat_monitor.start_monitoring() + +# VRChatプロセスが正常に動作している時の餌やり +# (実際にはVRChatからのOSC通信等で判定) +for _ in range(20): + time.sleep(10) + + if vrchat_monitor.is_process_running(): + vrchat_monitor.feed_watchdog() + print("VRChat正常動作確認") + +vrchat_monitor.stop_monitoring() +``` + +### ネットワーク監視システム + +```python +class NetworkWatchdog: + """ネットワーク接続監視""" + + def __init__(self, target_host="8.8.8.8", timeout=45): + self.target_host = target_host + self.watchdog = Watchdog(timeout=timeout, interval=15) + self.watchdog.setCallback(self.on_network_timeout) + self.last_ping_success = True + + def on_network_timeout(self): + """ネットワークタイムアウト処理""" + print("ネットワーク接続に問題があります") + + # 複数ホストでの確認 + test_hosts = ["8.8.8.8", "1.1.1.1", "google.com"] + + for host in test_hosts: + if self.ping_host(host): + print(f"{host} への接続は正常です") + self.watchdog.feed() # 1つでも成功したら復旧とみなす + return + + print("すべてのホストへの接続に失敗。ネットワーク設定を確認してください。") + self.handle_network_failure() + + def ping_host(self, host): + """ホストへのping確認""" + import subprocess + import platform + + # プラットフォームに応じたpingコマンド + if platform.system().lower() == "windows": + cmd = ["ping", "-n", "1", "-w", "3000", host] + else: + cmd = ["ping", "-c", "1", "-W", "3", host] + + try: + result = subprocess.run(cmd, capture_output=True, timeout=5) + return result.returncode == 0 + except (subprocess.TimeoutExpired, Exception): + return False + + def handle_network_failure(self): + """ネットワーク障害時の処理""" + print("ネットワーク障害対応処理を実行中...") + # DNS設定リセット、ネットワークアダプター再起動等 + + def check_network_continuously(self): + """継続的なネットワーク監視""" + self.watchdog.start_in_thread(daemon=True) + + while True: + if self.ping_host(self.target_host): + if not 
self.last_ping_success: + print("ネットワーク接続が復旧しました") + + self.watchdog.feed() + self.last_ping_success = True + else: + if self.last_ping_success: + print("ネットワーク接続に問題が発生しました") + + self.last_ping_success = False + + time.sleep(10) + +# 使用例 +network_monitor = NetworkWatchdog(target_host="google.com", timeout=60) + +# バックグラウンドでネットワーク監視 +import threading +monitor_thread = threading.Thread(target=network_monitor.check_network_continuously, daemon=True) +monitor_thread.start() + +# メインプログラムの実行 +print("ネットワーク監視システム開始...") +time.sleep(120) # 2分間監視 + +network_monitor.watchdog.stop() +``` + +### システムリソース監視 + +```python +class SystemResourceWatchdog: + """システムリソース監視""" + + def __init__(self): + self.cpu_watchdog = Watchdog(timeout=300, interval=30) # CPU使用率監視 + self.memory_watchdog = Watchdog(timeout=180, interval=20) # メモリ監視 + + self.cpu_watchdog.setCallback(self.on_cpu_overload) + self.memory_watchdog.setCallback(self.on_memory_pressure) + + self.cpu_threshold = 80.0 # CPU使用率閾値(%) + self.memory_threshold = 85.0 # メモリ使用率閾値(%) + + def on_cpu_overload(self): + """CPU過負荷時の処理""" + print("警告: CPU使用率が長時間高い状態です") + self.optimize_cpu_usage() + + def on_memory_pressure(self): + """メモリ圧迫時の処理""" + print("警告: メモリ使用率が危険なレベルです") + self.free_memory() + + def get_cpu_usage(self): + """CPU使用率取得""" + import psutil + return psutil.cpu_percent(interval=1) + + def get_memory_usage(self): + """メモリ使用率取得""" + import psutil + return psutil.virtual_memory().percent + + def optimize_cpu_usage(self): + """CPU使用率最適化""" + print("CPU最適化処理を実行中...") + # 低優先度プロセスの特定・制限 + # 不要なバックグラウンドタスクの停止等 + + def free_memory(self): + """メモリ解放処理""" + print("メモリ解放処理を実行中...") + # ガベージコレクション実行 + import gc + gc.collect() + # キャッシュクリア等 + + def monitor_resources(self): + """リソース監視メインループ""" + self.cpu_watchdog.start_in_thread(daemon=True) + self.memory_watchdog.start_in_thread(daemon=True) + + while True: + # CPU使用率チェック + cpu_usage = self.get_cpu_usage() + if cpu_usage < self.cpu_threshold: + self.cpu_watchdog.feed() + + # 
メモリ使用率チェック + memory_usage = self.get_memory_usage() + if memory_usage < self.memory_threshold: + self.memory_watchdog.feed() + + print(f"CPU: {cpu_usage:.1f}%, メモリ: {memory_usage:.1f}%") + time.sleep(5) + + def stop_monitoring(self): + """監視停止""" + self.cpu_watchdog.stop() + self.memory_watchdog.stop() + +# 使用例 +resource_monitor = SystemResourceWatchdog() + +# リソース監視開始 +import threading +resource_thread = threading.Thread(target=resource_monitor.monitor_resources, daemon=True) +resource_thread.start() + +# しばらく監視 +time.sleep(60) + +resource_monitor.stop_monitoring() +``` + +## 高度な使用パターン + +### 多段階監視システム + +```python +class MultilevelWatchdog: + """多段階警告レベル対応ウォッチドッグ""" + + def __init__(self): + # 異なるタイムアウトレベル + self.warning_watchdog = Watchdog(timeout=30, interval=10) # 警告レベル + self.critical_watchdog = Watchdog(timeout=60, interval=15) # 危険レベル + self.emergency_watchdog = Watchdog(timeout=120, interval=20) # 緊急レベル + + # 各レベルのコールバック設定 + self.warning_watchdog.setCallback(self.on_warning) + self.critical_watchdog.setCallback(self.on_critical) + self.emergency_watchdog.setCallback(self.on_emergency) + + self.alert_level = "normal" + + def on_warning(self): + """警告レベルのコールバック""" + self.alert_level = "warning" + print("⚠️ 警告: システムの応答が遅くなっています") + # 軽度の対応処理 + + def on_critical(self): + """危険レベルのコールバック""" + self.alert_level = "critical" + print("🔴 危険: システムの重大な問題を検出") + # 中程度の復旧処理 + + def on_emergency(self): + """緊急レベルのコールバック""" + self.alert_level = "emergency" + print("🚨 緊急: システムが完全に応答停止") + # 強制復旧・再起動処理 + + def feed_all(self): + """すべてのウォッチドッグに餌やり""" + self.warning_watchdog.feed() + self.critical_watchdog.feed() + self.emergency_watchdog.feed() + + if self.alert_level != "normal": + print("✅ システム復旧確認") + self.alert_level = "normal" + + def start_monitoring(self): + """多段階監視開始""" + self.warning_watchdog.start_in_thread(daemon=True) + self.critical_watchdog.start_in_thread(daemon=True) + self.emergency_watchdog.start_in_thread(daemon=True) + + def stop_monitoring(self): + 
"""監視停止""" + self.warning_watchdog.stop() + self.critical_watchdog.stop() + self.emergency_watchdog.stop() + +# 使用例 +multilevel_monitor = MultilevelWatchdog() +multilevel_monitor.start_monitoring() + +# 正常時は定期餌やり、異常時は餌やり停止で段階的警告 +for i in range(30): + time.sleep(5) + + # 時々餌やりを忘れるシミュレーション + if i % 7 != 0: # 7回に1回は餌やりしない + multilevel_monitor.feed_all() + + print(f"現在の警告レベル: {multilevel_monitor.alert_level}") + +multilevel_monitor.stop_monitoring() +``` + +## エラーハンドリング・防御機構 + +### 例外安全性 + +```python +def safe_callback_example(): + """安全なコールバック実装例""" + + def potentially_failing_callback(): + """例外を起こす可能性があるコールバック""" + print("重要な処理を実行中...") + + # 何らかの理由で例外が発生 + raise RuntimeError("処理中にエラーが発生しました") + + # ウォッチドッグは例外を隔離するため、システム全体は継続動作 + watchdog = Watchdog(timeout=10, interval=5) + watchdog.setCallback(potentially_failing_callback) + + # エラーが発生してもウォッチドッグ自体は停止しない + watchdog.start_in_thread(daemon=True) + + # 通常の処理継続 + for i in range(5): + time.sleep(3) + watchdog.feed() + print(f"メイン処理継続中: {i}") + + watchdog.stop() + print("ウォッチドッグは例外に関係なく正常に停止しました") + +safe_callback_example() +``` + +### 堅牢なスレッド制御 + +```python +class RobustWatchdog: + """より堅牢なウォッチドッグ実装""" + + def __init__(self, timeout=60, interval=20): + self.base_watchdog = Watchdog(timeout, interval) + self.restart_count = 0 + self.max_restarts = 3 + + def robust_callback(self): + """自動復旧機能付きコールバック""" + try: + print(f"問題検出(再起動回数: {self.restart_count}/{self.max_restarts})") + + if self.restart_count < self.max_restarts: + self.attempt_recovery() + self.restart_count += 1 + else: + print("最大再起動回数に達しました。管理者に連絡してください。") + self.emergency_shutdown() + + except Exception as e: + print(f"復旧処理中にエラー: {e}") + + def attempt_recovery(self): + """復旧処理の試行""" + print("自動復旧処理を実行中...") + time.sleep(2) # 復旧処理のシミュレーション + + # 復旧成功時はカウンターリセット + if self.check_system_health(): + self.restart_count = 0 + self.base_watchdog.feed() + print("復旧成功") + else: + print("復旧失敗") + + def check_system_health(self): + """システム正常性確認""" + # 実際のヘルスチェックロジック + 
import random + return random.random() > 0.3 # 70%成功率 + + def emergency_shutdown(self): + """緊急停止処理""" + print("緊急停止処理を実行します") + # 安全な停止処理 + + def start_robust_monitoring(self): + """堅牢監視開始""" + self.base_watchdog.setCallback(self.robust_callback) + self.base_watchdog.start_in_thread(daemon=True) + + def feed(self): + """餌やり(成功時はカウンターリセット)""" + self.base_watchdog.feed() + self.restart_count = 0 + + def stop(self): + """監視停止""" + self.base_watchdog.stop() + +# 使用例 +robust_monitor = RobustWatchdog(timeout=20, interval=8) +robust_monitor.start_robust_monitoring() + +# 不安定なシステムのシミュレーション +for i in range(15): + time.sleep(3) + + # 時々システム異常をシミュレート + if not robust_monitor.check_system_health(): # 30%の確率で異常 + print("システム異常発生...") + # 餌やりしない + else: + robust_monitor.feed() + print("システム正常動作") + +robust_monitor.stop() +``` + +## 性能・リソース考慮事項 + +### 軽量設計の特徴 +- 最小限のメモリフットプリント +- 効率的なスレッド利用 +- CPU使用量の最適化 + +### 推奨設定値 +```python +# 用途別推奨設定 +usage_patterns = { + "realtime_monitoring": { + "timeout": 10, # 10秒 + "interval": 2 # 2秒間隔 + }, + "service_monitoring": { + "timeout": 60, # 1分 + "interval": 15 # 15秒間隔 + }, + "batch_processing": { + "timeout": 300, # 5分 + "interval": 60 # 1分間隔 + }, + "background_tasks": { + "timeout": 1800, # 30分 + "interval": 300 # 5分間隔 + } +} +``` + +## 依存関係・要件 + +### 必須依存関係 +- `threading`: スレッド制御 +- `time`: 時刻管理 +- 標準ライブラリのみ(外部依存なし) + +### システム要件 +- Python 3.7以上 +- マルチスレッド対応OS +- 最小限のシステムリソース + +## 注意事項・制限 + +### 設計上の制限 +- 単純なタイムアウトベース監視のみ +- 複雑な条件判定は非対応 +- ネットワーク監視等は上位層で実装 + +### 使用上の注意 +- コールバック関数は軽量に保つ +- 長時間ブロックする処理は避ける +- 適切なタイムアウト値の設定が重要 + +## 関連モジュール + +- `threading`: スレッド管理 +- `config.py`: 監視設定管理 +- `utils.py`: エラーログ・ユーティリティ +- `controller.py`: 監視制御インターフェース + +## 将来の改善点 + +- より複雑な監視条件のサポート +- 監視統計・メトリクス収集機能 +- 設定可能な復旧戦略 +- 分散監視システムとの連携 +- Webインターフェースでの監視状態表示 \ No newline at end of file diff --git a/src-python/docs/details/websocket_server.md b/src-python/docs/details/websocket_server.md new file mode 100644 index 00000000..2c1d18e9 --- /dev/null +++
b/src-python/docs/details/websocket_server.md @@ -0,0 +1,989 @@ +# websocket_server.py - WebSocket通信サーバー + +## 概要 + +非同期WebSocket通信を提供する包括的なサーバーシステムです。クライアント接続管理、メッセージ配信、外部スレッドからの安全な操作を統合し、VRCTアプリケーションとWebフロントエンド間のリアルタイム通信を実現します。 + +## 主要機能 + +### 非同期WebSocket通信 +- asyncio/websockets による高性能WebSocketサーバー +- 複数クライアント同時接続対応 +- 自動接続・切断管理 + +### メッセージング機能 +- リアルタイムメッセージ受信処理 +- 全クライアントへのブロードキャスト配信 +- カスタムメッセージハンドラー対応 + +### スレッド間通信 +- GUI等の外部スレッドからの安全なメッセージ送信 +- 非同期キューによる効率的な通信制御 +- スレッドセーフな操作保証 + +## クラス構造 + +### WebSocketServer クラス + +```python +class WebSocketServer: + def __init__(self, host: str='127.0.0.1', port: int=8765): + self.host: str # サーバーホスト + self.port: int # サーバーポート + self.clients: Set[WebSocketServerProtocol] # 接続クライアント集合 + self._message_handler: Optional[Callable] # メッセージハンドラー + self._loop: Optional[asyncio.AbstractEventLoop] # イベントループ + self._server: Optional[websockets.serve] # WebSocketサーバー + self._thread: Optional[threading.Thread] # サーバースレッド + self._send_queue: Optional[asyncio.Queue] # 送信キュー + self.is_running: bool # 動作状態フラグ +``` + +WebSocket通信の中核管理クラス + +## 主要メソッド + +### サーバー制御 + +```python +def start_server(self) -> None +``` + +WebSocketサーバーを開始(バックグラウンドスレッド) + +```python +def stop_server(self) -> None +``` + +WebSocketサーバーを停止・リソース解放 + +### メッセージハンドリング + +```python +def set_message_handler(self, handler: Callable[['WebSocketServer', WebSocketServerProtocol, str], None]) -> None +``` + +クライアントからのメッセージ受信時コールバック設定 + +#### パラメータ +- **handler**: メッセージハンドラー関数 `(server, websocket, message) -> None` + +### メッセージ送信 + +```python +def send(self, message: str) -> None +``` + +外部スレッドから安全にメッセージを全クライアントに送信 + +#### パラメータ +- **message**: 送信するメッセージ文字列 + +```python +def broadcast(self, message: str) -> None +``` + +非同期的に全クライアントにメッセージをブロードキャスト + +#### パラメータ +- **message**: ブロードキャストするメッセージ + +## 使用方法 + +### 基本的なWebSocketサーバー + +```python +from models.websocket.websocket_server import WebSocketServer +import time +import json + +# メッセージハンドラーの定義 +def 
on_message_received(server, websocket, message): + """クライアントからのメッセージ処理""" + print(f"クライアントからメッセージ受信: {message}") + + try: + # JSONメッセージの解析 + data = json.loads(message) + + if data.get('type') == 'translation_request': + # 翻訳要求の処理 + handle_translation_request(server, data) + elif data.get('type') == 'config_update': + # 設定更新の処理 + handle_config_update(server, data) + else: + # エコーバック + response = { + 'type': 'echo', + 'original_message': data, + 'timestamp': time.time() + } + server.broadcast(json.dumps(response)) + + except json.JSONDecodeError: + # テキストメッセージの場合 + response = f"受信しました: {message}" + server.broadcast(response) + +def handle_translation_request(server, data): + """翻訳要求の処理""" + text = data.get('text', '') + target_lang = data.get('target_language', 'English') + + # 実際の翻訳処理(ここではモック) + translated_text = f"[{target_lang}] {text}" + + response = { + 'type': 'translation_result', + 'original': text, + 'translated': translated_text, + 'target_language': target_lang + } + + server.broadcast(json.dumps(response)) + +def handle_config_update(server, data): + """設定更新の処理""" + config_key = data.get('key') + config_value = data.get('value') + + print(f"設定更新: {config_key} = {config_value}") + + response = { + 'type': 'config_updated', + 'key': config_key, + 'value': config_value, + 'status': 'success' + } + + server.broadcast(json.dumps(response)) + +# WebSocketサーバーの起動 +ws_server = WebSocketServer(host='127.0.0.1', port=8765) +ws_server.set_message_handler(on_message_received) +ws_server.start_server() + +print("WebSocketサーバーが起動しました: ws://127.0.0.1:8765") + +# 定期的なステータス送信 +for i in range(10): + status_message = { + 'type': 'status', + 'server_time': time.time(), + 'uptime': i * 5, + 'connected_clients': len(ws_server.clients) + } + + ws_server.send(json.dumps(status_message)) + time.sleep(5) + +# サーバー停止 +ws_server.stop_server() +``` + +### VRCTアプリケーション統合 + +```python +class VRCTWebSocketInterface: + """VRCT用WebSocketインターフェース""" + + def __init__(self, controller, 
port=8765): + self.controller = controller # VRCTコントローラー + self.ws_server = WebSocketServer(host='127.0.0.1', port=port) + self.ws_server.set_message_handler(self.handle_web_message) + + def handle_web_message(self, server, websocket, message): + """Webクライアントからのメッセージ処理""" + try: + data = json.loads(message) + command = data.get('command') + + if command == 'get_config': + self.send_config(server) + elif command == 'set_config': + self.update_config(server, data) + elif command == 'start_translation': + self.start_translation_service(server, data) + elif command == 'stop_translation': + self.stop_translation_service(server) + elif command == 'get_status': + self.send_status(server) + elif command == 'translate_text': + self.translate_text(server, data) + else: + self.send_error(server, f"未知のコマンド: {command}") + + except Exception as e: + self.send_error(server, f"メッセージ処理エラー: {e}") + + def send_config(self, server): + """設定情報をWebクライアントに送信""" + config_data = { + 'type': 'config', + 'data': { + 'source_language': self.controller.config.source_language, + 'target_language': self.controller.config.target_language, + 'translation_engine': self.controller.config.translation_engine, + 'osc_enabled': self.controller.config.osc_enabled, + 'overlay_enabled': self.controller.config.overlay_enabled + } + } + server.broadcast(json.dumps(config_data)) + + def update_config(self, server, data): + """設定更新""" + config_updates = data.get('config', {}) + + for key, value in config_updates.items(): + if hasattr(self.controller.config, key): + setattr(self.controller.config, key, value) + print(f"設定更新: {key} = {value}") + + # 更新確認を送信 + response = { + 'type': 'config_updated', + 'status': 'success', + 'updated_keys': list(config_updates.keys()) + } + server.broadcast(json.dumps(response)) + + def start_translation_service(self, server, data): + """翻訳サービス開始""" + try: + self.controller.start_translation() + + response = { + 'type': 'service_status', + 'service': 'translation', + 'status': 
'started', + 'message': '翻訳サービスが開始されました' + } + server.broadcast(json.dumps(response)) + + except Exception as e: + self.send_error(server, f"翻訳サービス開始エラー: {e}") + + def stop_translation_service(self, server): + """翻訳サービス停止""" + try: + self.controller.stop_translation() + + response = { + 'type': 'service_status', + 'service': 'translation', + 'status': 'stopped', + 'message': '翻訳サービスが停止されました' + } + server.broadcast(json.dumps(response)) + + except Exception as e: + self.send_error(server, f"翻訳サービス停止エラー: {e}") + + def send_status(self, server): + """システム状態送信""" + status_data = { + 'type': 'system_status', + 'data': { + 'translation_active': self.controller.is_translation_active(), + 'osc_connected': self.controller.is_osc_connected(), + 'overlay_active': self.controller.is_overlay_active(), + 'connected_clients': len(server.clients), + 'uptime': self.controller.get_uptime(), + 'memory_usage': self.controller.get_memory_usage() + } + } + server.broadcast(json.dumps(status_data)) + + def translate_text(self, server, data): + """即座翻訳実行""" + text = data.get('text', '') + source_lang = data.get('source_language') + target_lang = data.get('target_language') + + try: + # 翻訳実行 + result = self.controller.translate_text( + text, source_lang, target_lang + ) + + response = { + 'type': 'translation_result', + 'original': text, + 'translated': result, + 'source_language': source_lang, + 'target_language': target_lang, + 'timestamp': time.time() + } + server.broadcast(json.dumps(response)) + + except Exception as e: + self.send_error(server, f"翻訳エラー: {e}") + + def send_error(self, server, error_message): + """エラーメッセージ送信""" + error_data = { + 'type': 'error', + 'message': error_message, + 'timestamp': time.time() + } + server.broadcast(json.dumps(error_data)) + + def start(self): + """WebSocketインターフェース開始""" + self.ws_server.start_server() + print(f"VRCT WebSocketインターフェース開始: ws://127.0.0.1:{self.ws_server.port}") + + def stop(self): + """WebSocketインターフェース停止""" + 
self.ws_server.stop_server() + print("VRCT WebSocketインターフェース停止") + + def notify_translation_result(self, original, translated, source_lang, target_lang): + """翻訳結果の通知(VRCTコントローラーから呼び出し)""" + notification = { + 'type': 'live_translation', + 'original': original, + 'translated': translated, + 'source_language': source_lang, + 'target_language': target_lang, + 'timestamp': time.time() + } + self.ws_server.send(json.dumps(notification)) + +# 使用例(VRCTアプリケーション内) +# vrct_ws_interface = VRCTWebSocketInterface(controller) +# vrct_ws_interface.start() +``` + +### リアルタイム監視ダッシュボード + +```python +class MonitoringDashboard: + """リアルタイム監視ダッシュボード""" + + def __init__(self, system_components, port=8766): + self.components = system_components + self.ws_server = WebSocketServer(host='127.0.0.1', port=port) + self.ws_server.set_message_handler(self.handle_dashboard_message) + self.monitoring_active = False + + def handle_dashboard_message(self, server, websocket, message): + """ダッシュボードからのメッセージ処理""" + try: + data = json.loads(message) + action = data.get('action') + + if action == 'start_monitoring': + self.start_monitoring(server) + elif action == 'stop_monitoring': + self.stop_monitoring(server) + elif action == 'get_metrics': + self.send_metrics(server) + elif action == 'get_logs': + self.send_logs(server, data.get('limit', 100)) + + except Exception as e: + self.send_dashboard_error(server, str(e)) + + def start_monitoring(self, server): + """監視開始""" + if not self.monitoring_active: + self.monitoring_active = True + + # 監視スレッド開始 + import threading + monitor_thread = threading.Thread( + target=self.monitoring_loop, + args=(server,), + daemon=True + ) + monitor_thread.start() + + response = { + 'type': 'monitoring_status', + 'status': 'started' + } + server.broadcast(json.dumps(response)) + + def stop_monitoring(self, server): + """監視停止""" + self.monitoring_active = False + + response = { + 'type': 'monitoring_status', + 'status': 'stopped' + } + server.broadcast(json.dumps(response)) 
+ + def monitoring_loop(self, server): + """リアルタイム監視ループ""" + while self.monitoring_active: + try: + # システムメトリクス収集 + metrics = self.collect_metrics() + + # ダッシュボードに送信 + dashboard_data = { + 'type': 'live_metrics', + 'metrics': metrics, + 'timestamp': time.time() + } + server.broadcast(json.dumps(dashboard_data)) + + time.sleep(2) # 2秒間隔で更新 + + except Exception as e: + print(f"監視ループエラー: {e}") + time.sleep(5) + + def collect_metrics(self): + """システムメトリクス収集""" + import psutil + + metrics = { + 'system': { + 'cpu_percent': psutil.cpu_percent(), + 'memory_percent': psutil.virtual_memory().percent, + 'disk_usage': psutil.disk_usage('/').percent + }, + 'network': { + 'bytes_sent': psutil.net_io_counters().bytes_sent, + 'bytes_recv': psutil.net_io_counters().bytes_recv + }, + 'vrct': { + 'translation_count': self.components.get('translation_count', 0), + 'osc_messages_sent': self.components.get('osc_count', 0), + 'overlay_updates': self.components.get('overlay_count', 0), + 'active_connections': len(self.ws_server.clients) + } + } + + return metrics + + def send_metrics(self, server): + """メトリクス送信""" + metrics = self.collect_metrics() + + response = { + 'type': 'metrics_snapshot', + 'metrics': metrics, + 'timestamp': time.time() + } + server.broadcast(json.dumps(response)) + + def send_logs(self, server, limit): + """ログ送信""" + # ログファイルから最新のログを取得(実装例) + logs = self.get_recent_logs(limit) + + response = { + 'type': 'log_data', + 'logs': logs, + 'count': len(logs) + } + server.broadcast(json.dumps(response)) + + def get_recent_logs(self, limit): + """最新ログ取得""" + # 実際のログファイル読み込み処理 + mock_logs = [ + {'level': 'INFO', 'message': 'システム開始', 'timestamp': time.time() - 60}, + {'level': 'DEBUG', 'message': '翻訳処理完了', 'timestamp': time.time() - 30}, + {'level': 'WARNING', 'message': 'メモリ使用量増加', 'timestamp': time.time() - 10} + ] + return mock_logs[-limit:] + + def send_dashboard_error(self, server, error_message): + """ダッシュボードエラー送信""" + error_data = { + 'type': 'dashboard_error', + 
'message': error_message, + 'timestamp': time.time() + } + server.broadcast(json.dumps(error_data)) + + def start_dashboard(self): + """ダッシュボード開始""" + self.ws_server.start_server() + print(f"監視ダッシュボード開始: ws://127.0.0.1:{self.ws_server.port}") + + def stop_dashboard(self): + """ダッシュボード停止""" + self.monitoring_active = False + self.ws_server.stop_server() + +# 使用例 +system_components = { + 'translation_count': 150, + 'osc_count': 75, + 'overlay_count': 200 +} + +dashboard = MonitoringDashboard(system_components) +dashboard.start_dashboard() + +# しばらく実行 +time.sleep(60) + +dashboard.stop_dashboard() +``` + +### 高度なメッセージルーティング + +```python +class WebSocketRouter: + """WebSocketメッセージルーティングシステム""" + + def __init__(self, port=8767): + self.ws_server = WebSocketServer(host='127.0.0.1', port=port) + self.ws_server.set_message_handler(self.route_message) + self.routes = {} + self.middleware = [] + self.client_subscriptions = {} + + def add_route(self, message_type, handler): + """メッセージタイプに対するハンドラー登録""" + self.routes[message_type] = handler + + def add_middleware(self, middleware_func): + """ミドルウェア追加""" + self.middleware.append(middleware_func) + + def route_message(self, server, websocket, message): + """メッセージルーティング処理""" + try: + # JSON解析 + data = json.loads(message) + message_type = data.get('type') + + # ミドルウェア実行 + for middleware in self.middleware: + data = middleware(data, websocket) + if data is None: # ミドルウェアがNoneを返した場合は処理中断 + return + + # ルーティング実行 + if message_type in self.routes: + handler = self.routes[message_type] + response = handler(data, websocket, server) + + if response: + server.broadcast(json.dumps(response)) + else: + # 未定義メッセージタイプ + error_response = { + 'type': 'error', + 'message': f'未対応メッセージタイプ: {message_type}', + 'original_type': message_type + } + websocket.send(json.dumps(error_response)) + + except json.JSONDecodeError as e: + error_response = { + 'type': 'error', + 'message': f'JSON解析エラー: {e}' + } + websocket.send(json.dumps(error_response)) + except 
Exception as e: + error_response = { + 'type': 'error', + 'message': f'処理エラー: {e}' + } + websocket.send(json.dumps(error_response)) + + def subscription_middleware(self, data, websocket): + """購読管理ミドルウェア""" + message_type = data.get('type') + + if message_type == 'subscribe': + # 購読登録 + topics = data.get('topics', []) + client_id = id(websocket) + self.client_subscriptions[client_id] = topics + + response = { + 'type': 'subscription_confirmed', + 'topics': topics + } + websocket.send(json.dumps(response)) + return None # 処理終了 + + elif message_type == 'unsubscribe': + # 購読解除 + client_id = id(websocket) + if client_id in self.client_subscriptions: + del self.client_subscriptions[client_id] + + response = { + 'type': 'unsubscription_confirmed' + } + websocket.send(json.dumps(response)) + return None + + return data # そのまま次の処理に進む + + def authentication_middleware(self, data, websocket): + """認証ミドルウェア""" + # 簡易認証例 + api_key = data.get('api_key') + + if api_key != 'valid_api_key_123': + error_response = { + 'type': 'authentication_error', + 'message': '無効なAPIキー' + } + websocket.send(json.dumps(error_response)) + return None + + return data + + def logging_middleware(self, data, websocket): + """ログ記録ミドルウェア""" + client_ip = websocket.remote_address[0] if websocket.remote_address else 'unknown' + message_type = data.get('type', 'unknown') + + print(f"[{time.strftime('%Y-%m-%d %H:%M:%S')}] {client_ip} -> {message_type}") + + return data + + def broadcast_to_subscribers(self, topic, message_data): + """購読者へのトピック配信""" + message_data['topic'] = topic + message_json = json.dumps(message_data) + + for client_id, topics in self.client_subscriptions.items(): + if topic in topics: + # 該当クライアントを検索 + for client in self.ws_server.clients: + if id(client) == client_id: + try: + client.send(message_json) + except Exception as e: + print(f"配信エラー: {e}") + break + + def start_router(self): + """ルーター開始""" + self.ws_server.start_server() + print(f"WebSocketルーター開始: 
ws://127.0.0.1:{self.ws_server.port}") + + def stop_router(self): + """ルーター停止""" + self.ws_server.stop_server() + +# 使用例 +def handle_chat_message(data, websocket, server): + """チャットメッセージハンドラー""" + username = data.get('username', 'Anonymous') + message = data.get('message', '') + + response = { + 'type': 'chat_broadcast', + 'username': username, + 'message': message, + 'timestamp': time.time() + } + + return response + +def handle_translation_request(data, websocket, server): + """翻訳要求ハンドラー""" + text = data.get('text', '') + # 翻訳処理(モック) + translated = f"[翻訳] {text}" + + response = { + 'type': 'translation_response', + 'original': text, + 'translated': translated + } + + return response + +# ルーター設定 +router = WebSocketRouter() + +# ミドルウェア登録 +router.add_middleware(router.logging_middleware) +router.add_middleware(router.subscription_middleware) +# router.add_middleware(router.authentication_middleware) # 認証が必要な場合 + +# ルート登録 +router.add_route('chat_message', handle_chat_message) +router.add_route('translation_request', handle_translation_request) + +router.start_router() + +# トピック配信テスト +time.sleep(2) +router.broadcast_to_subscribers('system_updates', { + 'type': 'system_notification', + 'message': 'システム更新完了', + 'severity': 'info' +}) + +time.sleep(10) +router.stop_router() +``` + +## 高度な機能・パターン + +### 接続プール管理 + +```python +class ConnectionPoolManager: + """WebSocket接続プール管理""" + + def __init__(self): + self.pools = {} # pool_name -> set of websockets + + def assign_to_pool(self, websocket, pool_name): + """クライアントをプールに割り当て""" + if pool_name not in self.pools: + self.pools[pool_name] = set() + + self.pools[pool_name].add(websocket) + print(f"クライアントを {pool_name} プールに追加") + + def remove_from_pools(self, websocket): + """すべてのプールからクライアントを削除""" + for pool_name, pool in self.pools.items(): + if websocket in pool: + pool.discard(websocket) + print(f"クライアントを {pool_name} プールから削除") + + def broadcast_to_pool(self, pool_name, message): + """特定プールに対してブロードキャスト""" + if pool_name in 
self.pools: + for websocket in self.pools[pool_name].copy(): + try: + websocket.send(message) + except Exception: + # 切断されたクライアントを削除 + self.pools[pool_name].discard(websocket) + + def get_pool_stats(self): + """プール統計情報""" + stats = {} + for pool_name, pool in self.pools.items(): + stats[pool_name] = len(pool) + return stats +``` + +### メッセージ永続化・再送機能 + +```python +class PersistentMessageSystem: + """メッセージ永続化・再送システム""" + + def __init__(self, max_history=1000): + self.message_history = [] + self.max_history = max_history + self.client_last_seen = {} # client_id -> last_message_id + + def store_message(self, message_data): + """メッセージを履歴に保存""" + message_id = (self.message_history[-1]['id']+1) if self.message_history else 0 # 履歴を切り詰めてもIDが重複しないよう単調増加させる + stored_message = { + 'id': message_id, + 'data': message_data, + 'timestamp': time.time() + } + + self.message_history.append(stored_message) + + # 履歴サイズ制限 + if len(self.message_history) > self.max_history: + self.message_history = self.message_history[-self.max_history:] + + return message_id + + def get_missed_messages(self, client_id, last_seen_id): + """クライアントが見逃したメッセージを取得""" + missed_messages = [] + + for msg in self.message_history: + if msg['id'] > last_seen_id: + missed_messages.append(msg) + + return missed_messages + + def client_reconnected(self, websocket, client_id): + """クライアント再接続時の処理""" + last_seen = self.client_last_seen.get(client_id, -1) + missed_messages = self.get_missed_messages(client_id, last_seen) + + # 見逃したメッセージを再送 + for msg in missed_messages: + try: + recovery_data = { + 'type': 'message_recovery', + 'original_message': msg['data'], + 'message_id': msg['id'], + 'original_timestamp': msg['timestamp'] + } + websocket.send(json.dumps(recovery_data)) + except Exception as e: + print(f"メッセージ再送エラー: {e}") + + print(f"クライアント {client_id} に {len(missed_messages)} 件のメッセージを再送") + + def update_client_position(self, client_id, message_id): + """クライアントの最新メッセージ位置更新""" + self.client_last_seen[client_id] = message_id +``` + +## パフォーマンス・スケーラビリティ + +### 負荷分散・最適化 + +```python +class
OptimizedWebSocketServer(WebSocketServer): + """最適化されたWebSocketサーバー""" + + def __init__(self, host='127.0.0.1', port=8765): + super().__init__(host, port) + self.message_stats = { + 'total_messages': 0, + 'messages_per_second': 0, + 'last_reset_time': time.time() + } + self.compression_enabled = True + self.batch_size = 50 + self.batch_timeout = 0.1 + + def enable_message_batching(self, batch_size=50, timeout=0.1): + """メッセージバッチング有効化""" + self.batch_size = batch_size + self.batch_timeout = timeout + + async def optimized_broadcast(self, message_batch): + """最適化されたバッチブロードキャスト""" + if not self.clients: + return + + # 圧縮対応 + if self.compression_enabled and len(message_batch) > 1: + # 複数メッセージをまとめて送信 + combined_message = json.dumps({ + 'type': 'batch', + 'messages': message_batch, + 'count': len(message_batch) + }) + else: + combined_message = json.dumps(message_batch[0]) + + # 並列送信(エラー処理付き) + targets = list(self.clients) + send_tasks = [self.safe_send(client, combined_message) + for client in targets] + + results = await asyncio.gather(*send_tasks, return_exceptions=True) + + # 失敗したクライアントを削除 + for failed_client, result in zip(targets, results): + if isinstance(result, Exception): + # 送信時点のスナップショットと結果を対応付けて削除する + self.clients.discard(failed_client) + print(f"クライアント削除(送信失敗): {result}") + + # 統計更新 + self.update_message_stats(len(message_batch)) + + async def safe_send(self, client, message): + """安全なメッセージ送信""" + try: + await client.send(message) + except Exception as e: + raise e # gather で捕捉される + + def update_message_stats(self, message_count): + """メッセージ統計更新""" + self.message_stats['total_messages'] += message_count + + current_time = time.time() + time_diff = current_time - self.message_stats['last_reset_time'] + + if time_diff >= 1.0: # 1秒ごとに速度計算 + self.message_stats['messages_per_second'] = message_count / time_diff + self.message_stats['last_reset_time'] = current_time + + def get_performance_stats(self): + """パフォーマンス統計取得""" + return { + 'connected_clients':
len(self.clients), + 'total_messages': self.message_stats['total_messages'], + 'messages_per_second': self.message_stats['messages_per_second'], + 'compression_enabled': self.compression_enabled, + 'batch_size': self.batch_size + } +``` + +## 依存関係・システム要件 + +### 必須依存関係 +- `asyncio`: 非同期処理フレームワーク +- `websockets`: WebSocketライブラリ +- `threading`: マルチスレッド制御 +- `json`: JSON形式データ処理 + +### システム要件 +```python +system_requirements = { + "python_version": "3.7以上", + "asyncio_support": "非同期処理対応", + "network_stack": "TCP/WebSocket対応", + "memory": "同時接続数に応じた十分なメモリ" +} + +performance_characteristics = { + "concurrent_connections": "数百~数千接続対応", + "message_throughput": "秒間数千メッセージ処理可能", + "latency": "低レイテンシー(ミリ秒オーダー)", + "memory_per_connection": "約1-5MB(接続当たり)" +} +``` + +### オプション依存関係 +- `ujson`: 高速JSON処理(パフォーマンス向上) +- `compression`: メッセージ圧縮(帯域節約) + +## 注意事項・制限 + +### ネットワーク制限 +- ファイアウォール設定の要確認 +- プロキシ環境での制限可能性 +- ブラウザーのWebSocket接続制限 + +### スケーラビリティ制限 +- 単一プロセスでの同時接続数制限 +- メモリ使用量の線形増加 +- CPU集約的な処理での性能劣化 + +### セキュリティ考慮事項 +```python +security_considerations = { + "authentication": "認証機構の実装推奨", + "authorization": "適切な認可制御", + "rate_limiting": "レート制限の実装", + "input_validation": "入力データの検証必須", + "cors_policy": "CORS設定の適切な構成" +} +``` + +## 関連モジュール + +- `config.py`: WebSocket設定管理 +- `controller.py`: WebSocket制御インターフェース +- `utils.py`: エラーログ・ユーティリティ +- `model.py`: WebSocket機能統合 + +## 将来の改善点 + +- Redis等を用いたメッセージブローカー連携 +- 負荷分散・クラスタリング対応 +- より高度な認証・認可システム +- WebRTC等のより高速な通信プロトコル対応 +- GraphQL over WebSocketサポート +- リアルタイム監視・分析機能の強化 \ No newline at end of file