diff --git a/README.md b/README.md new file mode 100644 index 00000000..0c880a05 --- /dev/null +++ b/README.md @@ -0,0 +1,101 @@ +# VRCT (VRChat Chatbox Translator & Transcription) + +## Overview +VRChatのChatBoxにOSC経由でメッセージを送信するツール +翻訳エンジンを使用してメッセージとその翻訳部分を同時に送信することができる + +## Requirement +- python 3.9.13 +- pillow +- PyAudioWPatch +- python-osc +- customtkinter +- deepl +- deepl-translate(https://github.com/misyaguziya/deepl-translate) +- translators(https://github.com/misyaguziya/translators) +- custom_speech_recognition(https://github.com/misyaguziya/custom_speech_recognition) + +deepl-translate/translators/custom_speech_recognitionについては追加実装をしています。`pip install`でinstallした場合、動かないので注意 + +## install +```bash +pip install -r requirements.txt +``` + +```bash +git clone https://github.com/misyaguziya/translators.git +python ./translators/setup.py install +git clone https://github.com/misyaguziya/deepl-translate.git +python ./deepl-translate/setup.py install +git clone https://github.com/misyaguziya/custom_speech_recognition.git +python ./custom_speech_recognition/setup.py install +``` + +## Usage +```bash +python VRCT.py +``` + +## Features + +### init +0. VRChatのOSCを有効にする(重要) + +(任意) +1. DeepLのAPIを使用するためにアカウント登録し、認証キーを取得する +2. ギアアイコンのボタンでconfigウィンドウを開く +3. ParameterタブのDeepL Auth Keyに認証キーを記載し、フロッピーアイコンのボタンを押す +4. configウィンドウを閉じる + +### Normal use +1. メッセージボックスにメッセージを記入 +2. 
Enterキーを押し、メッセージを送信する + +### About Checkboxes +- translation: 翻訳の有効無効 +- voice2chatbox: マイクの音声を文字起こししてチャットボックスに送信する +- speaker2log: スピーカーの音声から文字起こししてログに表示する +- foreground: 最前面表示の有効無効 + +### About Textbox +- log tab: すべてのログを表示 +- send tab: 送信したメッセージを表示 +- receive tab: 受信したメッセージを表示 +- system tab: 機能についてのメッセージを表示 + +### About Config Window +- UI tab + - Transparency: ウィンドウの透過度の調整 + - Appearance Theme: ウィンドウテーマを選択 + - UI Scaling: UIサイズを調整 + - Font Family: 表示フォントを選択 +- Translation tab + - Select Translator: 翻訳エンジンの変更 + - Send Language: 送信するメッセージに対して翻訳する言語[source, target]を選択 + - Receive Language: 受信したメッセージに対して翻訳する言語[source, target]を選択 +- Transcription tab + - Input Mic Device: マイクを選択 + - Input Mic Voice Language: 入力する音声の言語 + - Input Mic Energy Threshold: 音声取得のしきい値 + - Input Mic Dynamic Energy Threshold: 音声取得のしきい値の自動調整 + - Input Mic Record Timeout: 音声の区切りの無音時間 + - Input Mic Max Phrases: 保留する単語の上限 + - Input Speaker Device: スピーカーを選択 + - Input Speaker Voice Language: 受信する音声の言語 + - Input Speaker Energy Threshold: 音声取得のしきい値 + - Input Speaker Dynamic Energy Threshold: 音声取得のしきい値の自動調整 + - Input Speaker Record Timeout: 音声の区切りの無音時間 + - Input Speaker Max Phrases: 保留する単語の上限 +- Parameter tab + - OSC IP address: 変更不要 + - OSC port: 変更不要 + - DeepL Auth key: DeepLの認証キーの設定 + - Message Format: 送信するメッセージのデコレーションの設定 + - [message]がメッセージボックスに記入したメッセージに置換される + - [translation]が翻訳されたメッセージに置換される + - 初期フォーマット:`[message]([translation])` + +## Author +みしゃ(misyaguzi) +twitter: https://twitter.com/misya_ai +booth: https://misyaguziya.booth.pm/items/4814313 \ No newline at end of file diff --git a/README.txt b/README.txt index ef6e28a5..97085db2 100644 --- a/README.txt +++ b/README.txt @@ -32,7 +32,7 @@ VRChatで使用されるChatBoxをOSC経由でメッセージを送信するツ     - receiveタブ: 受信したメッセージを表示     - systemタブ: 機能についてのメッセージを表示 -  - configウィンドウ +  - Configウィンドウ    - UIタブ     - Transparency: ウィンドウの透過度の調整     - Appearance Theme: ウィンドウテーマを選択 @@ -43,13 +43,18 @@ VRChatで使用されるChatBoxをOSC経由でメッセージを送信するツ     - Send Language: 
送信するメッセージに対して翻訳する言語[source, target]を選択     - Receive Language: 受信したメッセージに対して翻訳する言語[source, target]を選択    - Transcriptionタブ -    - Input Mic Device: 音声を入力するマイクを選択 +    - Input Mic Device: マイクを選択     - Input Mic Voice Language: 入力する音声の言語 -    - Input Mic IsDynamic: マイクの自動調整 -    - Input Mic Threshold: 音声取得のしきい値 -    - Input Speaker Device: 音声を受信するスピーカーを選択 +    - Input Mic Energy Threshold: 音声取得のしきい値 +    - Input Mic Dynamic Energy Threshold: 音声取得のしきい値の自動調整 +    - Input Mic Record Timeout: 音声の区切りの無音時間 +    - Input Mic Max Phrases: 保留する単語の上限 +    - Input Speaker Device: スピーカーを選択     - Input Speaker Voice Language: 受信する音声の言語 -    - Input Speaker Interval: 受信する音声の調整 +    - Input Speaker Energy Threshold: 音声取得のしきい値 +    - Input Speaker Dynamic Energy Threshold: 音声取得のしきい値の自動調整 +    - Input Speaker Record Timeout: 音声の区切りの無音時間 +    - Input Speaker Max Phrases: 保留する単語の上限   - Parameterタブ     - OSC IP address: 変更不要     - OSC port: 変更不要 @@ -90,6 +95,8 @@ https://twitter.com/misya_ai - いくつかのバクを修正 - 翻訳/文字起こし言語の表記を略称からわかりやすい文字に変更 - 文字起こしの処理の軽量化 +[2023-07-05: v1.2] +- 文字起こし精度の向上 # 注意事項 再配布とかはやめてね \ No newline at end of file diff --git a/VRCT.py b/VRCT.py index 902f6534..13d9840e 100644 --- a/VRCT.py +++ b/VRCT.py @@ -1,15 +1,19 @@ import os import json +import queue import tkinter as tk import customtkinter from PIL import Image import utils -import translation -import transcription import osc_tools import window_config import window_information +import languages +import audio_utils +import audio_recorder +import audio_transcriber +import translation class App(customtkinter.CTk): def __init__(self, *args, **kwargs): @@ -17,7 +21,6 @@ class App(customtkinter.CTk): # init instance self.translator = translation.Translator() - self.vr = transcription.VoiceRecognizer() # init config self.PATH_CONFIG = "./config.json" @@ -32,20 +35,27 @@ class App(customtkinter.CTk): self.UI_SCALING = "100%" self.FONT_FAMILY = "Yu Gothic UI" ## Translation - self.CHOICE_TRANSLATOR = "DeepL(web)" - 
self.INPUT_SOURCE_LANG = "Japanese" - self.INPUT_TARGET_LANG = "English" - self.OUTPUT_SOURCE_LANG = "English" - self.OUTPUT_TARGET_LANG = "Japanese" - ## Transcription - self.CHOICE_MIC_DEVICE = self.vr.search_default_device()[0] - self.INPUT_MIC_VOICE_LANGUAGE = "Japanese Japan" - self.INPUT_MIC_IS_DYNAMIC = False - self.INPUT_MIC_THRESHOLD = 300 - self.CHOICE_SPEAKER_DEVICE = self.vr.search_default_device()[1] - self.INPUT_SPEAKER_VOICE_LANGUAGE = "English United States" - self.INPUT_SPEAKER_INTERVAL = 4 - + self.CHOICE_TRANSLATOR = languages.translators[0] + self.INPUT_SOURCE_LANG = list(languages.translation_lang[self.CHOICE_TRANSLATOR].keys())[0] + self.INPUT_TARGET_LANG = list(languages.translation_lang[self.CHOICE_TRANSLATOR].keys())[1] + self.OUTPUT_SOURCE_LANG = list(languages.translation_lang[self.CHOICE_TRANSLATOR].keys())[1] + self.OUTPUT_TARGET_LANG = list(languages.translation_lang[self.CHOICE_TRANSLATOR].keys())[0] + ## Transcription Send + self.CHOICE_MIC_DEVICE = audio_utils.get_default_input_device()["name"] + self.INPUT_MIC_VOICE_LANGUAGE = list(languages.transcription_lang.keys())[0] + self.INPUT_MIC_ENERGY_THRESHOLD = 300 + self.INPUT_MIC_DYNAMIC_ENERGY_THRESHOLD = True + self.INPUT_MIC_RECORD_TIMEOUT = 3 + self.INPUT_MIC_PHRASE_TIMEOUT = 3 + self.INPUT_MIC_MAX_PHRASES = 10 + ## Transcription Receive + self.CHOICE_SPEAKER_DEVICE = audio_utils.get_default_output_device()["name"] + self.INPUT_SPEAKER_VOICE_LANGUAGE = list(languages.transcription_lang.keys())[1] + self.INPUT_SPEAKER_ENERGY_THRESHOLD = 300 + self.INPUT_SPEAKER_DYNAMIC_ENERGY_THRESHOLD = True + self.INPUT_SPEAKER_RECORD_TIMEOUT = 3 + self.INPUT_SPEAKER_PHRASE_TIMEOUT = 3 + self.INPUT_SPEAKER_MAX_PHRASES = 10 ## Parameter self.OSC_IP_ADDRESS = "127.0.0.1" self.OSC_PORT = 9000 @@ -95,40 +105,62 @@ class App(customtkinter.CTk): if config["CHOICE_TRANSLATOR"] in list(self.translator.translator_status.keys()): self.CHOICE_TRANSLATOR = config["CHOICE_TRANSLATOR"] if "INPUT_SOURCE_LANG" 
in config.keys(): - if config["INPUT_SOURCE_LANG"] in self.translator.languages[self.CHOICE_TRANSLATOR]: + if config["INPUT_SOURCE_LANG"] in list(languages.translation_lang[self.CHOICE_TRANSLATOR].keys()): self.INPUT_SOURCE_LANG = config["INPUT_SOURCE_LANG"] if "INPUT_TARGET_LANG" in config.keys(): - if config["INPUT_SOURCE_LANG"] in self.translator.languages[self.CHOICE_TRANSLATOR]: + if config["INPUT_TARGET_LANG"] in list(languages.translation_lang[self.CHOICE_TRANSLATOR].keys()): self.INPUT_TARGET_LANG = config["INPUT_TARGET_LANG"] if "OUTPUT_SOURCE_LANG" in config.keys(): - if config["INPUT_SOURCE_LANG"] in self.translator.languages[self.CHOICE_TRANSLATOR]: + if config["OUTPUT_SOURCE_LANG"] in list(languages.translation_lang[self.CHOICE_TRANSLATOR].keys()): self.OUTPUT_SOURCE_LANG = config["OUTPUT_SOURCE_LANG"] if "OUTPUT_TARGET_LANG" in config.keys(): - if config["INPUT_SOURCE_LANG"] in self.translator.languages[self.CHOICE_TRANSLATOR]: + if config["OUTPUT_TARGET_LANG"] in list(languages.translation_lang[self.CHOICE_TRANSLATOR].keys()): self.OUTPUT_TARGET_LANG = config["OUTPUT_TARGET_LANG"] # Transcription if "CHOICE_MIC_DEVICE" in config.keys(): - if config["CHOICE_MIC_DEVICE"] in [device["name"] for device in self.vr.search_input_device()]: + if config["CHOICE_MIC_DEVICE"] in [device["name"] for device in audio_utils.get_input_device_list()]: self.CHOICE_MIC_DEVICE = config["CHOICE_MIC_DEVICE"] if "INPUT_MIC_VOICE_LANGUAGE" in config.keys(): - if config["INPUT_MIC_VOICE_LANGUAGE"] in list(self.vr.languages): + if config["INPUT_MIC_VOICE_LANGUAGE"] in list(languages.transcription_lang.keys()): self.INPUT_MIC_VOICE_LANGUAGE = config["INPUT_MIC_VOICE_LANGUAGE"] - if "INPUT_MIC_IS_DYNAMIC" in config.keys(): - if type(config["INPUT_MIC_IS_DYNAMIC"]) is bool: - self.INPUT_MIC_IS_DYNAMIC = config["INPUT_MIC_IS_DYNAMIC"] - if "INPUT_MIC_THRESHOLD" in config.keys(): - if type(config["INPUT_MIC_THRESHOLD"]) is int: - self.INPUT_MIC_THRESHOLD = 
config["INPUT_MIC_THRESHOLD"] + if "INPUT_MIC_ENERGY_THRESHOLD" in config.keys(): + if type(config["INPUT_MIC_ENERGY_THRESHOLD"]) is int: + self.INPUT_MIC_ENERGY_THRESHOLD = config["INPUT_MIC_ENERGY_THRESHOLD"] + if "INPUT_MIC_DYNAMIC_ENERGY_THRESHOLD" in config.keys(): + if type(config["INPUT_MIC_DYNAMIC_ENERGY_THRESHOLD"]) is bool: + self.INPUT_MIC_DYNAMIC_ENERGY_THRESHOLD = config["INPUT_MIC_DYNAMIC_ENERGY_THRESHOLD"] + if "INPUT_MIC_RECORD_TIMEOUT" in config.keys(): + if type(config["INPUT_MIC_RECORD_TIMEOUT"]) is int: + self.INPUT_MIC_RECORD_TIMEOUT = config["INPUT_MIC_RECORD_TIMEOUT"] + if "INPUT_MIC_PHRASE_TIMEOUT" in config.keys(): + if type(config["INPUT_MIC_PHRASE_TIMEOUT"]) is int: + self.INPUT_MIC_PHRASE_TIMEOUT = config["INPUT_MIC_PHRASE_TIMEOUT"] + if "INPUT_MIC_MAX_PHRASES" in config.keys(): + if type(config["INPUT_MIC_MAX_PHRASES"]) is int: + self.INPUT_MIC_MAX_PHRASES = config["INPUT_MIC_MAX_PHRASES"] + if "CHOICE_SPEAKER_DEVICE" in config.keys(): - if config["CHOICE_SPEAKER_DEVICE"] in [device["name"] for device in self.vr.search_output_device()]: + if config["CHOICE_SPEAKER_DEVICE"] in [device["name"] for device in audio_utils.get_output_device_list()]: self.CHOICE_SPEAKER_DEVICE = config["CHOICE_SPEAKER_DEVICE"] if "INPUT_SPEAKER_VOICE_LANGUAGE" in config.keys(): - if config["INPUT_SPEAKER_VOICE_LANGUAGE"] in list(self.vr.languages): + if config["INPUT_SPEAKER_VOICE_LANGUAGE"] in list(languages.transcription_lang.keys()): self.INPUT_SPEAKER_VOICE_LANGUAGE = config["INPUT_SPEAKER_VOICE_LANGUAGE"] - if "INPUT_SPEAKER_INTERVAL" in config.keys(): - if type(config["INPUT_SPEAKER_INTERVAL"]) is int: - self.INPUT_SPEAKER_INTERVAL = config["INPUT_SPEAKER_INTERVAL"] + if "INPUT_SPEAKER_ENERGY_THRESHOLD" in config.keys(): + if type(config["INPUT_SPEAKER_ENERGY_THRESHOLD"]) is int: + self.INPUT_SPEAKER_ENERGY_THRESHOLD = config["INPUT_SPEAKER_ENERGY_THRESHOLD"] + if "INPUT_SPEAKER_DYNAMIC_ENERGY_THRESHOLD" in config.keys(): + if 
type(config["INPUT_SPEAKER_DYNAMIC_ENERGY_THRESHOLD"]) is bool: + self.INPUT_SPEAKER_DYNAMIC_ENERGY_THRESHOLD = config["INPUT_SPEAKER_DYNAMIC_ENERGY_THRESHOLD"] + if "INPUT_SPEAKER_RECORD_TIMEOUT" in config.keys(): + if type(config["INPUT_SPEAKER_RECORD_TIMEOUT"]) is int: + self.INPUT_SPEAKER_RECORD_TIMEOUT = config["INPUT_SPEAKER_RECORD_TIMEOUT"] + if "INPUT_SPEAKER_PHRASE_TIMEOUT" in config.keys(): + if type(config["INPUT_SPEAKER_PHRASE_TIMEOUT"]) is int: + self.INPUT_SPEAKER_PHRASE_TIMEOUT = config["INPUT_SPEAKER_PHRASE_TIMEOUT"] + if "INPUT_SPEAKER_MAX_PHRASES" in config.keys(): + if type(config["INPUT_SPEAKER_MAX_PHRASES"]) is int: + self.INPUT_SPEAKER_MAX_PHRASES = config["INPUT_SPEAKER_MAX_PHRASES"] # Parameter if "OSC_IP_ADDRESS" in config.keys(): @@ -164,11 +196,18 @@ class App(customtkinter.CTk): "OUTPUT_TARGET_LANG": self.OUTPUT_TARGET_LANG, "CHOICE_MIC_DEVICE": self.CHOICE_MIC_DEVICE, "INPUT_MIC_VOICE_LANGUAGE": self.INPUT_MIC_VOICE_LANGUAGE, - "INPUT_MIC_IS_DYNAMIC": self.INPUT_MIC_IS_DYNAMIC, - "INPUT_MIC_THRESHOLD": self.INPUT_MIC_THRESHOLD, + "INPUT_MIC_ENERGY_THRESHOLD": self.INPUT_MIC_ENERGY_THRESHOLD, + "INPUT_MIC_DYNAMIC_ENERGY_THRESHOLD": self.INPUT_MIC_DYNAMIC_ENERGY_THRESHOLD, + "INPUT_MIC_RECORD_TIMEOUT": self.INPUT_MIC_RECORD_TIMEOUT, + "INPUT_MIC_PHRASE_TIMEOUT": self.INPUT_MIC_PHRASE_TIMEOUT, + "INPUT_MIC_MAX_PHRASES": self.INPUT_MIC_MAX_PHRASES, "CHOICE_SPEAKER_DEVICE": self.CHOICE_SPEAKER_DEVICE, "INPUT_SPEAKER_VOICE_LANGUAGE": self.INPUT_SPEAKER_VOICE_LANGUAGE, - "INPUT_SPEAKER_INTERVAL": self.INPUT_SPEAKER_INTERVAL, + "INPUT_SPEAKER_ENERGY_THRESHOLD": self.INPUT_SPEAKER_ENERGY_THRESHOLD, + "INPUT_SPEAKER_DYNAMIC_ENERGY_THRESHOLD": self.INPUT_SPEAKER_DYNAMIC_ENERGY_THRESHOLD, + "INPUT_SPEAKER_RECORD_TIMEOUT": self.INPUT_SPEAKER_RECORD_TIMEOUT, + "INPUT_SPEAKER_PHRASE_TIMEOUT": self.INPUT_SPEAKER_PHRASE_TIMEOUT, + "INPUT_SPEAKER_MAX_PHRASES": self.INPUT_SPEAKER_MAX_PHRASES, "OSC_IP_ADDRESS": self.OSC_IP_ADDRESS, "OSC_PORT": self.OSC_PORT, 
"AUTH_KEYS": self.AUTH_KEYS, @@ -333,8 +372,6 @@ class App(customtkinter.CTk): self.checkbox_translation.deselect() ## set checkbox enable transcription send - self.th_vr_listen_mic = None - self.th_vr_recognize_mic = None if self.ENABLE_TRANSCRIPTION_SEND: self.checkbox_transcription_send.select() self.checkbox_transcription_send_callback() @@ -342,8 +379,6 @@ class App(customtkinter.CTk): self.checkbox_transcription_send.deselect() ## set checkbox enable transcription receive - self.th_vr_listen_spk = None - self.th_vr_recognize_spk = None if self.ENABLE_TRANSCRIPTION_RECEIVE: self.checkbox_transcription_receive.select() self.checkbox_transcription_receive_callback() @@ -375,6 +410,9 @@ class App(customtkinter.CTk): def button_config_callback(self): if self.config_window is None or not self.config_window.winfo_exists(): self.config_window = window_config.ToplevelWindowConfig(self) + self.checkbox_translation.configure(state="disabled") + self.checkbox_transcription_send.configure(state="disabled") + self.checkbox_transcription_receive.configure(state="disabled") self.config_window.focus() def button_information_callback(self): @@ -385,9 +423,14 @@ class App(customtkinter.CTk): def checkbox_translation_callback(self): self.ENABLE_TRANSLATION = self.checkbox_translation.get() if self.ENABLE_TRANSLATION: + self.button_config.configure(state="disabled", fg_color=["gray92", "gray14"]) utils.print_textbox(self.textbox_message_log, "Start translation", "INFO") utils.print_textbox(self.textbox_message_system_log, "Start translation", "INFO") else: + if ((self.checkbox_translation.get() is False) and + (self.checkbox_transcription_send.get() is False) and + (self.checkbox_transcription_receive.get() is False)): + self.button_config.configure(state="normal", fg_color=["#3B8ED0", "#1F6AA5"]) utils.print_textbox(self.textbox_message_log, "Stop translation", "INFO") utils.print_textbox(self.textbox_message_system_log, "Stop translation", "INFO") 
utils.save_json(self.PATH_CONFIG, "ENABLE_TRANSLATION", self.ENABLE_TRANSLATION) @@ -395,25 +438,66 @@ class App(customtkinter.CTk): def checkbox_transcription_send_callback(self): self.ENABLE_TRANSCRIPTION_SEND = self.checkbox_transcription_send.get() if self.ENABLE_TRANSCRIPTION_SEND is True: + self.button_config.configure(state="disabled", fg_color=["gray92", "gray14"]) + self.mic_audio_queue = queue.Queue() + mic_device = [device for device in audio_utils.get_input_device_list() if device["name"] == self.CHOICE_MIC_DEVICE][0] + self.mic_audio_recorder = audio_recorder.SelectedMicRecorder( + mic_device, + self.INPUT_MIC_ENERGY_THRESHOLD, + self.INPUT_MIC_DYNAMIC_ENERGY_THRESHOLD, + self.INPUT_MIC_RECORD_TIMEOUT, + ) + self.mic_audio_recorder.record_into_queue(self.mic_audio_queue) + self.mic_transcriber = audio_transcriber.AudioTranscriber( + speaker=False, + source=self.mic_audio_recorder.source, + language=languages.transcription_lang[self.INPUT_MIC_VOICE_LANGUAGE], + phrase_timeout=self.INPUT_MIC_PHRASE_TIMEOUT, + max_phrases=self.INPUT_MIC_MAX_PHRASES, + ) + def mic_transcript_to_chatbox(): + self.mic_transcriber.transcribe_audio_queue(self.mic_audio_queue) + message = self.mic_transcriber.get_transcript() + if len(message) > 0: + # translate + if self.checkbox_translation.get() is False: + voice_message = f"{message}" + elif self.translator.translator_status[self.CHOICE_TRANSLATOR] is False: + utils.print_textbox(self.textbox_message_log, "Auth Key or language setting is incorrect", "ERROR") + utils.print_textbox(self.textbox_message_system_log, "Auth Key or language setting is incorrect", "ERROR") + voice_message = f"{message}" + else: + result = self.translator.translate( + translator_name=self.CHOICE_TRANSLATOR, + source_language=self.INPUT_SOURCE_LANG, + target_language=self.INPUT_TARGET_LANG, + message=message + ) + voice_message = self.MESSAGE_FORMAT.replace("[message]", message).replace("[translation]", result) + + if 
self.checkbox_transcription_send.get() is True: + # send OSC message + osc_tools.send_message(voice_message, self.OSC_IP_ADDRESS, self.OSC_PORT) + # update textbox message log + utils.print_textbox(self.textbox_message_log, f"{voice_message}", "SEND") + utils.print_textbox(self.textbox_message_send_log, f"{voice_message}", "SEND") + + self.mic_print_transcript = utils.thread_fnc(mic_transcript_to_chatbox) + self.mic_print_transcript.daemon = True + self.mic_print_transcript.start() + utils.print_textbox(self.textbox_message_log, "Start voice2chatbox", "INFO") utils.print_textbox(self.textbox_message_system_log, "Start voice2chatbox", "INFO") - # start threading - self.vr.set_mic( - device_name=self.CHOICE_MIC_DEVICE, - threshold=int(self.INPUT_MIC_THRESHOLD), - is_dynamic=self.INPUT_MIC_IS_DYNAMIC, - language=self.INPUT_MIC_VOICE_LANGUAGE, - ) - self.vr.init_mic() - self.th_vr_listen_mic = utils.thread_fnc(self.vr_listen_mic) - self.th_vr_recognize_mic = utils.thread_fnc(self.vr_recognize_mic) - self.th_vr_listen_mic.start() - self.th_vr_recognize_mic.start() else: - if isinstance(self.th_vr_listen_mic, utils.thread_fnc): - self.th_vr_listen_mic.stop() - if isinstance(self.th_vr_recognize_mic, utils.thread_fnc): - self.th_vr_recognize_mic.stop() + if ((self.checkbox_translation.get() is False) and + (self.checkbox_transcription_send.get() is False) and + (self.checkbox_transcription_receive.get() is False)): + self.button_config.configure(state="normal", fg_color=["#3B8ED0", "#1F6AA5"]) + if isinstance(self.mic_print_transcript, utils.thread_fnc): + self.mic_print_transcript.stop() + if self.mic_audio_recorder.stop != None: + self.mic_audio_recorder.stop() + self.mic_audio_recorder.stop = None utils.print_textbox(self.textbox_message_log, "Stop voice2chatbox", "INFO") utils.print_textbox(self.textbox_message_system_log, "Stop voice2chatbox", "INFO") @@ -422,88 +506,70 @@ class App(customtkinter.CTk): def checkbox_transcription_receive_callback(self): 
self.ENABLE_TRANSCRIPTION_RECEIVE = self.checkbox_transcription_receive.get() if self.ENABLE_TRANSCRIPTION_RECEIVE is True: + self.button_config.configure(state="disabled", fg_color=["gray92", "gray14"]) + self.spk_audio_queue = queue.Queue() + spk_device = [device for device in audio_utils.get_output_device_list() if device["name"] == self.CHOICE_SPEAKER_DEVICE][0] + self.spk_audio_recorder = audio_recorder.SelectedSpeakerRecorder( + spk_device, + self.INPUT_SPEAKER_ENERGY_THRESHOLD, + self.INPUT_SPEAKER_DYNAMIC_ENERGY_THRESHOLD, + self.INPUT_SPEAKER_RECORD_TIMEOUT, + ) + self.spk_audio_recorder.record_into_queue(self.spk_audio_queue) + self.spk_transcriber = audio_transcriber.AudioTranscriber( + speaker=True, + source=self.spk_audio_recorder.source, + language=languages.transcription_lang[self.INPUT_SPEAKER_VOICE_LANGUAGE], + phrase_timeout=self.INPUT_SPEAKER_PHRASE_TIMEOUT, + max_phrases=self.INPUT_SPEAKER_MAX_PHRASES, + ) + + def spk_transcript_to_textbox(): + self.spk_transcriber.transcribe_audio_queue(self.spk_audio_queue) + message = self.spk_transcriber.get_transcript() + if len(message) > 0: + # translate + if self.checkbox_translation.get() is False: + voice_message = f"{message}" + elif self.translator.translator_status[self.CHOICE_TRANSLATOR] is False: + utils.print_textbox(self.textbox_message_log, "Auth Key or language setting is incorrect", "ERROR") + utils.print_textbox(self.textbox_message_system_log, "Auth Key or language setting is incorrect", "ERROR") + voice_message = f"{message}" + else: + result = self.translator.translate( + translator_name=self.CHOICE_TRANSLATOR, + source_language=self.OUTPUT_SOURCE_LANG, + target_language=self.OUTPUT_TARGET_LANG, + message=message + ) + voice_message = self.MESSAGE_FORMAT.replace("[message]", message).replace("[translation]", result) + # send OSC message + # osc_tools.send_message(voice_message, self.OSC_IP_ADDRESS, self.OSC_PORT) + + if self.checkbox_transcription_receive.get() is True: + # update textbox 
message receive log + utils.print_textbox(self.textbox_message_log, f"{voice_message}", "RECEIVE") + utils.print_textbox(self.textbox_message_receive_log, f"{voice_message}", "RECEIVE") + + self.spk_print_transcript = utils.thread_fnc(spk_transcript_to_textbox) + self.spk_print_transcript.daemon = True + self.spk_print_transcript.start() utils.print_textbox(self.textbox_message_log, "Start speaker2log", "INFO") utils.print_textbox(self.textbox_message_system_log, "Start speaker2log", "INFO") - - self.vr.set_spk( - device_name=self.CHOICE_SPEAKER_DEVICE, - interval=int(self.INPUT_SPEAKER_INTERVAL), - language=self.INPUT_SPEAKER_VOICE_LANGUAGE, - ) - self.vr.init_spk() - self.vr.start_spk_recording() - self.th_vr_recognize_spk = utils.thread_fnc(self.vr_recognize_spk) - self.th_vr_recognize_spk.start() else: - if self.vr.spk_stream is not None: - self.vr.close_spk_stream() - if isinstance(self.th_vr_recognize_spk, utils.thread_fnc): - self.th_vr_recognize_spk.stop() - + if ((self.checkbox_translation.get() is False) and + (self.checkbox_transcription_send.get() is False) and + (self.checkbox_transcription_receive.get() is False)): + self.button_config.configure(state="normal", fg_color=["#3B8ED0", "#1F6AA5"]) + if isinstance(self.spk_print_transcript, utils.thread_fnc): + self.spk_print_transcript.stop() + if self.spk_audio_recorder.stop != None: + self.spk_audio_recorder.stop() + self.spk_audio_recorder.stop = None utils.print_textbox(self.textbox_message_log, "Stop speaker2log", "INFO") utils.print_textbox(self.textbox_message_system_log, "Stop speaker2log", "INFO") utils.save_json(self.PATH_CONFIG, "ENABLE_TRANSCRIPTION_RECEIVE", self.ENABLE_TRANSCRIPTION_RECEIVE) - def vr_listen_mic(self): - if self.checkbox_transcription_send.get() is True: - self.vr.listen_mic() - - def vr_recognize_mic(self): - message = self.vr.recognize_mic() - if len(message) > 0: - # translate - if self.checkbox_translation.get() is False: - voice_message = f"{message}" - elif 
self.translator.translator_status[self.CHOICE_TRANSLATOR] is False: - utils.print_textbox(self.textbox_message_log, "Auth Key or language setting is incorrect", "ERROR") - utils.print_textbox(self.textbox_message_system_log, "Auth Key or language setting is incorrect", "ERROR") - voice_message = f"{message}" - else: - result = self.translator.translate( - translator_name=self.CHOICE_TRANSLATOR, - source_language=self.INPUT_SOURCE_LANG, - target_language=self.INPUT_TARGET_LANG, - message=message - ) - voice_message = self.MESSAGE_FORMAT.replace("[message]", message).replace("[translation]", result) - - if self.checkbox_transcription_send.get() is True: - # send OSC message - osc_tools.send_message(voice_message, self.OSC_IP_ADDRESS, self.OSC_PORT) - # update textbox message log - utils.print_textbox(self.textbox_message_log, f"{voice_message}", "SEND") - utils.print_textbox(self.textbox_message_send_log, f"{voice_message}", "SEND") - - def vr_listen_spk(self): - if self.checkbox_transcription_receive.get() is True: - self.vr.listen_spk() - - def vr_recognize_spk(self): - message = self.vr.recognize_spk() - if len(message) > 0: - # translate - if self.checkbox_translation.get() is False: - voice_message = f"{message}" - elif self.translator.translator_status[self.CHOICE_TRANSLATOR] is False: - utils.print_textbox(self.textbox_message_log, "Auth Key or language setting is incorrect", "ERROR") - utils.print_textbox(self.textbox_message_system_log, "Auth Key or language setting is incorrect", "ERROR") - voice_message = f"{message}" - else: - result = self.translator.translate( - translator_name=self.CHOICE_TRANSLATOR, - source_language=self.OUTPUT_SOURCE_LANG, - target_language=self.OUTPUT_TARGET_LANG, - message=message - ) - voice_message = self.MESSAGE_FORMAT.replace("[message]", message).replace("[translation]", result) - # send OSC message - # osc_tools.send_message(voice_message, self.OSC_IP_ADDRESS, self.OSC_PORT) - - if self.checkbox_transcription_receive.get() 
is True: - # update textbox message receive log - utils.print_textbox(self.textbox_message_log, f"{voice_message}", "RECEIVE") - utils.print_textbox(self.textbox_message_receive_log, f"{voice_message}", "RECEIVE") - def checkbox_foreground_callback(self): self.ENABLE_FOREGROUND = self.checkbox_foreground.get() if self.ENABLE_FOREGROUND: diff --git a/audio_recorder.py b/audio_recorder.py new file mode 100644 index 00000000..34f990e5 --- /dev/null +++ b/audio_recorder.py @@ -0,0 +1,47 @@ +import speech_recognition as sr +import pyaudiowpatch as pyaudio +from datetime import datetime + +class BaseRecorder: + def __init__(self, source, energy_threshold, dynamic_energy_threshold, record_timeout): + self.recorder = sr.Recognizer() + self.recorder.energy_threshold = energy_threshold + self.recorder.dynamic_energy_threshold = dynamic_energy_threshold + self.record_timeout = record_timeout + self.stop = None + + if source is None: + raise ValueError("audio source can't be None") + + self.source = source + + def adjust_for_noise(self): + with self.source: + self.recorder.adjust_for_ambient_noise(self.source) + + def record_into_queue(self, audio_queue): + def record_callback(_, audio:sr.AudioData) -> None: + audio_queue.put((audio.get_raw_data(), datetime.now())) + + self.stop = self.recorder.listen_in_background(self.source, record_callback, phrase_time_limit=self.record_timeout) + +class SelectedMicRecorder(BaseRecorder): + def __init__(self, device, energy_threshold, dynamic_energy_threshold, record_timeout): + source=sr.Microphone( + device_index=device['index'], + sample_rate=int(device["defaultSampleRate"]), + ) + super().__init__(source=source, energy_threshold=energy_threshold, dynamic_energy_threshold=dynamic_energy_threshold, record_timeout=record_timeout) + self.adjust_for_noise() + +class SelectedSpeakerRecorder(BaseRecorder): + def __init__(self, device, energy_threshold, dynamic_energy_threshold, record_timeout): + + source = sr.Microphone(speaker=True, + 
device_index= device["index"], + sample_rate=int(device["defaultSampleRate"]), + chunk_size=pyaudio.get_sample_size(pyaudio.paInt16), + channels=device["maxInputChannels"] + ) + super().__init__(source=source, energy_threshold=energy_threshold, dynamic_energy_threshold=dynamic_energy_threshold, record_timeout=record_timeout) + self.adjust_for_noise() \ No newline at end of file diff --git a/audio_transcriber.py b/audio_transcriber.py new file mode 100644 index 00000000..695ee55f --- /dev/null +++ b/audio_transcriber.py @@ -0,0 +1,99 @@ +import io +import threading +import wave +import speech_recognition as sr +from datetime import timedelta +import pyaudiowpatch as pyaudio + +PHRASE_TIMEOUT = 3 +MAX_PHRASES = 10 + +class AudioTranscriber: + def __init__(self, speaker, source, language, phrase_timeout, max_phrases): + self.speaker = speaker + self.language = language + self.phrase_timeout = phrase_timeout + self.max_phrases = max_phrases + self.transcript_data = [] + self.transcript_changed_event = threading.Event() + self.audio_recognizer = sr.Recognizer() + self.audio_sources = { + "sample_rate": source.SAMPLE_RATE, + "sample_width": source.SAMPLE_WIDTH, + "channels": source.channels, + "last_sample": bytes(), + "last_spoken": None, + "new_phrase": True, + "process_data_func": self.process_speaker_data if speaker else self.process_speaker_data + } + + def transcribe_audio_queue(self, audio_queue): + # while True: + audio, time_spoken = audio_queue.get() + self.update_last_sample_and_phrase_status(audio, time_spoken) + + text = '' + try: + # fd, path = tempfile.mkstemp(suffix=".wav") + # os.close(fd) + audio_data = self.audio_sources["process_data_func"]() + text = self.audio_recognizer.recognize_google(audio_data, language=self.language) + except Exception as e: + pass + finally: + pass + # os.unlink(path) + + if text != '': + self.update_transcript(text) + + def update_last_sample_and_phrase_status(self, data, time_spoken): + source_info = self.audio_sources + if 
source_info["last_spoken"] and time_spoken - source_info["last_spoken"] > timedelta(seconds=self.phrase_timeout): + source_info["last_sample"] = bytes() + source_info["new_phrase"] = True + else: + source_info["new_phrase"] = False + + source_info["last_sample"] += data + source_info["last_spoken"] = time_spoken + + def process_mic_data(self): + audio_data = sr.AudioData(self.audio_sources["last_sample"], self.audio_sources["sample_rate"], self.audio_sources["sample_width"]) + return audio_data + + def process_speaker_data(self): + temp_file = io.BytesIO() + with wave.open(temp_file, 'wb') as wf: + wf.setnchannels(self.audio_sources["channels"]) + p = pyaudio.PyAudio() + wf.setsampwidth(p.get_sample_size(pyaudio.paInt16)) + wf.setframerate(self.audio_sources["sample_rate"]) + wf.writeframes(self.audio_sources["last_sample"]) + temp_file.seek(0) + with sr.AudioFile(temp_file) as source: + audio = self.audio_recognizer.record(source) + return audio + + def update_transcript(self, text): + source_info = self.audio_sources + transcript = self.transcript_data + + if source_info["new_phrase"] or len(transcript) == 0: + if len(transcript) > self.max_phrases: + transcript.pop(-1) + transcript.insert(0, text) + else: + transcript[0] = text + + def get_transcript(self): + if len(self.transcript_data) > 0: + text = self.transcript_data.pop(-1) + else: + text = "" + return text + + def clear_transcript_data(self): + self.transcript_data.clear() + self.audio_sources["last_sample"] = bytes() + self.audio_sources["new_phrase"] = True \ No newline at end of file diff --git a/audio_utils.py b/audio_utils.py new file mode 100644 index 00000000..fd56ae5c --- /dev/null +++ b/audio_utils.py @@ -0,0 +1,49 @@ +import pyaudiowpatch as pyaudio + +def get_input_device_list(): + devices = [] + with pyaudio.PyAudio() as p: + wasapi_info = p.get_host_api_info_by_type(pyaudio.paWASAPI) + for host_index in range(0, p.get_host_api_count()): + for device_index in range(0, p. 
get_host_api_info_by_index(host_index)['deviceCount']): + device = p.get_device_info_by_host_api_device_index(host_index, device_index) + if device["hostApi"] == wasapi_info["index"] and device["maxInputChannels"] > 0 and device["isLoopbackDevice"] is False: + devices.append(device) + return devices + +def get_output_device_list(): + devices =[] + with pyaudio.PyAudio() as p: + wasapi_info = p.get_host_api_info_by_type(pyaudio.paWASAPI) + for device in p.get_loopback_device_info_generator(): + if device["hostApi"] == wasapi_info["index"] and device["isLoopbackDevice"] is True: + devices.append(device) + return devices + +def get_default_input_device(): + with pyaudio.PyAudio() as p: + wasapi_info = p.get_host_api_info_by_type(pyaudio.paWASAPI) + defaultInputDevice = wasapi_info["defaultInputDevice"] + + for host_index in range(0, p.get_host_api_count()): + for device_index in range(0, p. get_host_api_info_by_index(host_index)['deviceCount']): + device = p.get_device_info_by_host_api_device_index(host_index, device_index) + if device["index"] == defaultInputDevice: + default_device = device + return default_device + +def get_default_output_device(): + with pyaudio.PyAudio() as p: + wasapi_info = p.get_host_api_info_by_type(pyaudio.paWASAPI) + defaultOutputDevice = wasapi_info["defaultOutputDevice"] + + for host_index in range(0, p.get_host_api_count()): + for device_index in range(0, p. 
get_host_api_info_by_index(host_index)['deviceCount']): + device = p.get_device_info_by_host_api_device_index(host_index, device_index) + if device["index"] == defaultOutputDevice: + default_speakers = device + if not default_speakers["isLoopbackDevice"]: + for loopback in p.get_loopback_device_info_generator(): + if default_speakers["name"] in loopback["name"]: + default_device = loopback + return default_device \ No newline at end of file diff --git a/languages.py b/languages.py index 5f512980..40b546f5 100644 --- a/languages.py +++ b/languages.py @@ -1,4 +1,4 @@ -recognize_lang = { +transcription_lang = { "Japanese Japan":"ja-JP", "English United States":"en-US", "English United Kingdom":"en-GB", @@ -90,7 +90,35 @@ recognize_lang = { "Zulu South Africa":"zu-ZA" } -deepl_lang = { +translators = ["DeepL(web)", "DeepL(auth)", "Google(web)", "Bing(web)"] +translation_lang = {} +translation_lang["DeepL(web)"] = { + "Japanese":"JA", + "English":"EN", + "Bulgarian":"BG", + "Chinese":"ZH", + "Czech":"CS", + "Danish":"DA", + "Dutch":"NL", + "Estonian":"ET", + "Finnish":"FI", + "French":"FR", + "German":"DE", + "Greek":"EL", + "Hungarian":"HU", + "Italian":"IT", + "Latvian":"LV", + "Lithuanian":"LT", + "Polish":"PL", + "Portuguese":"PT", + "Romanian":"RO", + "Russian":"RU", + "Slovak":"SK", + "Slovenian":"SL", + "Spanish":"ES", + "Swedish":"SV" +} +translation_lang["DeepL(auth)"] = { "Japanese":"ja", "English American":"en-US", "English British":"en-GB", @@ -125,98 +153,7 @@ deepl_lang = { "Ukrainian":"uk", "Chinese":"zh" } - -deepl_translate_lang = { - "Japanese":"JA", - "English":"EN", - "Bulgarian":"BG", - "Chinese":"ZH", - "Czech":"CS", - "Danish":"DA", - "Dutch":"NL", - "Estonian":"ET", - "Finnish":"FI", - "French":"FR", - "German":"DE", - "Greek":"EL", - "Hungarian":"HU", - "Italian":"IT", - "Latvian":"LV", - "Lithuanian":"LT", - "Polish":"PL", - "Portuguese":"PT", - "Romanian":"RO", - "Russian":"RU", - "Slovak":"SK", - "Slovenian":"SL", - "Spanish":"ES", - 
"Swedish":"SV" -} - -translators_bing_lang = { - "japanese":"ja", - "english":"en", - "chinese":"zh", - "arabic":"ar", - "russian":"ru", - "french":"fr", - "german":"de", - "spanish":"es", - "portuguese":"pt", - "italian":"it", - "korean":"ko", - "greek":"el", - "dutch":"nl", - "hindi":"hi", - "turkish":"tr", - "malay":"ms", - "thai":"th", - "vietnamese":"vi", - "indonesian":"id", - "hebrew":"he", - "polish":"pl", - "czech":"cs", - "hungarian":"hu", - "estonian":"et", - "bulgarian":"bg", - "danish":"da", - "finnish":"fi", - "romanian":"ro", - "swedish":"sv", - "slovenian":"sl", - "persian/farsi":"fa", - "bosnian":"bs", - "serbian":"sr", - "fijian":"fj", - "filipino":"tl", - "haitiancreole":"ht", - "catalan":"ca", - "croatian":"hr", - "latvian":"lv", - "lithuanian":"lt", - "urdu":"ur", - "ukrainian":"uk", - "welsh":"cy", - "tahiti":"ty", - "tongan":"to", - "swahili":"sw", - "samoan":"sm", - "slovak":"sk", - "afrikaans":"af", - "norwegian":"no", - "bengali":"bn", - "malagasy":"mg", - "maltese":"mt", - "queretaro otomi":"otq", - "klingon/tlhingan hol":"tlh", - "gujarati":"gu", - "tamil":"ta", - "telugu":"te", - "punjabi":"pa", - "irish":"ga" -} - -translators_google_lang = { +translation_lang["Google(web)"] = { "japanese":"ja", "english":"en", "chinese":"zh", @@ -279,4 +216,66 @@ translators_google_lang = { "esperanto":"eo", "basque":"eu", "irish":"ga" +} +translation_lang["Bing(web)"] = { + "japanese":"ja", + "english":"en", + "chinese":"zh", + "arabic":"ar", + "russian":"ru", + "french":"fr", + "german":"de", + "spanish":"es", + "portuguese":"pt", + "italian":"it", + "korean":"ko", + "greek":"el", + "dutch":"nl", + "hindi":"hi", + "turkish":"tr", + "malay":"ms", + "thai":"th", + "vietnamese":"vi", + "indonesian":"id", + "hebrew":"he", + "polish":"pl", + "czech":"cs", + "hungarian":"hu", + "estonian":"et", + "bulgarian":"bg", + "danish":"da", + "finnish":"fi", + "romanian":"ro", + "swedish":"sv", + "slovenian":"sl", + "persian/farsi":"fa", + "bosnian":"bs", + 
"serbian":"sr", + "fijian":"fj", + "filipino":"tl", + "haitiancreole":"ht", + "catalan":"ca", + "croatian":"hr", + "latvian":"lv", + "lithuanian":"lt", + "urdu":"ur", + "ukrainian":"uk", + "welsh":"cy", + "tahiti":"ty", + "tongan":"to", + "swahili":"sw", + "samoan":"sm", + "slovak":"sk", + "afrikaans":"af", + "norwegian":"no", + "bengali":"bn", + "malagasy":"mg", + "maltese":"mt", + "queretaro otomi":"otq", + "klingon/tlhingan hol":"tlh", + "gujarati":"gu", + "tamil":"ta", + "telugu":"te", + "punjabi":"pa", + "irish":"ga" } \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..1e4b81c7 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +pillow +PyAudioWPatch +python-osc +customtkinter +deepl \ No newline at end of file diff --git a/transcription.py b/transcription.py deleted file mode 100644 index 98021064..00000000 --- a/transcription.py +++ /dev/null @@ -1,194 +0,0 @@ -import queue -import speech_recognition as sr -import pyaudiowpatch as pyaudio -import languages - -# VoiceRecognizer -class VoiceRecognizer(): - def __init__(self): - self.r = sr.Recognizer() - self.p = pyaudio.PyAudio() - - self.dict_languages = languages.recognize_lang - self.languages = list(self.dict_languages.keys()) - self.mic_device_name = None - self.mic_threshold = 50 - self.mic_is_dynamic = False - self.mic_language = "Japanese Japan" - self.mic_queue = queue.Queue(10) - - self.spk_device = None - self.spk_interval = 3 - self.spk_language = "Japanese Japan" - self.spk_stream = None - self.spk_queue = queue.Queue(10) - - def search_input_device(self): - devices = [] - with pyaudio.PyAudio() as p: - wasapi_info = p.get_host_api_info_by_type(pyaudio.paWASAPI) - for host_index in range(0, p.get_host_api_count()): - for device_index in range(0, p. 
get_host_api_info_by_index(host_index)['deviceCount']): - device = p.get_device_info_by_host_api_device_index(host_index, device_index) - if device["hostApi"] == wasapi_info["index"] and device["maxInputChannels"] > 0 and device["isLoopbackDevice"] is False: - devices.append(device) - return devices - - def search_output_device(self): - devices =[] - with pyaudio.PyAudio() as p: - wasapi_info = p.get_host_api_info_by_type(pyaudio.paWASAPI) - for host_index in range(0, p.get_host_api_count()): - for device_index in range(0, p. get_host_api_info_by_index(host_index)['deviceCount']): - device = p.get_device_info_by_host_api_device_index(host_index, device_index) - if device["hostApi"] == wasapi_info["index"] and device["isLoopbackDevice"] is True: - devices.append(device) - return devices - - def search_default_device(self): - with pyaudio.PyAudio() as p: - wasapi_info = p.get_host_api_info_by_type(pyaudio.paWASAPI) - defaultInputDevice, defaultOutputDevice = wasapi_info["defaultInputDevice"], wasapi_info["defaultOutputDevice"] - - for host_index in range(0, p.get_host_api_count()): - for device_index in range(0, p. get_host_api_info_by_index(host_index)['deviceCount']): - device = p.get_device_info_by_host_api_device_index(host_index, device_index) - if device["index"] == defaultInputDevice: - default_mics = device - name_mic = default_mics["name"] - break - - for host_index in range(0, p.get_host_api_count()): - for device_index in range(0, p. 
get_host_api_info_by_index(host_index)['deviceCount']): - device = p.get_device_info_by_host_api_device_index(host_index, device_index) - if device["index"] == defaultOutputDevice: - default_speakers = device - if not default_speakers["isLoopbackDevice"]: - for loopback in p.get_loopback_device_info_generator(): - if default_speakers["name"] in loopback["name"]: - name_spk = loopback["name"] - break - return name_mic, name_spk - - def set_mic(self, device_name, threshold=50, is_dynamic=False, language="Japanese Japan"): - input_device_list = self.search_input_device() - self.mic_device_name = [device["index"] for device in input_device_list if device["name"] == device_name][0] - self.mic_threshold = threshold - self.mic_is_dynamic = is_dynamic - self.mic_language = language - - def init_mic(self): - while self.mic_queue.empty() is False: - self.mic_queue.get() - - self.r.energy_threshold = self.mic_threshold - if self.mic_is_dynamic: - with self.mic as source: - self.r.adjust_for_ambient_noise(source, 3.0) - - def listen_mic(self): - with sr.Microphone(device_index=self.mic_device_name) as source: - audio = self.r.listen(source) - self.mic_queue.put(audio) - - def recognize_mic(self): - try: - audio = self.mic_queue.get() - text = self.r.recognize_google(audio, language=self.dict_languages[self.mic_language]) - except: - text = "" - return text - - def set_spk(self, device_name, interval=4, language="Japanese Japan"): - output_device_list = self.search_output_device() - self.spk_device = [device for device in output_device_list if device["name"] == device_name][0] - self.spk_interval = interval - self.spk_language = language - - def init_spk(self): - while self.spk_queue.empty() is False: - self.spk_queue.get() - - def spk_record_callback(self, in_data, frame_count, time_info, status): - self.spk_queue.put(in_data) - return (in_data, pyaudio.paContinue) - - def start_spk_recording(self): - self.close_spk_stream() - self.spk_stream = 
self.p.open(format=pyaudio.paInt16, - channels=self.spk_device["maxInputChannels"], - rate=int(self.spk_device["defaultSampleRate"]), - frames_per_buffer=int(self.spk_device["defaultSampleRate"])*self.spk_interval, - input=True, - input_device_index=self.spk_device["index"], - stream_callback=self.spk_record_callback - ) - - def stop_spk_stream(self): - self.spk_stream.stop_stream() - - def start_spk_stream(self): - self.spk_stream.start_stream() - - def close_spk_stream(self): - if self.spk_stream is not None: - self.spk_stream.stop_stream() - self.spk_stream.close() - self.spk_stream = None - - def recognize_spk(self): - try: - in_data = self.spk_queue.get() - audio_data = sr.AudioData(in_data, int(self.spk_device["defaultSampleRate"]), self.spk_interval) - text = self.r.recognize_google(audio_data, language=self.dict_languages[self.spk_language]) - except: - text = "" - return text - -if __name__ == "__main__": - # import queue - # import threading - - # mic_queue = queue.Queue() - # spk_queue = queue.Queue() - # vr = VoiceRecognizer(mic_queue, spk_queue) - - # mic_name, spk_name = vr.search_default_device() - # print("mic_name", mic_name) - # print("spk_name", spk_name) - - # ############################################################### - # vr.set_mic(device_name=mic_name, threshold=300, is_dynamic=False, language="ja-JP") - # vr.init_mic() - - # def vr_listen_mic(): - # while True: - # vr.listen_mic() - - # def vr_recognize_mic(): - # while True: - # text = vr.recognize_mic() - # if len(text) > 0: - # print(text) - # th_vr_listen_mic = threading.Thread(target=vr_listen_mic) - # th_vr_listen_mic.start() - # th_vr_recognize_mic = threading.Thread(target=vr_recognize_mic) - # th_vr_recognize_mic.start() - # ############################################################### - - # ############################################################### - # vr.set_spk(device_name=spk_name, interval=4, language="ja-JP") - # vr.start_spk_recording() - - # def 
vr_recognize_spk(): - # while True: - # text = vr.recognize_spk() - # if len(text) > 0: - # print(text) - # th_vr_recognize_spk = threading.Thread(target=vr_recognize_spk) - # th_vr_recognize_spk.start() - # ############################################################### - - vr = VoiceRecognizer() - print(vr.search_input_device()) - print(vr.search_default_device()) \ No newline at end of file diff --git a/translation.py b/translation.py index 58f8ad8c..50eda54e 100644 --- a/translation.py +++ b/translation.py @@ -6,24 +6,9 @@ import languages # Translator class Translator(): def __init__(self): - self.translator_status = { - "DeepL(web)": False, - "DeepL(auth)": False, - "Google(web)": False, - "Bing(web)": False, - } - - self.dict_languages = {} - self.dict_languages["DeepL(web)"] = languages.deepl_translate_lang - self.dict_languages["DeepL(auth)"] = languages.deepl_lang - self.dict_languages["Google(web)"] = languages.translators_google_lang - self.dict_languages["Bing(web)"] = languages.translators_bing_lang - - self.languages = {} - self.languages["DeepL(web)"] = list(self.dict_languages["DeepL(web)"].keys()) - self.languages["DeepL(auth)"] = list(self.dict_languages["DeepL(auth)"].keys()) - self.languages["Google(web)"] = list(self.dict_languages["Google(web)"].keys()) - self.languages["Bing(web)"] = list(self.dict_languages["Bing(web)"].keys()) + self.translator_status = {} + for translator in languages.translators: + self.translator_status[translator] = False self.deepl_client = None def authentication(self, translator_name, authkey=None): @@ -48,33 +33,35 @@ class Translator(): return result def translate(self, translator_name, source_language, target_language, message): - result = False + result = "" try: + source_language=languages.translation_lang[translator_name][source_language] + target_language=languages.translation_lang[translator_name][target_language] if translator_name == "DeepL(web)": result = deepl_translate.translate( - 
source_language=self.dict_languages["DeepL(web)"][source_language], - target_language=self.dict_languages["DeepL(web)"][target_language], + source_language=source_language, + target_language=target_language, text=message ) elif translator_name == "DeepL(auth)": result = self.deepl_client.translate_text( message, - source_lang=self.dict_languages["DeepL(auth)"][source_language], - target_lang=self.dict_languages["DeepL(auth)"][target_language], + source_lang=source_language, + target_lang=target_language, ).text elif translator_name == "Google(web)": result = ts.translate_text( query_text=message, translator="google", - from_language=self.dict_languages["Google(web)"][source_language], - to_language=self.dict_languages["Google(web)"][target_language], + from_language=source_language, + to_language=target_language, ) elif translator_name == "Bing(web)": result = ts.translate_text( query_text=message, translator="bing", - from_language=self.dict_languages["Bing(web)"][source_language], - to_language=self.dict_languages["Bing(web)"][target_language], + from_language=source_language, + to_language=target_language, ) except: pass diff --git a/utils.py b/utils.py index 234bacf7..473487fc 100644 --- a/utils.py +++ b/utils.py @@ -38,4 +38,4 @@ class thread_fnc(threading.Thread): while True: if self.stopped(): return - self.fnc() \ No newline at end of file + self.fnc(*self._args, **self._kwargs) \ No newline at end of file diff --git a/window_config.py b/window_config.py index b9314236..adbe90b9 100644 --- a/window_config.py +++ b/window_config.py @@ -2,6 +2,8 @@ import os import tkinter as tk import customtkinter import utils +import audio_utils +import languages class ToplevelWindowConfig(customtkinter.CTkToplevel): def __init__(self, parent, *args, **kwargs): @@ -31,13 +33,16 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel): # tab UI ## slider transparency + row = 0 + padx = 5 + pady = 1 self.label_transparency = customtkinter.CTkLabel( 
self.tabview_config.tab("UI"), text="Transparency:", fg_color="transparent", font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY) ) - self.label_transparency.grid(row=0, column=0, columnspan=1, padx=5, pady=5, sticky="nsw") + self.label_transparency.grid(row=row, column=0, columnspan=1, padx=padx, pady=pady, sticky="nsw") self.slider_transparency = customtkinter.CTkSlider( self.tabview_config.tab("UI"), from_=50, @@ -45,16 +50,17 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel): command=self.slider_transparency_callback, variable=tk.DoubleVar(value=self.parent.TRANSPARENCY), ) - self.slider_transparency.grid(row=0, column=1, columnspan=1, padx=5, pady=10, sticky="nsew") + self.slider_transparency.grid(row=row, column=1, columnspan=1, padx=padx, pady=10, sticky="nsew") ## optionmenu theme + row += 1 self.label_appearance_theme = customtkinter.CTkLabel( self.tabview_config.tab("UI"), text="Appearance Theme:", fg_color="transparent", font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY) ) - self.label_appearance_theme.grid(row=1, column=0, columnspan=1, padx=5, pady=5, sticky="nsw") + self.label_appearance_theme.grid(row=row, column=0, columnspan=1, padx=padx, pady=pady, sticky="nsw") self.optionmenu_appearance_theme = customtkinter.CTkOptionMenu( self.tabview_config.tab("UI"), values=["Light", "Dark", "System"], @@ -62,17 +68,18 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel): variable=customtkinter.StringVar(value=self.parent.APPEARANCE_THEME), font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY), ) - self.optionmenu_appearance_theme.grid(row=1, column=1, columnspan=1, padx=5, pady=5, sticky="nsew") + self.optionmenu_appearance_theme.grid(row=row, column=1, columnspan=1, padx=padx, pady=pady, sticky="nsew") self.optionmenu_appearance_theme._dropdown_menu.configure(font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY)) ## optionmenu UI scaling + row += 1 self.label_ui_scaling = customtkinter.CTkLabel( 
self.tabview_config.tab("UI"), text="UI Scaling:", fg_color="transparent", font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY) ) - self.label_ui_scaling.grid(row=2, column=0, columnspan=1, padx=5, pady=5, sticky="nsw") + self.label_ui_scaling.grid(row=row, column=0, columnspan=1, padx=padx, pady=pady, sticky="nsw") self.optionmenu_ui_scaling = customtkinter.CTkOptionMenu( self.tabview_config.tab("UI"), values=["80%", "90%", "100%", "110%", "120%"], @@ -80,17 +87,18 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel): variable=customtkinter.StringVar(value=self.parent.UI_SCALING), font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY), ) - self.optionmenu_ui_scaling.grid(row=2, column=1, columnspan=1, padx=5, pady=5, sticky="nsew") + self.optionmenu_ui_scaling.grid(row=row, column=1, columnspan=1, padx=padx, pady=pady, sticky="nsew") self.optionmenu_ui_scaling._dropdown_menu.configure(font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY)) ## optionmenu font family + row += 1 self.label_font_family = customtkinter.CTkLabel( self.tabview_config.tab("UI"), text="Font Family:", fg_color="transparent", font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY) ) - self.label_font_family.grid(row=3, column=0, columnspan=1, padx=5, pady=5, sticky="nsw") + self.label_font_family.grid(row=row, column=0, columnspan=1, padx=padx, pady=pady, sticky="nsw") font_families = list(tk.font.families()) self.optionmenu_font_family = customtkinter.CTkOptionMenu( self.tabview_config.tab("UI"), @@ -99,18 +107,21 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel): variable=customtkinter.StringVar(value=self.parent.FONT_FAMILY), font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY), ) - self.optionmenu_font_family.grid(row=3, column=1, columnspan=1, padx=5, pady=5, sticky="nsew") + self.optionmenu_font_family.grid(row=row, column=1, columnspan=1, padx=padx, pady=pady, sticky="nsew") 
self.optionmenu_font_family._dropdown_menu.configure(font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY)) # tab Translation ## optionmenu translation translator + row = 0 + padx = 5 + pady = 1 self.label_translation_translator = customtkinter.CTkLabel( self.tabview_config.tab("Translation"), text="Select Translator:", fg_color="transparent", font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY), ) - self.label_translation_translator.grid(row=0, column=0, columnspan=1, padx=5, pady=5, sticky="nsw") + self.label_translation_translator.grid(row=row, column=0, columnspan=1, padx=padx, pady=pady, sticky="nsw") self.optionmenu_translation_translator = customtkinter.CTkOptionMenu( self.tabview_config.tab("Translation"), values=list(self.parent.translator.translator_status.keys()), @@ -118,27 +129,28 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel): variable=customtkinter.StringVar(value=self.parent.CHOICE_TRANSLATOR), font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY), ) - self.optionmenu_translation_translator.grid(row=0, column=1, columnspan=3 ,padx=5, pady=5, sticky="nsew") + self.optionmenu_translation_translator.grid(row=row, column=1, columnspan=3 ,padx=padx, pady=pady, sticky="nsew") self.optionmenu_translation_translator._dropdown_menu.configure(font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY)) ## optionmenu translation input language + row +=1 self.label_translation_input_language = customtkinter.CTkLabel( self.tabview_config.tab("Translation"), text="Send Language:", fg_color="transparent", font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY) ) - self.label_translation_input_language.grid(row=1, column=0, columnspan=1, padx=5, pady=5, sticky="nsw") + self.label_translation_input_language.grid(row=row, column=0, columnspan=1, padx=padx, pady=pady, sticky="nsw") ## select translation input source language self.optionmenu_translation_input_source_language = customtkinter.CTkOptionMenu( 
self.tabview_config.tab("Translation"), command=self.optionmenu_translation_input_source_language_callback, - values=self.parent.translator.languages[self.parent.CHOICE_TRANSLATOR], + values=list(languages.translation_lang[self.parent.CHOICE_TRANSLATOR].keys()), variable=customtkinter.StringVar(value=self.parent.INPUT_SOURCE_LANG), font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY), ) - self.optionmenu_translation_input_source_language.grid(row=1, column=1, columnspan=1, padx=5, pady=5, sticky="nsew") + self.optionmenu_translation_input_source_language.grid(row=row, column=1, columnspan=1, padx=padx, pady=pady, sticky="nsew") self.optionmenu_translation_input_source_language._dropdown_menu.configure(font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY)) ## label translation input arrow @@ -148,37 +160,38 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel): fg_color="transparent", font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY) ) - self.label_translation_input_arrow.grid(row=1, column=2, columnspan=1, padx=5, pady=5, sticky="nsew") + self.label_translation_input_arrow.grid(row=row, column=2, columnspan=1, padx=padx, pady=pady, sticky="nsew") ## select translation input target language self.optionmenu_translation_input_target_language = customtkinter.CTkOptionMenu( self.tabview_config.tab("Translation"), command=self.optionmenu_translation_input_target_language_callback, - values=self.parent.translator.languages[self.parent.CHOICE_TRANSLATOR], + values=list(languages.translation_lang[self.parent.CHOICE_TRANSLATOR].keys()), variable=customtkinter.StringVar(value=self.parent.INPUT_TARGET_LANG), font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY), ) - self.optionmenu_translation_input_target_language.grid(row=1, column=3, columnspan=1, padx=5, pady=5, sticky="nsew") + self.optionmenu_translation_input_target_language.grid(row=row, column=3, columnspan=1, padx=padx, pady=pady, sticky="nsew") 
self.optionmenu_translation_input_target_language._dropdown_menu.configure(font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY)) ## optionmenu translation output language + row +=1 self.label_translation_output_language = customtkinter.CTkLabel( self.tabview_config.tab("Translation"), text="Receive Language:", fg_color="transparent", font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY) ) - self.label_translation_output_language.grid(row=2, column=0, columnspan=1, padx=5, pady=5, sticky="nsw") + self.label_translation_output_language.grid(row=row, column=0, columnspan=1, padx=padx, pady=pady, sticky="nsw") ## select translation output source language self.optionmenu_translation_output_source_language = customtkinter.CTkOptionMenu( self.tabview_config.tab("Translation"), command=self.optionmenu_translation_output_source_language_callback, - values=self.parent.translator.languages[self.parent.CHOICE_TRANSLATOR], + values=list(languages.translation_lang[self.parent.CHOICE_TRANSLATOR].keys()), variable=customtkinter.StringVar(value=self.parent.OUTPUT_SOURCE_LANG), font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY), ) - self.optionmenu_translation_output_source_language.grid(row=2, column=1, columnspan=1, padx=5, pady=5, sticky="nsew") + self.optionmenu_translation_output_source_language.grid(row=row, column=1, columnspan=1, padx=padx, pady=pady, sticky="nsew") self.optionmenu_translation_output_source_language._dropdown_menu.configure(font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY)) ## label translation output arrow @@ -188,211 +201,353 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel): fg_color="transparent", font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY) ) - self.label_translation_output_arrow.grid(row=2, column=2, columnspan=1, padx=5, pady=5, sticky="nsew") + self.label_translation_output_arrow.grid(row=row, column=2, columnspan=1, padx=padx, pady=pady, sticky="nsew") ## select translation output target language 
self.optionmenu_translation_output_target_language = customtkinter.CTkOptionMenu( self.tabview_config.tab("Translation"), command=self.optionmenu_translation_output_target_language_callback, - values=self.parent.translator.languages[self.parent.CHOICE_TRANSLATOR], + values=list(languages.translation_lang[self.parent.CHOICE_TRANSLATOR].keys()), variable=customtkinter.StringVar(value=self.parent.OUTPUT_TARGET_LANG), font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY), ) - self.optionmenu_translation_output_target_language.grid(row=2, column=3, columnspan=1, padx=5, pady=5, sticky="nsew") + self.optionmenu_translation_output_target_language.grid(row=row, column=3, columnspan=1, padx=padx, pady=pady, sticky="nsew") self.optionmenu_translation_output_target_language._dropdown_menu.configure(font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY)) # tab Transcription ## optionmenu input mic device + row = 0 + padx = 5 + pady = 1 self.label_input_mic_device = customtkinter.CTkLabel( self.tabview_config.tab("Transcription"), text="Input Mic Device:", fg_color="transparent", font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY) ) - self.label_input_mic_device.grid(row=0, column=0, columnspan=1, padx=5, pady=5, sticky="nsw") + self.label_input_mic_device.grid(row=row, column=0, columnspan=1, padx=padx, pady=pady, sticky="nsw") self.optionmenu_input_mic_device = customtkinter.CTkOptionMenu( self.tabview_config.tab("Transcription"), - values=[device["name"] for device in self.parent.vr.search_input_device()], + values=[device["name"] for device in audio_utils.get_input_device_list()], command=self.optionmenu_input_mic_device_callback, variable=customtkinter.StringVar(value=self.parent.CHOICE_MIC_DEVICE), font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY), ) - self.optionmenu_input_mic_device.grid(row=0, column=1, columnspan=1 ,padx=5, pady=5, sticky="nsew") + self.optionmenu_input_mic_device.grid(row=row, column=1, columnspan=1 ,padx=padx, pady=pady, 
sticky="nsew") self.optionmenu_input_mic_device._dropdown_menu.configure(font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY)) ## optionmenu input mic voice language + row +=1 self.label_input_mic_voice_language = customtkinter.CTkLabel( self.tabview_config.tab("Transcription"), text="Input Mic Voice Language:", fg_color="transparent", font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY) ) - self.label_input_mic_voice_language.grid(row=1, column=0, columnspan=1, padx=5, pady=5, sticky="nsw") + self.label_input_mic_voice_language.grid(row=row, column=0, columnspan=1, padx=padx, pady=pady, sticky="nsw") self.optionmenu_input_mic_voice_language = customtkinter.CTkOptionMenu( self.tabview_config.tab("Transcription"), - values=list(self.parent.vr.languages), + values=list(languages.transcription_lang.keys()), command=self.optionmenu_input_mic_voice_language_callback, variable=customtkinter.StringVar(value=self.parent.INPUT_MIC_VOICE_LANGUAGE), font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY), ) - self.optionmenu_input_mic_voice_language.grid(row=1, column=1, columnspan=1 ,padx=5, pady=5, sticky="nsew") + self.optionmenu_input_mic_voice_language.grid(row=row, column=1, columnspan=1 ,padx=padx, pady=pady, sticky="nsew") self.optionmenu_input_mic_voice_language._dropdown_menu.configure(font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY)) - ## checkbox input mic in dynamic - self.label_input_mic_is_dynamic = customtkinter.CTkLabel( + ## entry input mic energy threshold + row +=1 + self.label_input_mic_energy_threshold = customtkinter.CTkLabel( self.tabview_config.tab("Transcription"), - text="Input Mic IsDynamic:", + text="Input Mic Energy Threshold:", fg_color="transparent", font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY) ) - self.label_input_mic_is_dynamic.grid(row=2, column=0, columnspan=1, padx=5, pady=5, sticky="nsw") - self.checkbox_input_mic_is_dynamic = customtkinter.CTkCheckBox( + 
self.label_input_mic_energy_threshold.grid(row=row, column=0, columnspan=1, padx=padx, pady=pady, sticky="nsw") + self.entry_input_mic_energy_threshold = customtkinter.CTkEntry( + self.tabview_config.tab("Transcription"), + textvariable=customtkinter.StringVar(value=self.parent.INPUT_MIC_ENERGY_THRESHOLD), + font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY) + ) + self.entry_input_mic_energy_threshold.grid(row=row, column=1, columnspan=1 ,padx=padx, pady=pady, sticky="nsew") + self.entry_input_mic_energy_threshold.bind("", self.entry_input_mic_energy_threshold_callback) + + ## checkbox input mic dynamic energy threshold + row +=1 + self.label_input_mic_dynamic_energy_threshold = customtkinter.CTkLabel( + self.tabview_config.tab("Transcription"), + text="Input Mic Dynamic Energy Threshold:", + fg_color="transparent", + font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY) + ) + self.label_input_mic_dynamic_energy_threshold.grid(row=row, column=0, columnspan=1, padx=padx, pady=pady, sticky="nsw") + self.checkbox_input_mic_dynamic_energy_threshold = customtkinter.CTkCheckBox( self.tabview_config.tab("Transcription"), text="", onvalue=True, offvalue=False, - command=self.checkbox_input_mic_is_dynamic_callback, + command=self.checkbox_input_mic_dynamic_energy_threshold_callback, font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY) ) - self.checkbox_input_mic_is_dynamic.grid(row=2, column=1, columnspan=1 ,padx=5, pady=5, sticky="nsew") - if self.parent.INPUT_MIC_IS_DYNAMIC is True: - self.checkbox_input_mic_is_dynamic.select() + self.checkbox_input_mic_dynamic_energy_threshold.grid(row=row, column=1, columnspan=1 ,padx=padx, pady=pady, sticky="nsew") + if self.parent.INPUT_MIC_DYNAMIC_ENERGY_THRESHOLD is True: + self.checkbox_input_mic_dynamic_energy_threshold.select() else: - self.checkbox_input_mic_is_dynamic.deselect() + self.checkbox_input_mic_dynamic_energy_threshold.deselect() - ## entry input mic threshold - self.label_input_mic_threshold = 
customtkinter.CTkLabel( + ## entry input mic record timeout + row +=1 + self.label_input_mic_record_timeout = customtkinter.CTkLabel( self.tabview_config.tab("Transcription"), - text="Input Mic Threshold:", + text="Input Mic Record Timeout:", fg_color="transparent", font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY) ) - self.label_input_mic_threshold.grid(row=3, column=0, columnspan=1, padx=5, pady=5, sticky="nsw") - self.entry_input_mic_threshold = customtkinter.CTkEntry( + self.label_input_mic_record_timeout.grid(row=row, column=0, columnspan=1, padx=padx, pady=pady, sticky="nsw") + self.entry_input_mic_record_timeout = customtkinter.CTkEntry( self.tabview_config.tab("Transcription"), - textvariable=customtkinter.StringVar(value=self.parent.INPUT_MIC_THRESHOLD), + textvariable=customtkinter.StringVar(value=self.parent.INPUT_MIC_RECORD_TIMEOUT), font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY) ) - self.entry_input_mic_threshold.grid(row=3, column=1, columnspan=1 ,padx=5, pady=10, sticky="nsew") - self.entry_input_mic_threshold.bind("", self.entry_input_mic_threshold_callback) + self.entry_input_mic_record_timeout.grid(row=row, column=1, columnspan=1 ,padx=padx, pady=pady, sticky="nsew") + self.entry_input_mic_record_timeout.bind("", self.entry_input_mic_record_timeout_callback) + + ## entry input mic phrase timeout + row +=1 + self.label_input_mic_phrase_timeout = customtkinter.CTkLabel( + self.tabview_config.tab("Transcription"), + text="Input Mic Phrase Timeout:", + fg_color="transparent", + font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY) + ) + self.label_input_mic_phrase_timeout.grid(row=row, column=0, columnspan=1, padx=padx, pady=pady, sticky="nsw") + self.entry_input_mic_phrase_timeout = customtkinter.CTkEntry( + self.tabview_config.tab("Transcription"), + textvariable=customtkinter.StringVar(value=self.parent.INPUT_MIC_PHRASE_TIMEOUT), + font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY) + ) + 
self.entry_input_mic_phrase_timeout.grid(row=row, column=1, columnspan=1 ,padx=padx, pady=pady, sticky="nsew") + self.entry_input_mic_phrase_timeout.bind("", self.entry_input_mic_phrase_timeout_callback) + + ## entry input mic max phrases + row +=1 + self.label_input_mic_max_phrases = customtkinter.CTkLabel( + self.tabview_config.tab("Transcription"), + text="Input Mic Max Phrases:", + fg_color="transparent", + font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY) + ) + self.label_input_mic_max_phrases.grid(row=row, column=0, columnspan=1, padx=padx, pady=pady, sticky="nsw") + self.entry_input_mic_max_phrases = customtkinter.CTkEntry( + self.tabview_config.tab("Transcription"), + textvariable=customtkinter.StringVar(value=self.parent.INPUT_MIC_MAX_PHRASES), + font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY) + ) + self.entry_input_mic_max_phrases.grid(row=row, column=1, columnspan=1 ,padx=padx, pady=pady, sticky="nsew") + self.entry_input_mic_max_phrases.bind("", self.entry_input_mic_max_phrases_callback) ## optionmenu input speaker device + row +=1 self.label_input_speaker_device = customtkinter.CTkLabel( self.tabview_config.tab("Transcription"), text="Input Speaker Device:", fg_color="transparent", font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY) ) - self.label_input_speaker_device.grid(row=4, column=0, columnspan=1, padx=5, pady=5, sticky="nsw") + self.label_input_speaker_device.grid(row=row, column=0, columnspan=1, padx=padx, pady=pady, sticky="nsw") self.optionmenu_input_speaker_device = customtkinter.CTkOptionMenu( self.tabview_config.tab("Transcription"), - values=[device["name"] for device in self.parent.vr.search_output_device()], + values=[device["name"] for device in audio_utils.get_output_device_list()], command=self.optionmenu_input_speaker_device_callback, variable=customtkinter.StringVar(value=self.parent.CHOICE_SPEAKER_DEVICE), font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY), ) - 
self.optionmenu_input_speaker_device.grid(row=4, column=1, columnspan=1 ,padx=5, pady=5, sticky="nsew") + self.optionmenu_input_speaker_device.grid(row=row, column=1, columnspan=1 ,padx=padx, pady=pady, sticky="nsew") self.optionmenu_input_speaker_device._dropdown_menu.configure(font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY)) ## optionmenu input speaker voice language + row +=1 self.label_input_speaker_voice_language = customtkinter.CTkLabel( self.tabview_config.tab("Transcription"), text="Input Speaker Voice Language:", fg_color="transparent", font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY) ) - self.label_input_speaker_voice_language.grid(row=5, column=0, columnspan=1, padx=5, pady=5, sticky="nsw") + self.label_input_speaker_voice_language.grid(row=row, column=0, columnspan=1, padx=padx, pady=pady, sticky="nsw") self.optionmenu_input_speaker_voice_language = customtkinter.CTkOptionMenu( self.tabview_config.tab("Transcription"), - values=list(self.parent.vr.languages), + values=list(languages.transcription_lang.keys()), command=self.optionmenu_input_speaker_voice_language_callback, variable=customtkinter.StringVar(value=self.parent.INPUT_SPEAKER_VOICE_LANGUAGE), font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY), ) - self.optionmenu_input_speaker_voice_language.grid(row=5, column=1, columnspan=1 ,padx=5, pady=5, sticky="nsew") + self.optionmenu_input_speaker_voice_language.grid(row=row, column=1, columnspan=1 ,padx=padx, pady=pady, sticky="nsew") self.optionmenu_input_speaker_voice_language._dropdown_menu.configure(font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY)) - ## entry input speaker interval - self.label_input_speaker_interval = customtkinter.CTkLabel( + ## entry input speaker energy threshold + row +=1 + self.label_input_speaker_energy_threshold = customtkinter.CTkLabel( self.tabview_config.tab("Transcription"), - text="Input Speaker Interval:", + text="Input Speaker Energy Threshold:", fg_color="transparent", 
font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY) ) - self.label_input_speaker_interval.grid(row=6, column=0, columnspan=1, padx=5, pady=5, sticky="nsw") - self.entry_input_speaker_interval = customtkinter.CTkEntry( + self.label_input_speaker_energy_threshold.grid(row=row, column=0, columnspan=1, padx=padx, pady=pady, sticky="nsw") + self.entry_input_speaker_energy_threshold = customtkinter.CTkEntry( self.tabview_config.tab("Transcription"), - textvariable=customtkinter.StringVar(value=self.parent.INPUT_SPEAKER_INTERVAL), + textvariable=customtkinter.StringVar(value=self.parent.INPUT_SPEAKER_ENERGY_THRESHOLD), font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY) ) - self.entry_input_speaker_interval.grid(row=6, column=1, columnspan=1 ,padx=5, pady=5, sticky="nsew") - self.entry_input_speaker_interval.bind("", self.entry_input_speaker_interval_callback) + self.entry_input_speaker_energy_threshold.grid(row=row, column=1, columnspan=1 ,padx=padx, pady=pady, sticky="nsew") + self.entry_input_speaker_energy_threshold.bind("", self.entry_input_speaker_energy_threshold_callback) + + ## checkbox input speaker dynamic energy threshold + row +=1 + self.label_input_speaker_dynamic_energy_threshold = customtkinter.CTkLabel( + self.tabview_config.tab("Transcription"), + text="Input Speaker Dynamic Energy Threshold:", + fg_color="transparent", + font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY) + ) + self.label_input_speaker_dynamic_energy_threshold.grid(row=row, column=0, columnspan=1, padx=padx, pady=pady, sticky="nsw") + self.checkbox_input_speaker_dynamic_energy_threshold = customtkinter.CTkCheckBox( + self.tabview_config.tab("Transcription"), + text="", + onvalue=True, + offvalue=False, + command=self.checkbox_input_speaker_dynamic_energy_threshold_callback, + font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY) + ) + self.checkbox_input_speaker_dynamic_energy_threshold.grid(row=row, column=1, columnspan=1 ,padx=padx, pady=pady, sticky="nsew") 
+ if self.parent.INPUT_SPEAKER_DYNAMIC_ENERGY_THRESHOLD is True: + self.checkbox_input_speaker_dynamic_energy_threshold.select() + else: + self.checkbox_input_speaker_dynamic_energy_threshold.deselect() + + ## entry input speaker record timeout + row +=1 + self.label_input_speaker_record_timeout = customtkinter.CTkLabel( + self.tabview_config.tab("Transcription"), + text="Input Speaker Record Timeout:", + fg_color="transparent", + font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY) + ) + self.label_input_speaker_record_timeout.grid(row=row, column=0, columnspan=1, padx=padx, pady=pady, sticky="nsw") + self.entry_input_speaker_record_timeout = customtkinter.CTkEntry( + self.tabview_config.tab("Transcription"), + textvariable=customtkinter.StringVar(value=self.parent.INPUT_SPEAKER_RECORD_TIMEOUT), + font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY) + ) + self.entry_input_speaker_record_timeout.grid(row=row, column=1, columnspan=1 ,padx=padx, pady=pady, sticky="nsew") + self.entry_input_speaker_record_timeout.bind("", self.entry_input_speaker_record_timeout_callback) + + ## entry input speaker phrase timeout + row +=1 + self.label_input_speaker_phrase_timeout = customtkinter.CTkLabel( + self.tabview_config.tab("Transcription"), + text="Input Speaker Phrase Timeout:", + fg_color="transparent", + font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY) + ) + self.label_input_speaker_phrase_timeout.grid(row=row, column=0, columnspan=1, padx=padx, pady=pady, sticky="nsw") + self.entry_input_speaker_phrase_timeout = customtkinter.CTkEntry( + self.tabview_config.tab("Transcription"), + textvariable=customtkinter.StringVar(value=self.parent.INPUT_SPEAKER_PHRASE_TIMEOUT), + font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY) + ) + self.entry_input_speaker_phrase_timeout.grid(row=row, column=1, columnspan=1 ,padx=padx, pady=pady, sticky="nsew") + self.entry_input_speaker_phrase_timeout.bind("", self.entry_input_speaker_phrase_timeout_callback) + + ## 
entry input speaker max phrases + row +=1 + self.label_input_speaker_max_phrases = customtkinter.CTkLabel( + self.tabview_config.tab("Transcription"), + text="Input Speaker Max Phrases:", + fg_color="transparent", + font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY) + ) + self.label_input_speaker_max_phrases.grid(row=row, column=0, columnspan=1, padx=padx, pady=pady, sticky="nsw") + self.entry_input_speaker_max_phrases = customtkinter.CTkEntry( + self.tabview_config.tab("Transcription"), + textvariable=customtkinter.StringVar(value=self.parent.INPUT_SPEAKER_MAX_PHRASES), + font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY) + ) + self.entry_input_speaker_max_phrases.grid(row=row, column=1, columnspan=1 ,padx=padx, pady=pady, sticky="nsew") + self.entry_input_speaker_max_phrases.bind("", self.entry_input_speaker_max_phrases_callback) # tab Parameter ## entry ip address + row = 0 + padx = 5 + pady = 1 self.label_ip_address = customtkinter.CTkLabel( self.tabview_config.tab("Parameter"), text="OSC IP address:", fg_color="transparent", font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY) ) - self.label_ip_address.grid(row=0, column=0, columnspan=1, padx=5, pady=5, sticky="nsw") + self.label_ip_address.grid(row=row, column=0, columnspan=1, padx=padx, pady=pady, sticky="nsw") self.entry_ip_address = customtkinter.CTkEntry( self.tabview_config.tab("Parameter"), textvariable=customtkinter.StringVar(value=self.parent.OSC_IP_ADDRESS), font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY) ) - self.entry_ip_address.grid(row=0, column=1, columnspan=1, padx=1, pady=5, sticky="nsew") + self.entry_ip_address.grid(row=row, column=1, columnspan=1, padx=padx, pady=pady, sticky="nsew") self.entry_ip_address.bind("", self.entry_ip_address_callback) ## entry port + row +=1 self.label_port = customtkinter.CTkLabel( self.tabview_config.tab("Parameter"), text="OSC Port:", fg_color="transparent", font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY) ) - 
self.label_port.grid(row=1, column=0, columnspan=1, padx=5, pady=5, sticky="nsw") + self.label_port.grid(row=row, column=0, columnspan=1, padx=padx, pady=pady, sticky="nsw") self.entry_port = customtkinter.CTkEntry( self.tabview_config.tab("Parameter"), textvariable=customtkinter.StringVar(value=self.parent.OSC_PORT), font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY) ) - self.entry_port.grid(row=1, column=1, columnspan=1, padx=1, pady=5, sticky="nsew") + self.entry_port.grid(row=row, column=1, columnspan=1, padx=padx, pady=pady, sticky="nsew") self.entry_port.bind("", self.entry_port_callback) ## entry authkey + row +=1 self.label_authkey = customtkinter.CTkLabel( self.tabview_config.tab("Parameter"), text="DeepL Auth Key:", fg_color="transparent", font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY) ) - self.label_authkey.grid(row=2, column=0, columnspan=1, padx=5, pady=5, sticky="nsw") + self.label_authkey.grid(row=row, column=0, columnspan=1, padx=padx, pady=pady, sticky="nsw") self.entry_authkey = customtkinter.CTkEntry( self.tabview_config.tab("Parameter"), textvariable=customtkinter.StringVar(value=self.parent.AUTH_KEYS["DeepL(auth)"]), font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY) ) - self.entry_authkey.grid(row=2, column=1, columnspan=1, padx=1, pady=5, sticky="nsew") + self.entry_authkey.grid(row=row, column=1, columnspan=1, padx=padx, pady=pady, sticky="nsew") self.entry_authkey.bind("", self.entry_authkey_callback) ## entry message format + row +=1 self.label_message_format = customtkinter.CTkLabel( self.tabview_config.tab("Parameter"), text="Message Format:", fg_color="transparent", font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY) ) - self.label_message_format.grid(row=3, column=0, columnspan=1, padx=5, pady=5, sticky="nsw") + self.label_message_format.grid(row=row, column=0, columnspan=1, padx=padx, pady=pady, sticky="nsw") self.entry_message_format = customtkinter.CTkEntry( self.tabview_config.tab("Parameter"), 
textvariable=customtkinter.StringVar(value=self.parent.MESSAGE_FORMAT), font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY) ) - self.entry_message_format.grid(row=3, column=1, columnspan=1, padx=1, pady=5, sticky="nsew") + self.entry_message_format.grid(row=row, column=1, columnspan=1, padx=padx, pady=pady, sticky="nsew") self.entry_message_format.bind("", self.entry_message_format_callback) + self.protocol("WM_DELETE_WINDOW", self.delete_window) + def slider_transparency_callback(self, value): self.parent.wm_attributes("-alpha", value/100) self.parent.TRANSPARENCY = value @@ -449,17 +604,30 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel): self.label_input_mic_voice_language.configure(font=customtkinter.CTkFont(family=choice)) self.optionmenu_input_mic_voice_language.configure(font=customtkinter.CTkFont(family=choice)) self.optionmenu_input_mic_voice_language._dropdown_menu.configure(font=customtkinter.CTkFont(family=choice)) - self.label_input_mic_is_dynamic.configure(font=customtkinter.CTkFont(family=choice)) - self.label_input_mic_threshold.configure(font=customtkinter.CTkFont(family=choice)) - self.entry_input_mic_threshold.configure(font=customtkinter.CTkFont(family=choice)) + self.label_input_mic_energy_threshold.configure(font=customtkinter.CTkFont(family=choice)) + self.entry_input_mic_energy_threshold.configure(font=customtkinter.CTkFont(family=choice)) + self.label_input_mic_dynamic_energy_threshold.configure(font=customtkinter.CTkFont(family=choice)) + self.label_input_mic_record_timeout.configure(font=customtkinter.CTkFont(family=choice)) + self.entry_input_mic_record_timeout.configure(font=customtkinter.CTkFont(family=choice)) + self.label_input_mic_phrase_timeout.configure(font=customtkinter.CTkFont(family=choice)) + self.entry_input_mic_phrase_timeout.configure(font=customtkinter.CTkFont(family=choice)) + self.label_input_mic_max_phrases.configure(font=customtkinter.CTkFont(family=choice)) + 
self.entry_input_mic_max_phrases.configure(font=customtkinter.CTkFont(family=choice)) self.label_input_speaker_device.configure(font=customtkinter.CTkFont(family=choice)) self.optionmenu_input_speaker_device.configure(font=customtkinter.CTkFont(family=choice)) self.optionmenu_input_speaker_device._dropdown_menu.configure(font=customtkinter.CTkFont(family=choice)) self.label_input_speaker_voice_language.configure(font=customtkinter.CTkFont(family=choice)) self.optionmenu_input_speaker_voice_language.configure(font=customtkinter.CTkFont(family=choice)) self.optionmenu_input_speaker_voice_language._dropdown_menu.configure(font=customtkinter.CTkFont(family=choice)) - self.label_input_speaker_interval.configure(font=customtkinter.CTkFont(family=choice)) - self.entry_input_speaker_interval.configure(font=customtkinter.CTkFont(family=choice)) + self.label_input_speaker_energy_threshold.configure(font=customtkinter.CTkFont(family=choice)) + self.entry_input_speaker_energy_threshold.configure(font=customtkinter.CTkFont(family=choice)) + self.label_input_speaker_dynamic_energy_threshold.configure(font=customtkinter.CTkFont(family=choice)) + self.label_input_speaker_record_timeout.configure(font=customtkinter.CTkFont(family=choice)) + self.entry_input_speaker_record_timeout.configure(font=customtkinter.CTkFont(family=choice)) + self.label_input_speaker_phrase_timeout.configure(font=customtkinter.CTkFont(family=choice)) + self.entry_input_speaker_phrase_timeout.configure(font=customtkinter.CTkFont(family=choice)) + self.label_input_speaker_max_phrases.configure(font=customtkinter.CTkFont(family=choice)) + self.entry_input_speaker_max_phrases.configure(font=customtkinter.CTkFont(family=choice)) # tab Parameter self.label_ip_address.configure(font=customtkinter.CTkFont(family=choice)) @@ -498,23 +666,23 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel): utils.print_textbox(self.parent.textbox_message_system_log, "Auth Key or language setting is incorrect", "ERROR") else: 
self.optionmenu_translation_input_source_language.configure( - values=self.parent.translator.languages[choice], - variable=customtkinter.StringVar(value=self.parent.translator.languages[choice][0])) + values=list(languages.translation_lang[choice].keys()), + variable=customtkinter.StringVar(value=list(languages.translation_lang[choice].keys())[0])) self.optionmenu_translation_input_target_language.configure( - values=self.parent.translator.languages[choice], - variable=customtkinter.StringVar(value=self.parent.translator.languages[choice][1])) + values=list(languages.translation_lang[choice].keys()), + variable=customtkinter.StringVar(value=list(languages.translation_lang[choice].keys())[1])) self.optionmenu_translation_output_source_language.configure( - values=self.parent.translator.languages[choice], - variable=customtkinter.StringVar(value=self.parent.translator.languages[choice][1])) + values=list(languages.translation_lang[choice].keys()), + variable=customtkinter.StringVar(value=list(languages.translation_lang[choice].keys())[1])) self.optionmenu_translation_output_target_language.configure( - values=self.parent.translator.languages[choice], - variable=customtkinter.StringVar(value=self.parent.translator.languages[choice][0])) + values=list(languages.translation_lang[choice].keys()), + variable=customtkinter.StringVar(value=list(languages.translation_lang[choice].keys())[0])) self.parent.CHOICE_TRANSLATOR = choice - self.parent.INPUT_SOURCE_LANG = self.parent.translator.languages[choice][0] - self.parent.INPUT_TARGET_LANG = self.parent.translator.languages[choice][1] - self.parent.OUTPUT_SOURCE_LANG = self.parent.translator.languages[choice][1] - self.parent.OUTPUT_TARGET_LANG = self.parent.translator.languages[choice][0] + self.parent.INPUT_SOURCE_LANG = list(languages.translation_lang[choice].keys())[0] + self.parent.INPUT_TARGET_LANG = list(languages.translation_lang[choice].keys())[1] + self.parent.OUTPUT_SOURCE_LANG = 
list(languages.translation_lang[choice].keys())[1] + self.parent.OUTPUT_TARGET_LANG = list(languages.translation_lang[choice].keys())[0] utils.save_json(self.parent.PATH_CONFIG, "CHOICE_TRANSLATOR", self.parent.CHOICE_TRANSLATOR) utils.save_json(self.parent.PATH_CONFIG, "INPUT_SOURCE_LANG", self.parent.INPUT_SOURCE_LANG) utils.save_json(self.parent.PATH_CONFIG, "INPUT_TARGET_LANG", self.parent.INPUT_TARGET_LANG) @@ -545,14 +713,26 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel): self.parent.INPUT_MIC_VOICE_LANGUAGE = choice utils.save_json(self.parent.PATH_CONFIG, "INPUT_MIC_VOICE_LANGUAGE", self.parent.INPUT_MIC_VOICE_LANGUAGE) - def checkbox_input_mic_is_dynamic_callback(self): - value = self.checkbox_input_mic_is_dynamic.get() - self.parent.INPUT_MIC_IS_DYNAMIC = value - utils.save_json(self.parent.PATH_CONFIG, "INPUT_MIC_IS_DYNAMIC", self.parent.INPUT_MIC_IS_DYNAMIC) + def entry_input_mic_energy_threshold_callback(self, event): + self.parent.INPUT_MIC_ENERGY_THRESHOLD = int(self.entry_input_mic_energy_threshold.get()) + utils.save_json(self.parent.PATH_CONFIG, "INPUT_MIC_ENERGY_THRESHOLD", self.parent.INPUT_MIC_ENERGY_THRESHOLD) - def entry_input_mic_threshold_callback(self, event): - self.parent.INPUT_MIC_THRESHOLD = int(self.entry_input_mic_threshold.get()) - utils.save_json(self.parent.PATH_CONFIG, "INPUT_MIC_THRESHOLD", self.parent.INPUT_MIC_THRESHOLD) + def checkbox_input_mic_dynamic_energy_threshold_callback(self): + value = self.checkbox_input_mic_dynamic_energy_threshold.get() + self.parent.INPUT_MIC_DYNAMIC_ENERGY_THRESHOLD = value + utils.save_json(self.parent.PATH_CONFIG, "INPUT_MIC_DYNAMIC_ENERGY_THRESHOLD", self.parent.INPUT_MIC_DYNAMIC_ENERGY_THRESHOLD) + + def entry_input_mic_record_timeout_callback(self, event): + self.parent.INPUT_MIC_RECORD_TIMEOUT = int(self.entry_input_mic_record_timeout.get()) + utils.save_json(self.parent.PATH_CONFIG, "INPUT_MIC_RECORD_TIMEOUT", self.parent.INPUT_MIC_RECORD_TIMEOUT) + + def 
entry_input_mic_phrase_timeout_callback(self, event): + self.parent.INPUT_MIC_PHRASE_TIMEOUT = int(self.entry_input_mic_phrase_timeout.get()) + utils.save_json(self.parent.PATH_CONFIG, "INPUT_MIC_PHRASE_TIMEOUT", self.parent.INPUT_MIC_PHRASE_TIMEOUT) + + def entry_input_mic_max_phrases_callback(self, event): + self.parent.INPUT_MIC_MAX_PHRASES = int(self.entry_input_mic_max_phrases.get()) + utils.save_json(self.parent.PATH_CONFIG, "INPUT_MIC_MAX_PHRASES", self.parent.INPUT_MIC_MAX_PHRASES) def optionmenu_input_speaker_device_callback(self, choice): self.parent.CHOICE_SPEAKER_DEVICE = choice @@ -562,9 +742,26 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel): self.parent.INPUT_SPEAKER_VOICE_LANGUAGE = choice utils.save_json(self.parent.PATH_CONFIG, "INPUT_SPEAKER_VOICE_LANGUAGE", self.parent.INPUT_SPEAKER_VOICE_LANGUAGE) - def entry_input_speaker_interval_callback(self, event): - self.parent.INPUT_SPEAKER_INTERVAL = int(self.entry_input_speaker_interval.get()) - utils.save_json(self.parent.PATH_CONFIG, "INPUT_SPEAKER_INTERVAL", self.parent.INPUT_SPEAKER_INTERVAL) + def entry_input_speaker_energy_threshold_callback(self, event): + self.parent.INPUT_SPEAKER_ENERGY_THRESHOLD = int(self.entry_input_speaker_energy_threshold.get()) + utils.save_json(self.parent.PATH_CONFIG, "INPUT_SPEAKER_ENERGY_THRESHOLD", self.parent.INPUT_SPEAKER_ENERGY_THRESHOLD) + + def checkbox_input_speaker_dynamic_energy_threshold_callback(self): + value = self.checkbox_input_speaker_dynamic_energy_threshold.get() + self.parent.INPUT_SPEAKER_DYNAMIC_ENERGY_THRESHOLD = value + utils.save_json(self.parent.PATH_CONFIG, "INPUT_SPEAKER_DYNAMIC_ENERGY_THRESHOLD", self.parent.INPUT_SPEAKER_DYNAMIC_ENERGY_THRESHOLD) + + def entry_input_speaker_record_timeout_callback(self, event): + self.parent.INPUT_SPEAKER_RECORD_TIMEOUT = int(self.entry_input_speaker_record_timeout.get()) + utils.save_json(self.parent.PATH_CONFIG, "INPUT_SPEAKER_RECORD_TIMEOUT", self.parent.INPUT_SPEAKER_RECORD_TIMEOUT) + + def 
entry_input_speaker_phrase_timeout_callback(self, event): + self.parent.INPUT_SPEAKER_PHRASE_TIMEOUT = int(self.entry_input_speaker_phrase_timeout.get()) + utils.save_json(self.parent.PATH_CONFIG, "INPUT_SPEAKER_PHRASE_TIMEOUT", self.parent.INPUT_SPEAKER_PHRASE_TIMEOUT) + + def entry_input_speaker_max_phrases_callback(self, event): + self.parent.INPUT_SPEAKER_MAX_PHRASES = int(self.entry_input_speaker_max_phrases.get()) + utils.save_json(self.parent.PATH_CONFIG, "INPUT_SPEAKER_MAX_PHRASES", self.parent.INPUT_SPEAKER_MAX_PHRASES) def entry_ip_address_callback(self, event): self.parent.OSC_IP_ADDRESS = self.entry_ip_address.get() @@ -585,6 +782,12 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel): else: pass + def delete_window(self): + self.parent.checkbox_translation.configure(state="normal") + self.parent.checkbox_transcription_send.configure(state="normal") + self.parent.checkbox_transcription_receive.configure(state="normal") + self.parent.config_window.destroy() + def entry_message_format_callback(self, event): value = self.entry_message_format.get() if len(value) > 0: diff --git a/window_information.py b/window_information.py index 96c2afd2..f3039589 100644 --- a/window_information.py +++ b/window_information.py @@ -18,7 +18,7 @@ class ToplevelWindowInformation(customtkinter.CTkToplevel): font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY) ) self.textbox_information.grid(row=0, column=0, padx=(10, 10), pady=(10, 10), sticky="nsew") - textbox_information_message = """VRCT(v1.1) + textbox_information_message = """VRCT(v1.2) # 概要 VRChatで使用されるChatBoxをOSC経由でメッセージを送信するツールになります。 @@ -66,13 +66,18 @@ VRChatで使用されるChatBoxをOSC経由でメッセージを送信するツ Send Language: 送信するメッセージに対して翻訳する言語[source, target]を選択 Receive Language: 受信したメッセージに対して翻訳する言語[source, target]を選択 Transcriptionタブ - Input Mic Device: 音声を入力するマイクを選択 + Input Mic Device: マイクを選択 Input Mic Voice Language: 入力する音声の言語 - Input Mic IsDynamic: マイクの自動調整 - Input Mic Threshold: 音声取得のしきい値 - Input Speaker Device: 
音声を受信するスピーカーを選択 + Input Mic Energy Threshold: 音声取得のしきい値 + Input Mic Dynamic Energy Threshold: 音声取得のしきい値の自動調整 + Input Mic Record Timeout: 音声の区切りの無音時間 + Input Mic Max Phrases: 保留する単語の上限 + Input Speaker Device: スピーカーを選択 Input Speaker Voice Language: 受信する音声の言語 - Input Speaker Interval: 受信する音声の調整 + Input Speaker Energy Threshold: 音声取得のしきい値 + Input Speaker Dynamic Energy Threshold: 音声取得のしきい値の自動調整 + Input Speaker Record Timeout: 音声の区切りの無音時間 + Input Speaker Max Phrases: 保留する単語の上限 Parameterタブ OSC IP address: 変更不要 OSC port: 変更不要