diff --git a/.gitignore b/.gitignore index 75c28a41..52825c27 100644 --- a/.gitignore +++ b/.gitignore @@ -6,7 +6,7 @@ VRCT.spec *.pyc logs/ .venv/ -weight/ +weights/ .vscode error.log *.exe diff --git a/config.py b/config.py index 9a7c4e83..638918da 100644 --- a/config.py +++ b/config.py @@ -98,6 +98,10 @@ class Config: def SELECTABLE_CTRANSLATE2_WEIGHT_TYPE_DICT(self): return self._SELECTABLE_CTRANSLATE2_WEIGHT_TYPE_DICT + @property + def SELECTABLE_WHISPER_WEIGHT_TYPE_DICT(self): + return self._SELECTABLE_WHISPER_WEIGHT_TYPE_DICT + @property def MAX_MIC_ENERGY_THRESHOLD(self): return self._MAX_MIC_ENERGY_THRESHOLD @@ -224,6 +228,15 @@ class Config: if isinstance(value, bool): self._IS_RESET_BUTTON_DISPLAYED_FOR_TRANSLATION = value + @property + def IS_RESET_BUTTON_DISPLAYED_FOR_WHISPER(self): + return self._IS_RESET_BUTTON_DISPLAYED_FOR_WHISPER + + @IS_RESET_BUTTON_DISPLAYED_FOR_WHISPER.setter + def IS_RESET_BUTTON_DISPLAYED_FOR_WHISPER(self, value): + if isinstance(value, bool): + self._IS_RESET_BUTTON_DISPLAYED_FOR_WHISPER = value + # Save Json Data ## Main Window @property @@ -281,6 +294,17 @@ class Config: self._SELECTED_TAB_TARGET_LANGUAGES = value saveJson(self.PATH_CONFIG, inspect.currentframe().f_code.co_name, value) + @property + @json_serializable('SELECTED_TRANSCRIPTION_ENGINE') + def SELECTED_TRANSCRIPTION_ENGINE(self): + return self._SELECTED_TRANSCRIPTION_ENGINE + + @SELECTED_TRANSCRIPTION_ENGINE.setter + def SELECTED_TRANSCRIPTION_ENGINE(self, value): + if isinstance(value, str): + self._SELECTED_TRANSCRIPTION_ENGINE = value + saveJson(self.PATH_CONFIG, inspect.currentframe().f_code.co_name, value) + @property @json_serializable('IS_MAIN_WINDOW_SIDEBAR_COMPACT_MODE') def IS_MAIN_WINDOW_SIDEBAR_COMPACT_MODE(self): @@ -587,15 +611,37 @@ class Config: saveJson(self.PATH_CONFIG, inspect.currentframe().f_code.co_name, value) @property - @json_serializable('WEIGHT_TYPE') - def WEIGHT_TYPE(self): - return self._WEIGHT_TYPE + 
@json_serializable('USE_WHISPER_FEATURE') + def USE_WHISPER_FEATURE(self): + return self._USE_WHISPER_FEATURE - @WEIGHT_TYPE.setter - def WEIGHT_TYPE(self, value): + @USE_WHISPER_FEATURE.setter + def USE_WHISPER_FEATURE(self, value): + if isinstance(value, bool): + self._USE_WHISPER_FEATURE = value + saveJson(self.PATH_CONFIG, inspect.currentframe().f_code.co_name, value) + + @property + @json_serializable('CTRANSLATE2_WEIGHT_TYPE') + def CTRANSLATE2_WEIGHT_TYPE(self): + return self._CTRANSLATE2_WEIGHT_TYPE + + @CTRANSLATE2_WEIGHT_TYPE.setter + def CTRANSLATE2_WEIGHT_TYPE(self, value): # if isinstance(value, str) and value in self.SELECTABLE_CTRANSLATE2_WEIGHT_TYPE_DICT: if isinstance(value, str): - self._WEIGHT_TYPE = value + self._CTRANSLATE2_WEIGHT_TYPE = value + saveJson(self.PATH_CONFIG, inspect.currentframe().f_code.co_name, value) + + @property + @json_serializable('WHISPER_WEIGHT_TYPE') + def WHISPER_WEIGHT_TYPE(self): + return self._WHISPER_WEIGHT_TYPE + + @WHISPER_WEIGHT_TYPE.setter + def WHISPER_WEIGHT_TYPE(self, value): + if isinstance(value, str): + self._WHISPER_WEIGHT_TYPE = value saveJson(self.PATH_CONFIG, inspect.currentframe().f_code.co_name, value) @property @@ -774,6 +820,18 @@ class Config: "Small": "Small", "Large": "Large", } + + self._SELECTABLE_WHISPER_WEIGHT_TYPE_DICT = { + # {Save json str}: {i18n_placeholder} pairs + "tiny": "tiny", + "base": "base", + "small": "small", + "medium": "medium", + "large-v1": "large-v1", + "large-v2": "large-v2", + "large-v3": "large-v3", + } + self._MAX_MIC_ENERGY_THRESHOLD = 2000 self._MAX_SPEAKER_ENERGY_THRESHOLD = 4000 @@ -791,6 +849,7 @@ class Config: self._SENT_MESSAGES_LOG = [] self._CURRENT_SENT_MESSAGES_LOG_INDEX = 0 self._IS_RESET_BUTTON_DISPLAYED_FOR_TRANSLATION = False + self._IS_RESET_BUTTON_DISPLAYED_FOR_WHISPER = False # Save Json Data ## Main Window @@ -815,6 +874,7 @@ class Config: "2":"English\n(United States)", "3":"English\n(United States)", } + self._SELECTED_TRANSCRIPTION_ENGINE = 
"Google" self._IS_MAIN_WINDOW_SIDEBAR_COMPACT_MODE = False ## Config Window @@ -851,7 +911,9 @@ class Config: "DeepL_API": None, } self._USE_TRANSLATION_FEATURE = True - self._WEIGHT_TYPE = "Small" + self._CTRANSLATE2_WEIGHT_TYPE = "Small" + self._USE_WHISPER_FEATURE = False + self._WHISPER_WEIGHT_TYPE = "base" self._SEND_MESSAGE_FORMAT = "[message]" self._SEND_MESSAGE_FORMAT_WITH_T = "[message]([translation])" self._RECEIVED_MESSAGE_FORMAT = "[message]" diff --git a/controller.py b/controller.py index 5a80c98f..e977a5ef 100644 --- a/controller.py +++ b/controller.py @@ -530,8 +530,8 @@ def callbackSetUseTranslationFeature(value): def callbackSetCtranslate2WeightType(value): print("callbackSetCtranslate2WeightType", value) - config.WEIGHT_TYPE = str(value) - view.updateSelectedCtranslate2WeightType(config.WEIGHT_TYPE) + config.CTRANSLATE2_WEIGHT_TYPE = str(value) + view.updateSelectedCtranslate2WeightType(config.CTRANSLATE2_WEIGHT_TYPE) view.setWidgetsStatus_changeWeightType_Pending() if model.checkCTranslatorCTranslate2ModelWeight(): config.IS_RESET_BUTTON_DISPLAYED_FOR_TRANSLATION = False @@ -792,6 +792,33 @@ def callbackSetSpeakerMaxPhrases(value): except Exception: view.showErrorMessage_SpeakerMaxPhrases() +# Transcription (Internal AI Model) +def callbackSetUserWhisperFeature(value): + print("callbackSetUserWhisperFeature", value) + config.USE_WHISPER_FEATURE = value + if config.USE_WHISPER_FEATURE is True: + view.openWhisperWeightTypeWidget() + if model.checkTranscriptionWhisperModelWeight() is True: + config.IS_RESET_BUTTON_DISPLAYED_FOR_WHISPER = False + config.SELECTED_TRANSCRIPTION_ENGINE = "Whisper" + else: + config.IS_RESET_BUTTON_DISPLAYED_FOR_WHISPER = True + config.SELECTED_TRANSCRIPTION_ENGINE = "Google" + else: + view.closeWhisperWeightTypeWidget() + view.showRestartButtonIfRequired() + +def callbackSetWhisperWeightType(value): + print("callbackSetWhisperWeightType", value) + config.WHISPER_WEIGHT_TYPE = str(value) + 
view.updateSelectedWhisperWeightType(config.WHISPER_WEIGHT_TYPE) + if model.checkTranscriptionWhisperModelWeight() is True: + config.IS_RESET_BUTTON_DISPLAYED_FOR_WHISPER = False + config.SELECTED_TRANSCRIPTION_ENGINE = "Whisper" + else: + config.IS_RESET_BUTTON_DISPLAYED_FOR_WHISPER = True + config.SELECTED_TRANSCRIPTION_ENGINE = "Google" + view.showRestartButtonIfRequired() # Others Tab def callbackSetEnableAutoClearMessageBox(value): @@ -923,6 +950,12 @@ def createMainWindow(splash): # set Translation Engine updateTranslationEngineAndEngineList() + # set Transcription Engine + if config.USE_WHISPER_FEATURE is True: + config.SELECTED_TRANSCRIPTION_ENGINE = "Whisper" + else: + config.SELECTED_TRANSCRIPTION_ENGINE = "Google" + # set word filter model.addKeywords() @@ -1020,6 +1053,10 @@ def createMainWindow(splash): "callback_set_speaker_phrase_timeout": callbackSetSpeakerPhraseTimeout, "callback_set_speaker_max_phrases": callbackSetSpeakerMaxPhrases, + # Transcription Tab (Internal AI Model) + "callback_set_use_whisper_feature": callbackSetUserWhisperFeature, + "callback_set_whisper_weight_type": callbackSetWhisperWeightType, + # Others Tab "callback_set_enable_auto_clear_chatbox": callbackSetEnableAutoClearMessageBox, "callback_set_send_only_translated_messages": callbackSetEnableSendOnlyTranslatedMessages, diff --git a/install.bat b/install.bat index 036f6a51..8d2a5d51 100644 --- a/install.bat +++ b/install.bat @@ -1,4 +1,2 @@ python.exe -m pip install --upgrade pip -pip install -r requirements.txt -pip install git+https://github.com/misyaguziya/translators -pip install git+https://github.com/misyaguziya/custom_speech_recognition \ No newline at end of file +pip install -r requirements.txt \ No newline at end of file diff --git a/locales/en.yml b/locales/en.yml index 2806ea91..f68aa32c 100644 --- a/locales/en.yml +++ b/locales/en.yml @@ -79,6 +79,7 @@ config_window: transcription: Transcription transcription_mic: Mic transcription_speaker: Speaker + 
transcription_internal_model: Internal Model others: Others others_send_message_formats: Message Formats (Send) others_received_message_formats: Message Formats (Received) @@ -125,6 +126,21 @@ config_window: small: "Basic model (%{capacity})" large: "High accuracy model (%{capacity})" + use_whisper_feature: + label: Use Whisper Feature + desc: Description + + whisper_weight_type: + label: Select Whisper Model + desc: Description + tiny: "tiny model (%{capacity})" + base: "base model (%{capacity})" + small: "small model (%{capacity})" + medium: "medium model (%{capacity})" + large_v1: "large_v1 model (%{capacity})" + large_v2: "large_v2 model (%{capacity})" + large_v3: "large_v3 model (%{capacity})" + deepl_auth_key: label: DeepL Auth Key desc: Please select %{translator} on the main screen with DeepL_API when using. ※Some languages may not be supported. diff --git a/main.py b/main.py index 4810cbe5..6b6c0e3e 100644 --- a/main.py +++ b/main.py @@ -8,11 +8,16 @@ if __name__ == "__main__": splash.showSplash() from config import config - from models.translation.utils import downloadCTranslate2Weight + from models.translation.translation_utils import downloadCTranslate2Weight if config.USE_TRANSLATION_FEATURE is True: - downloadCTranslate2Weight(config.PATH_LOCAL, config.WEIGHT_TYPE, splash.updateDownloadProgress) + downloadCTranslate2Weight(config.PATH_LOCAL, config.CTRANSLATE2_WEIGHT_TYPE, splash.updateDownloadProgress) + + from models.transcription.transcription_whisper import downloadWhisperWeight + if config.USE_WHISPER_FEATURE is True: + downloadWhisperWeight(config.PATH_LOCAL, config.WHISPER_WEIGHT_TYPE, splash.updateDownloadProgress) splash.toProgress(0) + import controller controller.createMainWindow(splash) splash.destroySplash() diff --git a/model.py b/model.py index 573659a7..5b17e167 100644 --- a/model.py +++ b/model.py @@ -23,7 +23,8 @@ from models.transcription.transcription_transcriber import AudioTranscriber from models.xsoverlay.notification import 
xsoverlayForVRCT from models.translation.translation_languages import translation_lang from models.transcription.transcription_languages import transcription_lang -from models.translation.utils import checkCTranslate2Weight +from models.translation.translation_utils import checkCTranslate2Weight +from models.transcription.transcription_whisper import checkWhisperWeight from config import config class threadFnc(Thread): @@ -65,14 +66,17 @@ class Model: self.speaker_energy_plot_progressbar = None self.translator = Translator() if config.USE_TRANSLATION_FEATURE is True: - self.translator.changeCTranslate2Model(config.PATH_LOCAL, config.WEIGHT_TYPE) + self.translator.changeCTranslate2Model(config.PATH_LOCAL, config.CTRANSLATE2_WEIGHT_TYPE) self.keyword_processor = KeywordProcessor() def checkCTranslatorCTranslate2ModelWeight(self): - return checkCTranslate2Weight(config.PATH_LOCAL, config.WEIGHT_TYPE) + return checkCTranslate2Weight(config.PATH_LOCAL, config.CTRANSLATE2_WEIGHT_TYPE) def changeTranslatorCTranslate2Model(self): - self.translator.changeCTranslate2Model(config.PATH_LOCAL, config.WEIGHT_TYPE) + self.translator.changeCTranslate2Model(config.PATH_LOCAL, config.CTRANSLATE2_WEIGHT_TYPE) + + def checkTranscriptionWhisperModelWeight(self): + return checkWhisperWeight(config.PATH_LOCAL, config.WHISPER_WEIGHT_TYPE) def resetKeywordProcessor(self): del self.keyword_processor @@ -335,9 +339,11 @@ class Model: source=self.mic_audio_recorder.source, phrase_timeout=phase_timeout, max_phrases=config.INPUT_MIC_MAX_PHRASES, + root=config.PATH_LOCAL, + whisper_weight_type=config.WHISPER_WEIGHT_TYPE, ) def sendMicTranscript(): - mic_transcriber.transcribeAudioQueue(mic_audio_queue, config.SOURCE_LANGUAGE, config.SOURCE_COUNTRY) + mic_transcriber.transcribeAudioQueue(mic_audio_queue, config.SOURCE_LANGUAGE, config.SOURCE_COUNTRY, config.SELECTED_TRANSCRIPTION_ENGINE) message = mic_transcriber.getTranscript() try: fnc(message) @@ -416,9 +422,11 @@ class Model: 
source=self.speaker_audio_recorder.source, phrase_timeout=phase_timeout, max_phrases=config.INPUT_SPEAKER_MAX_PHRASES, + root=config.PATH_LOCAL, + whisper_weight_type=config.WHISPER_WEIGHT_TYPE, ) def sendSpeakerTranscript(): - speaker_transcriber.transcribeAudioQueue(speaker_audio_queue, config.TARGET_LANGUAGE, config.TARGET_COUNTRY) + speaker_transcriber.transcribeAudioQueue(speaker_audio_queue, config.TARGET_LANGUAGE, config.TARGET_COUNTRY, config.SELECTED_TRANSCRIPTION_ENGINE) message = speaker_transcriber.getTranscript() try: fnc(message) diff --git a/models/transcription/transcription_languages.py b/models/transcription/transcription_languages.py index 26f2c3f6..63d92568 100644 --- a/models/transcription/transcription_languages.py +++ b/models/transcription/transcription_languages.py @@ -1,177 +1,438 @@ transcription_lang = { "Afrikaans":{ - "South Africa":"af-ZA", + "South Africa":{ + "Google": "af-ZA", + "Whisper": "af", + }, }, "Arabic":{ - "Algeria":"ar-DZ", - "Bahrain":"ar-BH", - "Egypt":"ar-EG", - "Israel":"ar-IL", - "Iraq":"ar-IQ", - "Jordan":"ar-JO", - "Kuwait":"ar-KW", - "Lebanon":"ar-LB", - "Morocco":"ar-MA", - "Oman":"ar-OM", - "State of Palestine":"ar-PS", - "Qatar":"ar-QA", - "Saudi Arabia":"ar-SA", - "Tunisia":"ar-TN", - "United Arab Emirates":"ar-AE", + "Algeria":{ + "Google": "ar-DZ", + "Whisper": "ar", + }, + "Bahrain":{ + "Google": "ar-BH", + "Whisper": "ar", + }, + "Egypt":{ + "Google": "ar-EG", + "Whisper": "ar", + }, + "Israel":{ + "Google": "ar-IL", + "Whisper": "ar", + }, + "Iraq":{ + "Google": "ar-IQ", + "Whisper": "ar", + }, + "Jordan":{ + "Google": "ar-JO", + "Whisper": "ar", + }, + "Kuwait":{ + "Google": "ar-KW", + "Whisper": "ar", + }, + "Lebanon":{ + "Google": "ar-LB", + "Whisper": "ar", + }, + "Morocco":{ + "Google": "ar-MA", + "Whisper": "ar", + }, + "Oman":{ + "Google": "ar-OM", + "Whisper": "ar", + }, + "State of Palestine":{ + "Google": "ar-PS", + "Whisper": "ar", + }, + "Qatar":{ + "Google": "ar-QA", + "Whisper": "ar", + }, 
+ "Saudi Arabia":{ + "Google": "ar-SA", + "Whisper": "ar", + }, + "Tunisia":{ + "Google": "ar-TN", + "Whisper": "ar", + }, + "United Arab Emirates":{ + "Google": "ar-AE", + "Whisper": "ar", + }, }, "Basque":{ - "Spain":"eu-ES", + "Spain":{ + "Google": "eu-ES", + "Whisper": "eu", + }, }, "Bulgarian":{ - "Bulgaria":"bg-BG", + "Bulgaria":{ + "Google": "bg-BG", + "Whisper": "bg", + }, }, "Catalan":{ - "Spain":"ca-ES", + "Spain":{ + "Google": "ca-ES", + "Whisper": "ca", + }, }, "Chinese":{ - "Mandarin (Simplified, China)":"cmn-Hans-CN", - "Mandarin (Simplified, Hong Kong)":"cmn-Hans-HK", - "Mandarin (Traditional, Taiwan)":"cmn-Hant-TW", - "Cantonese (Traditional Hong Kong)":"yue-Hant-HK", + "Mandarin (Simplified, China)":{ + "Google": "cmn-Hans-CN", + "Whisper": "zh", + }, + "Mandarin (Simplified, Hong Kong)":{ + "Google": "cmn-Hans-HK", + "Whisper": "zh", + }, + "Mandarin (Traditional, Taiwan)":{ + "Google": "cmn-Hant-TW", + "Whisper": "zh", + }, + "Cantonese (Traditional Hong Kong)":{ + "Google": "yue-Hant-HK", + "Whisper": "yue", + }, }, "Croatian":{ - "Croatia":"hr-HR", + "Croatia":{ + "Google": "hr-HR", + "Whisper": "hr", + }, }, "Czech":{ - "Czech Republic":"cs-CZ", + "Czech Republic":{ + "Google": "cs-CZ", + "Whisper": "cs", + }, }, "Danish":{ - "Denmark":"da-DK", + "Denmark":{ + "Google": "da-DK", + "Whisper": "da", + }, }, "Dutch":{ - "Netherlands":"nl-NL", + "Netherlands":{ + "Google": "nl-NL", + "Whisper": "nl", + }, }, "English": { - "United States":"en-US", - "United Kingdom":"en-GB", - "Australia":"en-AU", - "Canada":"en-CA", - "India":"en-IN", - "Ireland":"en-IE", - "New Zealand":"en-NZ", - "Philippines":"en-PH", - "South Africa":"en-ZA", + "United States":{ + "Google": "en-US", + "Whisper": "en", + }, + "United Kingdom":{ + "Google": "en-GB", + "Whisper": "en", + }, + "Australia":{ + "Google": "en-AU", + "Whisper": "en", + }, + "Canada":{ + "Google": "en-CA", + "Whisper": "en", + }, + "India":{ + "Google": "en-IN", + "Whisper": "en", + }, + "Ireland":{ + 
"Google": "en-IE", + "Whisper": "en", + }, + "New Zealand":{ + "Google": "en-NZ", + "Whisper": "en", + }, + "Philippines":{ + "Google": "en-PH", + "Whisper": "en", + }, + "South Africa":{ + "Google": "en-ZA", + "Whisper": "en", + }, }, "Filipino":{ - "Philippines":"fil-PH", + "Philippines":{ + "Google": "fil-PH", + "Whisper": "tl", + }, }, "Finnish":{ - "Finland":"fi-FI", + "Finland":{ + "Google": "fi-FI", + "Whisper": "fi", + }, }, "French":{ - "France":"fr-FR", + "France":{ + "Google": "fr-FR", + "Whisper": "fr", + }, }, "Galician":{ - "Spain":"gl-ES", + "Spain":{ + "Google": "gl-ES", + "Whisper": "gl", + }, }, "German":{ - "Germany":"de-DE", + "Germany":{ + "Google": "de-DE", + "Whisper": "de", + }, }, "Greek":{ - "Greece":"el-GR", + "Greece":{ + "Google": "el-GR", + "Whisper": "el", + }, }, "Hebrew":{ - "Israel":"he-IL", + "Israel":{ + "Google": "he-IL", + "Whisper": "he", + }, }, "Hindi": { - "India":"hi-IN", + "India":{ + "Google": "hi-IN", + "Whisper": "hi", + }, }, "Hungarian":{ - "Hungary":"hu-HU", + "Hungary":{ + "Google": "hu-HU", + "Whisper": "hu", + }, }, "Indonesian":{ - "Indonesia":"id-ID", + "Indonesia":{ + "Google": "id-ID", + "Whisper": "id", + }, }, "Icelandic":{ - "Iceland":"is-IS", + "Iceland":{ + "Google": "is-IS", + "Whisper": "is", + }, }, "Italian":{ - "Italy":"it-IT", - "Switzerland":"it-CH", + "Italy":{ + "Google": "it-IT", + "Whisper": "it", + }, + "Switzerland":{ + "Google": "it-CH", + "Whisper": "it", + }, }, "Japanese":{ - "Japan":"ja-JP", + "Japan":{ + "Google": "ja-JP", + "Whisper": "ja", + }, }, "Korean":{ - "South Korea":"ko-KR", + "South Korea":{ + "Google": "ko-KR", + "Whisper": "ko", + }, }, "Lithuanian":{ - "Lithuania":"lt-LT", + "Lithuania":{ + "Google": "lt-LT", + "Whisper": "lt", + }, }, "Malay":{ - "Malaysia":"ms-MY", + "Malaysia":{ + "Google": "ms-MY", + "Whisper": "ms", + }, }, "Norwegian":{ - "Norway":"nb-NO", + "Norway":{ + "Google": "nb-NO", + "Whisper": "no", + }, }, "Persian":{ - "Iran":"fa-IR", + "Iran":{ + 
"Google": "fa-IR", + "Whisper": "fa", + }, }, "Polish":{ - "Poland":"pl-PL", + "Poland":{ + "Google": "pl-PL", + "Whisper": "pl", + }, }, "Portuguese":{ - "Brazil":"pt-BR", - "Portugal":"pt-PT", + "Brazil":{ + "Google": "pt-BR", + "Whisper": "pt", + }, + "Portugal":{ + "Google": "pt-PT", + "Whisper": "pt", + }, }, "Romanian":{ - "Romania":"ro-RO", + "Romania":{ + "Google": "ro-RO", + "Whisper": "ro", + }, }, "Russian":{ - "Russia":"ru-RU", + "Russia":{ + "Google": "ru-RU", + "Whisper": "ru", + }, }, "Serbian":{ - "Serbia":"sr-RS", + "Serbia":{ + "Google": "sr-RS", + "Whisper": "sr", + }, }, "Slovak":{ - "Slovakia":"sk-SK", + "Slovakia":{ + "Google": "sk-SK", + "Whisper": "sk", + }, }, "Slovenian":{ - "Slovenia":"sl-SI", + "Slovenia":{ + "Google": "sl-SI", + "Whisper": "sl", + }, }, "Spanish":{ - "Argentina":"es-AR", - "Bolivia":"es-BO", - "Chile":"es-CL", - "Colombia":"es-CO", - "Costa Rica":"es-CR", - "Dominican Republic":"es-DO", - "Ecuador":"es-EC", - "El Salvador":"es-SV", - "Guatemala":"es-GT", - "Honduras":"es-HN", - "Mexico":"es-MX", - "Nicaragua":"es-NI", - "Panama":"es-PA", - "Paraguay":"es-PY", - "Peru":"es-PE", - "Puerto Rico":"es-PR", - "Spain":"es-ES", - "Uruguay":"es-UY", - "United States":"es-US", - "Venezuela":"es-VE", + "Argentina":{ + "Google": "es-AR", + "Whisper": "es", + }, + "Bolivia":{ + "Google": "es-BO", + "Whisper": "es", + }, + "Chile":{ + "Google": "es-CL", + "Whisper": "es", + }, + "Colombia":{ + "Google": "es-CO", + "Whisper": "es", + }, + "Costa Rica":{ + "Google": "es-CR", + "Whisper": "es", + }, + "Dominican Republic":{ + "Google": "es-DO", + "Whisper": "es", + }, + "Ecuador":{ + "Google": "es-EC", + "Whisper": "es", + }, + "El Salvador":{ + "Google": "es-SV", + "Whisper": "es", + }, + "Guatemala":{ + "Google": "es-GT", + "Whisper": "es", + }, + "Honduras":{ + "Google": "es-HN", + "Whisper": "es", + }, + "Mexico":{ + "Google": "es-MX", + "Whisper": "es", + }, + "Nicaragua":{ + "Google": "es-NI", + "Whisper": "es", + }, + "Panama":{ 
+ "Google": "es-PA", + "Whisper": "es", + }, + "Paraguay":{ + "Google": "es-PY", + "Whisper": "es", + }, + "Peru":{ + "Google": "es-PE", + "Whisper": "es", + }, + "Puerto Rico":{ + "Google": "es-PR", + "Whisper": "es", + }, + "Spain":{ + "Google": "es-ES", + "Whisper": "es", + }, + "Uruguay":{ + "Google": "es-UY", + "Whisper": "es", + }, + "United States":{ + "Google": "es-US", + "Whisper": "es", + }, + "Venezuela":{ + "Google": "es-VE", + "Whisper": "es", + }, }, "Swedish":{ - "Sweden":"sv-SE", + "Sweden":{ + "Google": "sv-SE", + "Whisper": "sv", + }, }, "Thai":{ - "Thailand":"th-TH", + "Thailand":{ + "Google": "th-TH", + "Whisper": "th", + }, }, "Turkish":{ - "Turkey":"tr-TR", + "Turkey":{ + "Google": "tr-TR", + "Whisper": "tr", + }, }, "Ukrainian":{ - "Ukraine":"uk-UA", + "Ukraine":{ + "Google": "uk-UA", + "Whisper": "uk", + }, }, "Vietnamese":{ - "Vietnam":"vi-VN", - }, - "Zulu":{ - "South Africa":"zu-ZA" + "Vietnam":{ + "Google": "vi-VN", + "Whisper": "vi", + }, }, } \ No newline at end of file diff --git a/models/transcription/transcription_transcriber.py b/models/transcription/transcription_transcriber.py index bf78566e..08cc6a1a 100644 --- a/models/transcription/transcription_transcriber.py +++ b/models/transcription/transcription_transcriber.py @@ -5,12 +5,16 @@ from speech_recognition import Recognizer, AudioData, AudioFile from datetime import timedelta from pyaudiowpatch import get_sample_size, paInt16 from .transcription_languages import transcription_lang +from .transcription_whisper import getWhisperModel, checkWhisperWeight + +import torch +import numpy as np PHRASE_TIMEOUT = 3 MAX_PHRASES = 10 class AudioTranscriber: - def __init__(self, speaker, source, phrase_timeout, max_phrases): + def __init__(self, speaker, source, phrase_timeout, max_phrases, root=None, whisper_weight_type=None, ): self.speaker = speaker self.phrase_timeout = phrase_timeout self.max_phrases = max_phrases @@ -26,23 +30,51 @@ class AudioTranscriber: "new_phrase": True, 
"process_data_func": self.processSpeakerData if speaker else self.processSpeakerData } + if whisper_weight_type is not None and root is not None and checkWhisperWeight(root, whisper_weight_type) is True: + self.whisper_model = getWhisperModel(root, whisper_weight_type) + else: + self.whisper_model = None - def transcribeAudioQueue(self, audio_queue, language, country): - # while True: + def transcribeAudioQueue(self, audio_queue, language, country, transcription_engine): audio, time_spoken = audio_queue.get() self.updateLastSampleAndPhraseStatus(audio, time_spoken) text = '' try: - # fd, path = tempfile.mkstemp(suffix=".wav") - # os.close(fd) + # Whisperが使用できない場合はGoogle Speech-to-Textを使用する + if transcription_engine == "Whisper": + if self.whisper_model is None: + transcription_engine = "Google" + audio_data = self.audio_sources["process_data_func"]() - text = self.audio_recognizer.recognize_google(audio_data, language=transcription_lang[language][country]) + match transcription_engine: + case "Google": + text = self.audio_recognizer.recognize_google(audio_data, language=transcription_lang[language][country][transcription_engine]) + case "Whisper": + audio_data = np.frombuffer(audio_data.get_raw_data(convert_rate=16000, convert_width=2), np.int16).flatten().astype(np.float32) / 32768.0 + if isinstance(audio_data, torch.Tensor): + audio_data = audio_data.detach().numpy() + segments, _ = self.whisper_model.transcribe( + audio_data, + beam_size=5, + temperature=0.0, + log_prob_threshold=-0.8, + no_speech_threshold=0.6, + language=transcription_lang[language][country][transcription_engine], + word_timestamps=False, + without_timestamps=True, + task="transcribe", + vad_filter=False, + ) + for s in segments: + if s.avg_logprob < -0.8 or s.no_speech_prob > 0.6: + continue + text += s.text + except Exception: pass finally: pass - # os.unlink(path) if text != '': self.updateTranscript(text) diff --git a/models/transcription/transcription_whisper.py 
b/models/transcription/transcription_whisper.py new file mode 100644 index 00000000..c6412d35 --- /dev/null +++ b/models/transcription/transcription_whisper.py @@ -0,0 +1,98 @@ +from os import path as os_path, makedirs as os_makedirs +from requests import get as requests_get +from typing import Callable +import huggingface_hub +from faster_whisper import WhisperModel +import logging +logger = logging.getLogger('faster_whisper') +logger.setLevel(logging.CRITICAL) + +_MODELS = { + "tiny": "Systran/faster-whisper-tiny", + "base": "Systran/faster-whisper-base", + "small": "Systran/faster-whisper-small", + "medium": "Systran/faster-whisper-medium", + "large-v1": "Systran/faster-whisper-large-v1", + "large-v2": "Systran/faster-whisper-large-v2", + "large-v3": "Systran/faster-whisper-large-v3", +} + +_FILENAMES = [ + "config.json", + "preprocessor_config.json", + "model.bin", + "tokenizer.json", + "vocabulary.txt", + "vocabulary.json", +] + +def downloadFile(url, path, func=None): + try: + res = requests_get(url, stream=True) + res.raise_for_status() + file_size = int(res.headers.get('content-length', 0)) + total_chunk = 0 + with open(os_path.join(path), 'wb') as file: + for chunk in res.iter_content(chunk_size=1024*5): + file.write(chunk) + if isinstance(func, Callable): + total_chunk += len(chunk) + func(total_chunk/file_size) + + except Exception as e: + print("error:downloadFile()", e) + +def checkWhisperWeight(root, weight_type): + path = os_path.join(root, "weights", "whisper", weight_type) + result = False + try: + WhisperModel( + path, + device="cpu", + device_index=0, + compute_type="int8", + cpu_threads=4, + num_workers=1, + local_files_only=True, + ) + result = True + except Exception: + pass + return result + +def downloadWhisperWeight(root, weight_type, callbackFunc): + path = os_path.join(root, "weights", "whisper", weight_type) + os_makedirs(path, exist_ok=True) + if checkWhisperWeight(root, weight_type) is True: + return + + for filename in _FILENAMES: + 
print("Downloading", filename, "...") + file_path = os_path.join(path, filename) + url = huggingface_hub.hf_hub_url(_MODELS[weight_type], filename) + downloadFile(url, file_path, func=callbackFunc) + +def getWhisperModel(root, weight_type): + path = os_path.join(root, "weights", "whisper", weight_type) + return WhisperModel( + path, + device="cpu", + device_index=0, + compute_type="int8", + cpu_threads=4, + num_workers=1, + local_files_only=True, + ) + +if __name__ == "__main__": + def callback(value): + print(value) + pass + + downloadWhisperWeight("./", "tiny", callback) + downloadWhisperWeight("./", "base", callback) + downloadWhisperWeight("./", "small", callback) + downloadWhisperWeight("./", "medium", callback) + downloadWhisperWeight("./", "large-v1", callback) + downloadWhisperWeight("./", "large-v2", callback) + downloadWhisperWeight("./", "large-v3", callback) \ No newline at end of file diff --git a/models/translation/translation_translator.py b/models/translation/translation_translator.py index ea02e490..c966c672 100644 --- a/models/translation/translation_translator.py +++ b/models/translation/translation_translator.py @@ -2,7 +2,7 @@ import os from deepl import Translator as deepl_Translator from translators import translate_text as other_web_Translator from .translation_languages import translation_lang -from .utils import ctranslate2_weights +from .translation_utils import ctranslate2_weights import ctranslate2 import transformers @@ -27,8 +27,8 @@ class Translator(): def changeCTranslate2Model(self, path, model_type): directory_name = ctranslate2_weights[model_type]["directory_name"] tokenizer = ctranslate2_weights[model_type]["tokenizer"] - weight_path = os.path.join(path, "weight", directory_name) - tokenizer_path = os.path.join(path, "weight", directory_name, "tokenizer") + weight_path = os.path.join(path, "weights", "ctranslate2", directory_name) + tokenizer_path = os.path.join(path, "weights", "ctranslate2", directory_name, "tokenizer") 
self.ctranslate2_translator = ctranslate2.Translator( weight_path, device="cpu", @@ -41,7 +41,7 @@ class Translator(): self.ctranslate2_tokenizer = transformers.AutoTokenizer.from_pretrained(tokenizer, cache_dir=tokenizer_path) except Exception as e: print("Error: changeCTranslate2Model()", e) - tokenizer_path = os.path.join("./weight", directory_name, "tokenizer") + tokenizer_path = os.path.join("./weights", "ctranslate2", directory_name, "tokenizer") self.ctranslate2_tokenizer = transformers.AutoTokenizer.from_pretrained(tokenizer, cache_dir=tokenizer_path) @staticmethod diff --git a/models/translation/utils.py b/models/translation/translation_utils.py similarity index 78% rename from models/translation/utils.py rename to models/translation/translation_utils.py index d47401cf..73805cdc 100644 --- a/models/translation/utils.py +++ b/models/translation/translation_utils.py @@ -39,36 +39,36 @@ def calculate_file_hash(file_path, block_size=65536): return hash_object.hexdigest() def checkCTranslate2Weight(path, weight_type="Small"): - directory_name = 'weight' - current_directory = path weight_directory_name = ctranslate2_weights[weight_type]["directory_name"] hash_data = ctranslate2_weights[weight_type]["hash"] - files = ["model.bin", "sentencepiece.model", "shared_vocabulary.txt"] + files = [ + "model.bin", + "sentencepiece.model", + "shared_vocabulary.txt" + ] # check already downloaded already_downloaded = False - if all(os_path.exists(os_path.join(current_directory, directory_name, weight_directory_name, file)) for file in files): + if all(os_path.exists(os_path.join(path, weight_directory_name, file)) for file in files): # check hash for file in files: original_hash = hash_data[file] - current_hash = calculate_file_hash(os_path.join(current_directory, directory_name, weight_directory_name, file)) + current_hash = calculate_file_hash(os_path.join(path, weight_directory_name, file)) if original_hash != current_hash: break already_downloaded = True return 
already_downloaded -def downloadCTranslate2Weight(path, weight_type="Small", func=None): +def downloadCTranslate2Weight(root, weight_type="Small", func=None): url = ctranslate2_weights[weight_type]["url"] - filename = 'weight.zip' - directory_name = 'weight' - current_directory = path + filename = "weight.zip" + path = os_path.join(root, "weights", "ctranslate2") + os_makedirs(path, exist_ok=True) if checkCTranslate2Weight(path, weight_type): return try: - os_makedirs(os_path.join(current_directory, directory_name), exist_ok=True) - print(os_path.join(current_directory, directory_name)) with tempfile.TemporaryDirectory() as tmp_path: res = requests_get(url, stream=True) file_size = int(res.headers.get('content-length', 0)) @@ -81,6 +81,6 @@ def downloadCTranslate2Weight(path, weight_type="Small", func=None): func(total_chunk/file_size) with ZipFile(os_path.join(tmp_path, filename)) as zf: - zf.extractall(os_path.join(current_directory, directory_name)) + zf.extractall(path) except Exception as e: print("error:downloadCTranslate2Weight()", e) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index b6e14d85..cedd1568 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,6 +8,9 @@ pyyaml == 6.0.1 python-i18n == 0.3.9 CTkToolTip == 0.8 pyinstaller==6.2.0 -transformers[torch] +transformers[torch]==4.37.2 sentencepiece==0.1.99 -ctranslate2==3.21.0 \ No newline at end of file +ctranslate2==3.24.0 +faster-whisper==0.10.0 +translators @ git+https://github.com/misyaguziya/translators@master +SpeechRecognition @ git+https://github.com/misyaguziya/custom_speech_recognition@master \ No newline at end of file diff --git a/view.py b/view.py index 720648ce..91074d93 100644 --- a/view.py +++ b/view.py @@ -29,6 +29,7 @@ class View(): font_family=config.FONT_FAMILY, ui_language=config.UI_LANGUAGE, is_reset_button_displayed_for_translation=config.IS_RESET_BUTTON_DISPLAYED_FOR_TRANSLATION, + 
is_reset_button_displayed_for_whisper=config.IS_RESET_BUTTON_DISPLAYED_FOR_WHISPER, ) if config.ENABLE_SPEAKER2CHATBOX is False: @@ -213,6 +214,7 @@ class View(): VAR_SIDE_MENU_LABEL_TRANSCRIPTION=StringVar(value=i18n.t("config_window.side_menu_labels.transcription")), VAR_SECOND_TITLE_TRANSCRIPTION_MIC=StringVar(value=i18n.t("config_window.side_menu_labels.transcription_mic")), VAR_SECOND_TITLE_TRANSCRIPTION_SPEAKER=StringVar(value=i18n.t("config_window.side_menu_labels.transcription_speaker")), + VAR_SECOND_TITLE_TRANSCRIPTION_INTERNAL_MODEL=StringVar(value=i18n.t("config_window.side_menu_labels.transcription_internal_model")), VAR_SIDE_MENU_LABEL_OTHERS=StringVar(value=i18n.t("config_window.side_menu_labels.others")), VAR_SIDE_MENU_LABEL_ADVANCED_SETTINGS=StringVar(value=i18n.t("config_window.side_menu_labels.advanced_settings")), @@ -282,7 +284,7 @@ class View(): VAR_DESC_CTRANSLATE2_WEIGHT_TYPE=StringVar(value=i18n.t("config_window.ctranslate2_weight_type.desc")), DICT_CTRANSLATE2_WEIGHT_TYPE=self.getSelectableCtranslate2WeightTypeDict(), CALLBACK_SET_CTRANSLATE2_WEIGHT_TYPE=None, - VAR_CTRANSLATE2_WEIGHT_TYPE=StringVar(value=self.getSelectableCtranslate2WeightTypeDict()[config.WEIGHT_TYPE]), + VAR_CTRANSLATE2_WEIGHT_TYPE=StringVar(value=self.getSelectableCtranslate2WeightTypeDict()[config.CTRANSLATE2_WEIGHT_TYPE]), VAR_LABEL_DEEPL_AUTH_KEY=StringVar(value=i18n.t( "config_window.deepl_auth_key.label")), VAR_DESC_DEEPL_AUTH_KEY=StringVar( @@ -383,6 +385,19 @@ class View(): CALLBACK_FOCUS_OUT_SPEAKER_MAX_PHRASES=self.callbackBindFocusOut_SpeakerMaxPhrases, + # Transcription Tab (Whisper Internal AI Model) + VAR_LABEL_USE_WHISPER_FEATURE=StringVar(value=i18n.t("config_window.use_whisper_feature.label")), + VAR_DESC_USE_WHISPER_FEATURE=StringVar(value=i18n.t("config_window.use_whisper_feature.desc")), + CALLBACK_SET_USE_WHISPER_FEATURE=None, + VAR_USE_WHISPER_FEATURE=BooleanVar(value=config.USE_WHISPER_FEATURE), + + 
VAR_LABEL_WHISPER_WEIGHT_TYPE=StringVar(value=i18n.t("config_window.whisper_weight_type.label")), + VAR_DESC_WHISPER_WEIGHT_TYPE=StringVar(value=i18n.t("config_window.whisper_weight_type.desc")), + DICT_WHISPER_WEIGHT_TYPE=self.getSelectableWhisperWeightTypeDict(), + CALLBACK_SET_WHISPER_WEIGHT_TYPE=None, + VAR_WHISPER_WEIGHT_TYPE=StringVar(value=self.getSelectableWhisperWeightTypeDict()[config.WHISPER_WEIGHT_TYPE]), + + # Others Tab VAR_LABEL_ENABLE_AUTO_CLEAR_MESSAGE_BOX=StringVar(value=i18n.t("config_window.auto_clear_the_message_box.label")), VAR_DESC_ENABLE_AUTO_CLEAR_MESSAGE_BOX=None, @@ -630,6 +645,11 @@ class View(): self.view_variable.CALLBACK_SET_SPEAKER_PHRASE_TIMEOUT = config_window_registers.get("callback_set_speaker_phrase_timeout", None) self.view_variable.CALLBACK_SET_SPEAKER_MAX_PHRASES = config_window_registers.get("callback_set_speaker_max_phrases", None) + # Transcription Tab (Internal AI Model) + self.view_variable.CALLBACK_SET_USE_WHISPER_FEATURE = config_window_registers.get("callback_set_use_whisper_feature", None) + self.view_variable.CALLBACK_SET_WHISPER_WEIGHT_TYPE = config_window_registers.get("callback_set_whisper_weight_type", None) + + # Others Tab self.view_variable.CALLBACK_SET_ENABLE_AUTO_CLEAR_MESSAGE_BOX = config_window_registers.get("callback_set_enable_auto_clear_chatbox", None) self.view_variable.CALLBACK_SET_ENABLE_SEND_ONLY_TRANSLATED_MESSAGES = config_window_registers.get("callback_set_send_only_translated_messages", None) @@ -684,6 +704,11 @@ class View(): ) self.replaceMicThresholdCheckButton_Disabled() + if config.USE_WHISPER_FEATURE is True: + self.openWhisperWeightTypeWidget() + else: + self.closeWhisperWeightTypeWidget() + if config.ENABLE_SPEAKER2CHATBOX is False: vrct_gui._changeConfigWindowWidgetsStatus( status="disabled", @@ -925,6 +950,17 @@ class View(): vrct_gui.update() vrct_gui.config_window.lift() + @staticmethod + def getSelectableWhisperWeightTypeDict(): + return { + 
config.SELECTABLE_WHISPER_WEIGHT_TYPE_DICT["tiny"]: i18n.t("config_window.whisper_weight_type.tiny", capacity="74.5MB"), + config.SELECTABLE_WHISPER_WEIGHT_TYPE_DICT["base"]: i18n.t("config_window.whisper_weight_type.base", capacity="141MB"), + config.SELECTABLE_WHISPER_WEIGHT_TYPE_DICT["small"]: i18n.t("config_window.whisper_weight_type.small", capacity="463MB"), + config.SELECTABLE_WHISPER_WEIGHT_TYPE_DICT["medium"]: i18n.t("config_window.whisper_weight_type.medium", capacity="1.42GB"), + config.SELECTABLE_WHISPER_WEIGHT_TYPE_DICT["large-v1"]: i18n.t("config_window.whisper_weight_type.large_v1", capacity="2.87GB"), + config.SELECTABLE_WHISPER_WEIGHT_TYPE_DICT["large-v2"]: i18n.t("config_window.whisper_weight_type.large_v2", capacity="2.87GB"), + config.SELECTABLE_WHISPER_WEIGHT_TYPE_DICT["large-v3"]: i18n.t("config_window.whisper_weight_type.large_v3", capacity="2.87GB"), + } # Open Webpage Functions def openWebPage_Booth(self): @@ -1020,7 +1056,8 @@ class View(): self.restart_required_configs_pre_data.ui_scaling == config.UI_SCALING and self.restart_required_configs_pre_data.font_family == config.FONT_FAMILY and self.restart_required_configs_pre_data.ui_language == config.UI_LANGUAGE and - self.restart_required_configs_pre_data.is_reset_button_displayed_for_translation == config.IS_RESET_BUTTON_DISPLAYED_FOR_TRANSLATION + self.restart_required_configs_pre_data.is_reset_button_displayed_for_translation == config.IS_RESET_BUTTON_DISPLAYED_FOR_TRANSLATION and + self.restart_required_configs_pre_data.is_reset_button_displayed_for_whisper == config.IS_RESET_BUTTON_DISPLAYED_FOR_WHISPER ) if locale is None: @@ -1075,7 +1112,7 @@ class View(): self.view_variable.VAR_CTRANSLATE2_WEIGHT_TYPE.set(self.getSelectableCtranslate2WeightTypeDict()[selected_weight_type]) def setLatestCTranslate2WeightType(self): - selected_weight_type = self.getSelectableCtranslate2WeightTypeDict()[config.WEIGHT_TYPE] + selected_weight_type = 
self.getSelectableCtranslate2WeightTypeDict()[config.CTRANSLATE2_WEIGHT_TYPE] self.view_variable.VAR_CTRANSLATE2_WEIGHT_TYPE.set(selected_weight_type) @@ -1088,6 +1125,23 @@ class View(): vrct_gui.config_window.sb__ctranslate2_weight_type.grid_remove() + def openWhisperWeightTypeWidget(self): + vrct_gui.config_window.sb__use_whisper_feature.grid() + vrct_gui.config_window.sb__whisper_weight_type.grid() + + def closeWhisperWeightTypeWidget(self): + vrct_gui.config_window.sb__use_whisper_feature.grid() + vrct_gui.config_window.sb__whisper_weight_type.grid_remove() + + + def updateSelectedWhisperWeightType(self, selected_weight_type:str): + self.view_variable.VAR_WHISPER_WEIGHT_TYPE.set(self.getSelectableWhisperWeightTypeDict()[selected_weight_type]) + + def setLatestWhisperWeightType(self): + selected_weight_type = self.getSelectableWhisperWeightTypeDict()[config.WHISPER_WEIGHT_TYPE] + self.view_variable.VAR_WHISPER_WEIGHT_TYPE.set(selected_weight_type) + + def openMicEnergyThresholdWidget(self): self.view_variable.VAR_LABEL_MIC_DYNAMIC_ENERGY_THRESHOLD.set(i18n.t("config_window.mic_dynamic_energy_threshold.label_for_manual")) self.view_variable.VAR_DESC_MIC_DYNAMIC_ENERGY_THRESHOLD.set(i18n.t("config_window.mic_dynamic_energy_threshold.desc_for_manual")) diff --git a/vrct_gui/config_window/widgets/createSideMenuAndSettingsBoxContainers/createSideMenuAndSettingsBoxContainers.py b/vrct_gui/config_window/widgets/createSideMenuAndSettingsBoxContainers/createSideMenuAndSettingsBoxContainers.py index 30af50de..49272afc 100644 --- a/vrct_gui/config_window/widgets/createSideMenuAndSettingsBoxContainers/createSideMenuAndSettingsBoxContainers.py +++ b/vrct_gui/config_window/widgets/createSideMenuAndSettingsBoxContainers/createSideMenuAndSettingsBoxContainers.py @@ -7,7 +7,7 @@ from ._createSettingBoxContainer import _createSettingBoxContainer from .setting_box_containers.setting_box_appearance import createSettingBox_Appearance -from
.setting_box_containers.setting_box_transcription import createSettingBox_Mic, createSettingBox_Speaker +from .setting_box_containers.setting_box_transcription import createSettingBox_Mic, createSettingBox_Speaker, createSettingBox_InternalModel from .setting_box_containers.setting_box_others import createSettingBox_Others, createSettingBox_Others_SendMessageFormats, createSettingBox_Others_ReceivedMessageFormats, createSettingBox_Others_Additional from .setting_box_containers.setting_box_advanced_settings import createSettingBox_AdvancedSettings from .setting_box_containers.setting_box_translation import createSettingBox_Translation @@ -94,6 +94,10 @@ def createSideMenuAndSettingsBoxContainers(config_window, settings, view_variabl "var_section_title": view_variable.VAR_SECOND_TITLE_TRANSCRIPTION_SPEAKER, "setting_box": createSettingBox_Speaker }, + { + "var_section_title": view_variable.VAR_SECOND_TITLE_TRANSCRIPTION_INTERNAL_MODEL, + "setting_box": createSettingBox_InternalModel + }, ] }, }, diff --git a/vrct_gui/config_window/widgets/createSideMenuAndSettingsBoxContainers/setting_box_containers/setting_box_transcription/__init__.py b/vrct_gui/config_window/widgets/createSideMenuAndSettingsBoxContainers/setting_box_containers/setting_box_transcription/__init__.py index 5383094e..b06ff822 100644 --- a/vrct_gui/config_window/widgets/createSideMenuAndSettingsBoxContainers/setting_box_containers/setting_box_transcription/__init__.py +++ b/vrct_gui/config_window/widgets/createSideMenuAndSettingsBoxContainers/setting_box_containers/setting_box_transcription/__init__.py @@ -1,2 +1,3 @@ from .createSettingBox_Mic import createSettingBox_Mic -from .createSettingBox_Speaker import createSettingBox_Speaker \ No newline at end of file +from .createSettingBox_Speaker import createSettingBox_Speaker +from .createSettingBox_InternalModel import createSettingBox_InternalModel \ No newline at end of file diff --git 
a/vrct_gui/config_window/widgets/createSideMenuAndSettingsBoxContainers/setting_box_containers/setting_box_transcription/createSettingBox_InternalModel.py b/vrct_gui/config_window/widgets/createSideMenuAndSettingsBoxContainers/setting_box_containers/setting_box_transcription/createSettingBox_InternalModel.py new file mode 100644 index 00000000..0a6b3e69 --- /dev/null +++ b/vrct_gui/config_window/widgets/createSideMenuAndSettingsBoxContainers/setting_box_containers/setting_box_transcription/createSettingBox_InternalModel.py @@ -0,0 +1,37 @@ +from utils import callFunctionIfCallable + +from .._SettingBoxGenerator import _SettingBoxGenerator + +def createSettingBox_InternalModel(setting_box_wrapper, config_window, settings, view_variable): + sbg = _SettingBoxGenerator(setting_box_wrapper, config_window, settings, view_variable) + createSettingBoxSwitch = sbg.createSettingBoxSwitch + createSettingBoxDropdownMenu = sbg.createSettingBoxDropdownMenu + + def switchUseWhisperFeatureCallback(switch_widget): + callFunctionIfCallable(view_variable.CALLBACK_SET_USE_WHISPER_FEATURE, switch_widget.get()) + + def optionmenuWhisperWeightTypeCallback(value): + callFunctionIfCallable(view_variable.CALLBACK_SET_WHISPER_WEIGHT_TYPE, value) + + + row=0 + config_window.sb__use_whisper_feature = createSettingBoxSwitch( + for_var_label_text=view_variable.VAR_LABEL_USE_WHISPER_FEATURE, + for_var_desc_text=view_variable.VAR_DESC_USE_WHISPER_FEATURE, + switch_attr_name="sb__switch_use_whisper_feature", + command=lambda: switchUseWhisperFeatureCallback(config_window.sb__switch_use_whisper_feature), + variable=view_variable.VAR_USE_WHISPER_FEATURE + ) + config_window.sb__use_whisper_feature.grid(row=row, pady=0) + row+=1 + + config_window.sb__whisper_weight_type = createSettingBoxDropdownMenu( + for_var_label_text=view_variable.VAR_LABEL_WHISPER_WEIGHT_TYPE, + for_var_desc_text=view_variable.VAR_DESC_WHISPER_WEIGHT_TYPE, + optionmenu_attr_name="sb__optionmenu_whisper_weight_type", + 
dropdown_menu_values=view_variable.DICT_WHISPER_WEIGHT_TYPE, + command=lambda value: optionmenuWhisperWeightTypeCallback(value), + variable=view_variable.VAR_WHISPER_WEIGHT_TYPE, + ) + config_window.sb__whisper_weight_type.grid(row=row, pady=0) + row+=1 \ No newline at end of file