Merge branch 'faster-whisper' into develop

2024-02-07 22:50:26 +09:00
parent a22e4b9b91 61a6eb792b
commit d4164d7c58
17 changed files with 758 additions and 142 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -6,7 +6,7 @@ VRCT.spec
 *.pyc
 logs/
 .venv/
-weight/
+weights/
 .vscode
 error.log
 *.exe
--- a/config.py
+++ b/config.py
@@ -98,6 +98,10 @@ class Config:
    def SELECTABLE_CTRANSLATE2_WEIGHT_TYPE_DICT(self):
        return self._SELECTABLE_CTRANSLATE2_WEIGHT_TYPE_DICT
    @property
    def SELECTABLE_WHISPER_WEIGHT_TYPE_DICT(self):
        """Return the stored dict of selectable Whisper weight types (read-only; no setter is defined here)."""
        return self._SELECTABLE_WHISPER_WEIGHT_TYPE_DICT
    @property
    def MAX_MIC_ENERGY_THRESHOLD(self):
        return self._MAX_MIC_ENERGY_THRESHOLD
@@ -224,6 +228,15 @@ class Config:
        if isinstance(value, bool):
            self._IS_RESET_BUTTON_DISPLAYED_FOR_TRANSLATION = value
    @property
    def IS_RESET_BUTTON_DISPLAYED_FOR_WHISPER(self):
        """Return the in-memory flag stored in `_IS_RESET_BUTTON_DISPLAYED_FOR_WHISPER`."""
        return self._IS_RESET_BUTTON_DISPLAYED_FOR_WHISPER
    @IS_RESET_BUTTON_DISPLAYED_FOR_WHISPER.setter
    def IS_RESET_BUTTON_DISPLAYED_FOR_WHISPER(self, value):
        """Store `value` when it is a bool; any other type is silently ignored."""
        if not isinstance(value, bool):
            return
        self._IS_RESET_BUTTON_DISPLAYED_FOR_WHISPER = value
    # Save Json Data
    ## Main Window
    @property
@@ -281,6 +294,17 @@ class Config:
            self._SELECTED_TAB_TARGET_LANGUAGES = value
            saveJson(self.PATH_CONFIG, inspect.currentframe().f_code.co_name, value)
    @property
    @json_serializable('SELECTED_TRANSCRIPTION_ENGINE')
    def SELECTED_TRANSCRIPTION_ENGINE(self):
        """Return the currently selected transcription engine name."""
        return self._SELECTED_TRANSCRIPTION_ENGINE
    @SELECTED_TRANSCRIPTION_ENGINE.setter
    def SELECTED_TRANSCRIPTION_ENGINE(self, value):
        """Store `value` and save it to the JSON config; non-str values are ignored."""
        if not isinstance(value, str):
            return
        self._SELECTED_TRANSCRIPTION_ENGINE = value
        # The JSON key is this function's own name, so the lookup must stay
        # directly inside the setter (not in a helper or lambda).
        json_key = inspect.currentframe().f_code.co_name
        saveJson(self.PATH_CONFIG, json_key, value)
    @property
    @json_serializable('IS_MAIN_WINDOW_SIDEBAR_COMPACT_MODE')
    def IS_MAIN_WINDOW_SIDEBAR_COMPACT_MODE(self):
@@ -587,15 +611,37 @@ class Config:
            saveJson(self.PATH_CONFIG, inspect.currentframe().f_code.co_name, value)
    @property
-    @json_serializable('WEIGHT_TYPE')
+    @json_serializable('USE_WHISPER_FEATURE')
-    def WEIGHT_TYPE(self):
+    def USE_WHISPER_FEATURE(self):
-        return self._WEIGHT_TYPE
+        return self._USE_WHISPER_FEATURE
-    @WEIGHT_TYPE.setter
+    @USE_WHISPER_FEATURE.setter
-    def WEIGHT_TYPE(self, value):
+    def USE_WHISPER_FEATURE(self, value):
        if isinstance(value, bool):
            self._USE_WHISPER_FEATURE = value
            saveJson(self.PATH_CONFIG, inspect.currentframe().f_code.co_name, value)
    @property
    @json_serializable('CTRANSLATE2_WEIGHT_TYPE')
    def CTRANSLATE2_WEIGHT_TYPE(self):
        return self._CTRANSLATE2_WEIGHT_TYPE
    @CTRANSLATE2_WEIGHT_TYPE.setter
    def CTRANSLATE2_WEIGHT_TYPE(self, value):
        # if isinstance(value, str) and value in self.SELECTABLE_CTRANSLATE2_WEIGHT_TYPE_DICT:
        if isinstance(value, str):
-            self._WEIGHT_TYPE = value
+            self._CTRANSLATE2_WEIGHT_TYPE = value
            saveJson(self.PATH_CONFIG, inspect.currentframe().f_code.co_name, value)
    @property
    @json_serializable('WHISPER_WEIGHT_TYPE')
    def WHISPER_WEIGHT_TYPE(self):
        """Return the configured Whisper weight type name."""
        return self._WHISPER_WEIGHT_TYPE
    @WHISPER_WEIGHT_TYPE.setter
    def WHISPER_WEIGHT_TYPE(self, value):
        """Store `value` and save it to the JSON config; non-str values are ignored."""
        if not isinstance(value, str):
            return
        self._WHISPER_WEIGHT_TYPE = value
        # The JSON key is this function's own name, so the lookup must stay
        # directly inside the setter (not in a helper or lambda).
        json_key = inspect.currentframe().f_code.co_name
        saveJson(self.PATH_CONFIG, json_key, value)
    @property
@@ -774,6 +820,18 @@ class Config:
            "Small": "Small",
            "Large": "Large",
        }
        self._SELECTABLE_WHISPER_WEIGHT_TYPE_DICT = {
            # {Save json str}: {i18n_placeholder} pairs
            "tiny": "tiny",
            "base": "base",
            "small": "small",
            "medium": "medium",
            "large-v1": "large-v1",
            "large-v2": "large-v2",
            "large-v3": "large-v3",
        }
        self._MAX_MIC_ENERGY_THRESHOLD = 2000
        self._MAX_SPEAKER_ENERGY_THRESHOLD = 4000
@@ -791,6 +849,7 @@ class Config:
        self._SENT_MESSAGES_LOG = []
        self._CURRENT_SENT_MESSAGES_LOG_INDEX = 0
        self._IS_RESET_BUTTON_DISPLAYED_FOR_TRANSLATION = False
        self._IS_RESET_BUTTON_DISPLAYED_FOR_WHISPER = False
        # Save Json Data
        ## Main Window
@@ -815,6 +874,7 @@ class Config:
            "2":"English\n(United States)",
            "3":"English\n(United States)",
        }
        self._SELECTED_TRANSCRIPTION_ENGINE = "Google"
        self._IS_MAIN_WINDOW_SIDEBAR_COMPACT_MODE = False
        ## Config Window
@@ -851,7 +911,9 @@ class Config:
            "DeepL_API": None,
        }
        self._USE_TRANSLATION_FEATURE = True
-        self._WEIGHT_TYPE = "Small"
+        self._CTRANSLATE2_WEIGHT_TYPE = "Small"
        self._USE_WHISPER_FEATURE = False
        self._WHISPER_WEIGHT_TYPE = "base"
        self._SEND_MESSAGE_FORMAT = "[message]"
        self._SEND_MESSAGE_FORMAT_WITH_T = "[message]([translation])"
        self._RECEIVED_MESSAGE_FORMAT = "[message]"
--- a/controller.py
+++ b/controller.py
@@ -530,8 +530,8 @@ def callbackSetUseTranslationFeature(value):
 def callbackSetCtranslate2WeightType(value):
    print("callbackSetCtranslate2WeightType", value)
-    config.WEIGHT_TYPE = str(value)
+    config.CTRANSLATE2_WEIGHT_TYPE = str(value)
-    view.updateSelectedCtranslate2WeightType(config.WEIGHT_TYPE)
+    view.updateSelectedCtranslate2WeightType(config.CTRANSLATE2_WEIGHT_TYPE)
    view.setWidgetsStatus_changeWeightType_Pending()
    if model.checkCTranslatorCTranslate2ModelWeight():
        config.IS_RESET_BUTTON_DISPLAYED_FOR_TRANSLATION = False
@@ -792,6 +792,33 @@ def callbackSetSpeakerMaxPhrases(value):
    except Exception:
        view.showErrorMessage_SpeakerMaxPhrases()
 # Transcription (Internal AI Model)
 def callbackSetUserWhisperFeature(value):
    """Handle the "use Whisper feature" toggle from the config window.

    Stores `value` in the config. When the feature ends up enabled, the
    weight-type widget is opened and the engine is set to "Whisper" if the
    weight check passes, otherwise to "Google" with the Whisper reset-button
    flag raised. When disabled, only the widget is closed.
    """
    print("callbackSetUserWhisperFeature", value)
    config.USE_WHISPER_FEATURE = value
    if config.USE_WHISPER_FEATURE is not True:
        view.closeWhisperWeightTypeWidget()
    else:
        view.openWhisperWeightTypeWidget()
        weight_ready = model.checkTranscriptionWhisperModelWeight() is True
        config.IS_RESET_BUTTON_DISPLAYED_FOR_WHISPER = not weight_ready
        config.SELECTED_TRANSCRIPTION_ENGINE = "Whisper" if weight_ready else "Google"
    view.showRestartButtonIfRequired()
 def callbackSetWhisperWeightType(value):
    """Handle a change of the selected Whisper weight type.

    Stores the stringified `value`, refreshes the view, then selects
    "Whisper" if the weight check passes; otherwise selects "Google" and
    raises the Whisper reset-button flag.
    """
    print("callbackSetWhisperWeightType", value)
    config.WHISPER_WEIGHT_TYPE = str(value)
    view.updateSelectedWhisperWeightType(config.WHISPER_WEIGHT_TYPE)
    weight_ready = model.checkTranscriptionWhisperModelWeight() is True
    config.IS_RESET_BUTTON_DISPLAYED_FOR_WHISPER = not weight_ready
    config.SELECTED_TRANSCRIPTION_ENGINE = "Whisper" if weight_ready else "Google"
    view.showRestartButtonIfRequired()
 # Others Tab
 def callbackSetEnableAutoClearMessageBox(value):
@@ -923,6 +950,12 @@ def createMainWindow(splash):
    # set Translation Engine
    updateTranslationEngineAndEngineList()
    # set Transcription Engine
    if config.USE_WHISPER_FEATURE is True:
        config.SELECTED_TRANSCRIPTION_ENGINE = "Whisper"
    else:
        config.SELECTED_TRANSCRIPTION_ENGINE = "Google"
    # set word filter
    model.addKeywords()
@@ -1020,6 +1053,10 @@ def createMainWindow(splash):
            "callback_set_speaker_phrase_timeout": callbackSetSpeakerPhraseTimeout,
            "callback_set_speaker_max_phrases": callbackSetSpeakerMaxPhrases,
            # Transcription Tab (Internal AI Model)
            "callback_set_use_whisper_feature": callbackSetUserWhisperFeature,
            "callback_set_whisper_weight_type": callbackSetWhisperWeightType,
            # Others Tab
            "callback_set_enable_auto_clear_chatbox": callbackSetEnableAutoClearMessageBox,
            "callback_set_send_only_translated_messages": callbackSetEnableSendOnlyTranslatedMessages,
--- a/install.bat
+++ b/install.bat
@@ -1,4 +1,2 @@
 python.exe -m pip install --upgrade pip
 pip install -r requirements.txt
 pip install git+https://github.com/misyaguziya/translators
 pip install git+https://github.com/misyaguziya/custom_speech_recognition
--- a/locales/en.yml
+++ b/locales/en.yml
@@ -79,6 +79,7 @@ config_window:
    transcription: Transcription
    transcription_mic: Mic
    transcription_speaker: Speaker
    transcription_internal_model: Internal Model
    others: Others
    others_send_message_formats: Message Formats (Send)
    others_received_message_formats: Message Formats (Received)
@@ -125,6 +126,21 @@ config_window:
    small: "Basic model (%{capacity})"
    large: "High accuracy model (%{capacity})"
  use_whisper_feature:
    label: Use Whisper Feature
    desc: Description
  whisper_weight_type:
    label: Select Whisper Model
    desc: Description
    tiny: "tiny model (%{capacity})"
    base: "base model (%{capacity})"
    small: "small model (%{capacity})"
    medium: "medium model (%{capacity})"
    large_v1: "large-v1 model (%{capacity})"
    large_v2: "large-v2 model (%{capacity})"
    large_v3: "large-v3 model (%{capacity})"
  deepl_auth_key:
    label: DeepL Auth Key
    desc: Please select %{translator} on the main screen with DeepL_API when using. ※Some languages may not be supported.
--- a/main.py
+++ b/main.py
@@ -8,11 +8,16 @@ if __name__ == "__main__":
        splash.showSplash()
        from config import config
-        from models.translation.utils import downloadCTranslate2Weight
+        from models.translation.translation_utils import downloadCTranslate2Weight
        if config.USE_TRANSLATION_FEATURE is True:
-            downloadCTranslate2Weight(config.PATH_LOCAL, config.WEIGHT_TYPE, splash.updateDownloadProgress)
+            downloadCTranslate2Weight(config.PATH_LOCAL, config.CTRANSLATE2_WEIGHT_TYPE, splash.updateDownloadProgress)
        from models.transcription.transcription_whisper import downloadWhisperWeight
        if config.USE_WHISPER_FEATURE is True:
            downloadWhisperWeight(config.PATH_LOCAL, config.WHISPER_WEIGHT_TYPE, splash.updateDownloadProgress)
        splash.toProgress(0)
        import controller
        controller.createMainWindow(splash)
        splash.destroySplash()
--- a/model.py
+++ b/model.py
@@ -23,7 +23,8 @@ from models.transcription.transcription_transcriber import AudioTranscriber
 from models.xsoverlay.notification import xsoverlayForVRCT
 from models.translation.translation_languages import translation_lang
 from models.transcription.transcription_languages import transcription_lang
-from models.translation.utils import checkCTranslate2Weight
+from models.translation.translation_utils import checkCTranslate2Weight
 from models.transcription.transcription_whisper import checkWhisperWeight
 from config import config
 class threadFnc(Thread):
@@ -65,14 +66,17 @@ class Model:
        self.speaker_energy_plot_progressbar = None
        self.translator = Translator()
        if config.USE_TRANSLATION_FEATURE is True:
-            self.translator.changeCTranslate2Model(config.PATH_LOCAL, config.WEIGHT_TYPE)
+            self.translator.changeCTranslate2Model(config.PATH_LOCAL, config.CTRANSLATE2_WEIGHT_TYPE)
        self.keyword_processor = KeywordProcessor()
    def checkCTranslatorCTranslate2ModelWeight(self):
-        return checkCTranslate2Weight(config.PATH_LOCAL, config.WEIGHT_TYPE)
+        return checkCTranslate2Weight(config.PATH_LOCAL, config.CTRANSLATE2_WEIGHT_TYPE)
    def changeTranslatorCTranslate2Model(self):
-        self.translator.changeCTranslate2Model(config.PATH_LOCAL, config.WEIGHT_TYPE)
+        self.translator.changeCTranslate2Model(config.PATH_LOCAL, config.CTRANSLATE2_WEIGHT_TYPE)
    def checkTranscriptionWhisperModelWeight(self):
        """Return the result of `checkWhisperWeight` for the configured local path and Whisper weight type."""
        weight_root = config.PATH_LOCAL
        weight_type = config.WHISPER_WEIGHT_TYPE
        return checkWhisperWeight(weight_root, weight_type)
    def resetKeywordProcessor(self):
        del self.keyword_processor
@@ -335,9 +339,11 @@ class Model:
            source=self.mic_audio_recorder.source,
            phrase_timeout=phase_timeout,
            max_phrases=config.INPUT_MIC_MAX_PHRASES,
            root=config.PATH_LOCAL,
            whisper_weight_type=config.WHISPER_WEIGHT_TYPE,
        )
        def sendMicTranscript():
-            mic_transcriber.transcribeAudioQueue(mic_audio_queue, config.SOURCE_LANGUAGE, config.SOURCE_COUNTRY)
+            mic_transcriber.transcribeAudioQueue(mic_audio_queue, config.SOURCE_LANGUAGE, config.SOURCE_COUNTRY, config.SELECTED_TRANSCRIPTION_ENGINE)
            message = mic_transcriber.getTranscript()
            try:
                fnc(message)
@@ -416,9 +422,11 @@ class Model:
            source=self.speaker_audio_recorder.source,
            phrase_timeout=phase_timeout,
            max_phrases=config.INPUT_SPEAKER_MAX_PHRASES,
            root=config.PATH_LOCAL,
            whisper_weight_type=config.WHISPER_WEIGHT_TYPE,
        )
        def sendSpeakerTranscript():
-            speaker_transcriber.transcribeAudioQueue(speaker_audio_queue, config.TARGET_LANGUAGE, config.TARGET_COUNTRY)
+            speaker_transcriber.transcribeAudioQueue(speaker_audio_queue, config.TARGET_LANGUAGE, config.TARGET_COUNTRY, config.SELECTED_TRANSCRIPTION_ENGINE)
            message = speaker_transcriber.getTranscript()
            try:
                fnc(message)
--- a/models/transcription/transcription_languages.py
+++ b/models/transcription/transcription_languages.py
@@ -1,177 +1,438 @@
 transcription_lang = {
    "Afrikaans":{
-        "South Africa":"af-ZA",
+        "South Africa":{
            "Google": "af-ZA",
            "Whisper": "af",
        },
    },
    "Arabic":{
-        "Algeria":"ar-DZ",
+        "Algeria":{
-        "Bahrain":"ar-BH",
+            "Google": "ar-DZ",
-        "Egypt":"ar-EG",
+            "Whisper": "ar",
-        "Israel":"ar-IL",
+        },
-        "Iraq":"ar-IQ",
+        "Bahrain":{
-        "Jordan":"ar-JO",
+            "Google": "ar-BH",
-        "Kuwait":"ar-KW",
+            "Whisper": "ar",
-        "Lebanon":"ar-LB",
+        },
-        "Morocco":"ar-MA",
+        "Egypt":{
-        "Oman":"ar-OM",
+            "Google": "ar-EG",
-        "State of Palestine":"ar-PS",
+            "Whisper": "ar",
-        "Qatar":"ar-QA",
+        },
-        "Saudi Arabia":"ar-SA",
+        "Israel":{
-        "Tunisia":"ar-TN",
+            "Google": "ar-IL",
-        "United Arab Emirates":"ar-AE",
+            "Whisper": "ar",
        },
        "Iraq":{
            "Google": "ar-IQ",
            "Whisper": "ar",
        },
        "Jordan":{
            "Google": "ar-JO",
            "Whisper": "ar",
        },
        "Kuwait":{
            "Google": "ar-KW",
            "Whisper": "ar",
        },
        "Lebanon":{
            "Google": "ar-LB",
            "Whisper": "ar",
        },
        "Morocco":{
            "Google": "ar-MA",
            "Whisper": "ar",
        },
        "Oman":{
            "Google": "ar-OM",
            "Whisper": "ar",
        },
        "State of Palestine":{
            "Google": "ar-PS",
            "Whisper": "ar",
        },
        "Qatar":{
            "Google": "ar-QA",
            "Whisper": "ar",
        },
        "Saudi Arabia":{
            "Google": "ar-SA",
            "Whisper": "ar",
        },
        "Tunisia":{
            "Google": "ar-TN",
            "Whisper": "ar",
        },
        "United Arab Emirates":{
            "Google": "ar-AE",
            "Whisper": "ar",
        },
    },
    "Basque":{
-        "Spain":"eu-ES",
+        "Spain":{
            "Google": "eu-ES",
            "Whisper": "eu",
        },
    },
    "Bulgarian":{
-        "Bulgaria":"bg-BG",
+        "Bulgaria":{
            "Google": "bg-BG",
            "Whisper": "bg",
        },
    },
    "Catalan":{
-        "Spain":"ca-ES",
+        "Spain":{
            "Google": "ca-ES",
            "Whisper": "ca",
        },
    },
    "Chinese":{
-        "Mandarin (Simplified, China)":"cmn-Hans-CN",
+        "Mandarin (Simplified, China)":{
-        "Mandarin (Simplified, Hong Kong)":"cmn-Hans-HK",
+            "Google": "cmn-Hans-CN",
-        "Mandarin (Traditional, Taiwan)":"cmn-Hant-TW",
+            "Whisper": "zh",
-        "Cantonese (Traditional Hong Kong)":"yue-Hant-HK",
+        },
        "Mandarin (Simplified, Hong Kong)":{
            "Google": "cmn-Hans-HK",
            "Whisper": "zh",
        },
        "Mandarin (Traditional, Taiwan)":{
            "Google": "cmn-Hant-TW",
            "Whisper": "zh",
        },
        "Cantonese (Traditional Hong Kong)":{
            "Google": "yue-Hant-HK",
            "Whisper": "yue",
        },
    },
    "Croatian":{
-        "Croatia":"hr-HR",
+        "Croatia":{
            "Google": "hr-HR",
            "Whisper": "hr",
        },
    },
    "Czech":{
-        "Czech Republic":"cs-CZ",
+        "Czech Republic":{
            "Google": "cs-CZ",
            "Whisper": "cs",
        },
    },
    "Danish":{
-        "Denmark":"da-DK",
+        "Denmark":{
            "Google": "da-DK",
            "Whisper": "da",
        },
    },
    "Dutch":{
-        "Netherlands":"nl-NL",
+        "Netherlands":{
            "Google": "nl-NL",
            "Whisper": "nl",
        },
    },
    "English": {
-        "United States":"en-US",
+        "United States":{
-        "United Kingdom":"en-GB",
+            "Google": "en-US",
-        "Australia":"en-AU",
+            "Whisper": "en",
-        "Canada":"en-CA",
+        },
-        "India":"en-IN",
+        "United Kingdom":{
-        "Ireland":"en-IE",
+            "Google": "en-GB",
-        "New Zealand":"en-NZ",
+            "Whisper": "en",
-        "Philippines":"en-PH",
+        },
-        "South Africa":"en-ZA",
+        "Australia":{
            "Google": "en-AU",
            "Whisper": "en",
        },
        "Canada":{
            "Google": "en-CA",
            "Whisper": "en",
        },
        "India":{
            "Google": "en-IN",
            "Whisper": "en",
        },
        "Ireland":{
            "Google": "en-IE",
            "Whisper": "en",
        },
        "New Zealand":{
            "Google": "en-NZ",
            "Whisper": "en",
        },
        "Philippines":{
            "Google": "en-PH",
            "Whisper": "en",
        },
        "South Africa":{
            "Google": "en-ZA",
            "Whisper": "en",
        },
    },
    "Filipino":{
-        "Philippines":"fil-PH",
+        "Philippines":{
            "Google": "fil-PH",
            "Whisper": "tl",
        },
    },
    "Finnish":{
-        "Finland":"fi-FI",
+        "Finland":{
            "Google": "fi-FI",
            "Whisper": "fi",
        },
    },
    "French":{
-        "France":"fr-FR",
+        "France":{
            "Google": "fr-FR",
            "Whisper": "fr",
        },
    },
    "Galician":{
-        "Spain":"gl-ES",
+        "Spain":{
            "Google": "gl-ES",
            "Whisper": "gl",
        },
    },
    "German":{
-        "Germany":"de-DE",
+        "Germany":{
            "Google": "de-DE",
            "Whisper": "de",
        },
    },
    "Greek":{
-        "Greece":"el-GR",
+        "Greece":{
            "Google": "el-GR",
            "Whisper": "el",
        },
    },
    "Hebrew":{
-        "Israel":"he-IL",
+        "Israel":{
            "Google": "he-IL",
            "Whisper": "he",
        },
    },
    "Hindi": {
-        "India":"hi-IN",
+        "India":{
            "Google": "hi-IN",
            "Whisper": "hi",
        },
    },
    "Hungarian":{
-        "Hungary":"hu-HU",
+        "Hungary":{
            "Google": "hu-HU",
            "Whisper": "hu",
        },
    },
    "Indonesian":{
-        "Indonesia":"id-ID",
+        "Indonesia":{
            "Google": "id-ID",
            "Whisper": "id",
        },
    },
    "Icelandic":{
-        "Iceland":"is-IS",
+        "Iceland":{
            "Google": "is-IS",
            "Whisper": "is",
        },
    },
    "Italian":{
-        "Italy":"it-IT",
+        "Italy":{
-        "Switzerland":"it-CH",
+            "Google": "it-IT",
            "Whisper": "it",
        },
        "Switzerland":{
            "Google": "it-CH",
            "Whisper": "it",
        },
    },
    "Japanese":{
-        "Japan":"ja-JP",
+        "Japan":{
            "Google": "ja-JP",
            "Whisper": "ja",
        },
    },
    "Korean":{
-        "South Korea":"ko-KR",
+        "South Korea":{
            "Google": "ko-KR",
            "Whisper": "ko",
        },
    },
    "Lithuanian":{
-        "Lithuania":"lt-LT",
+        "Lithuania":{
            "Google": "lt-LT",
            "Whisper": "lt",
        },
    },
    "Malay":{
-        "Malaysia":"ms-MY",
+        "Malaysia":{
            "Google": "ms-MY",
            "Whisper": "ms",
        },
    },
    "Norwegian":{
-        "Norway":"nb-NO",
+        "Norway":{
            "Google": "nb-NO",
            "Whisper": "no",
        },
    },
    "Persian":{
-        "Iran":"fa-IR",
+        "Iran":{
            "Google": "fa-IR",
            "Whisper": "fa",
        },
    },
    "Polish":{
-        "Poland":"pl-PL",
+        "Poland":{
            "Google": "pl-PL",
            "Whisper": "pl",
        },
    },
    "Portuguese":{
-        "Brazil":"pt-BR",
+        "Brazil":{
-        "Portugal":"pt-PT",
+            "Google": "pt-BR",
            "Whisper": "pt",
        },
        "Portugal":{
            "Google": "pt-PT",
            "Whisper": "pt",
        },
    },
    "Romanian":{
-        "Romania":"ro-RO",
+        "Romania":{
            "Google": "ro-RO",
            "Whisper": "ro",
        },
    },
    "Russian":{
-        "Russia":"ru-RU",
+        "Russia":{
            "Google": "ru-RU",
            "Whisper": "ru",
        },
    },
    "Serbian":{
-        "Serbia":"sr-RS",
+        "Serbia":{
            "Google": "sr-RS",
            "Whisper": "sr",
        },
    },
    "Slovak":{
-        "Slovakia":"sk-SK",
+        "Slovakia":{
            "Google": "sk-SK",
            "Whisper": "sk",
        },
    },
    "Slovenian":{
-        "Slovenia":"sl-SI",
+        "Slovenia":{
            "Google": "sl-SI",
            "Whisper": "sl",
        },
    },
    "Spanish":{
-        "Argentina":"es-AR",
+        "Argentina":{
-        "Bolivia":"es-BO",
+            "Google": "es-AR",
-        "Chile":"es-CL",
+            "Whisper": "es",
-        "Colombia":"es-CO",
+        },
-        "Costa Rica":"es-CR",
+        "Bolivia":{
-        "Dominican Republic":"es-DO",
+            "Google": "es-BO",
-        "Ecuador":"es-EC",
+            "Whisper": "es",
-        "El Salvador":"es-SV",
+        },
-        "Guatemala":"es-GT",
+        "Chile":{
-        "Honduras":"es-HN",
+            "Google": "es-CL",
-        "Mexico":"es-MX",
+            "Whisper": "es",
-        "Nicaragua":"es-NI",
+        },
-        "Panama":"es-PA",
+        "Colombia":{
-        "Paraguay":"es-PY",
+            "Google": "es-CO",
-        "Peru":"es-PE",
+            "Whisper": "es",
-        "Puerto Rico":"es-PR",
+        },
-        "Spain":"es-ES",
+        "Costa Rica":{
-        "Uruguay":"es-UY",
+            "Google": "es-CR",
-        "United States":"es-US",
+            "Whisper": "es",
-        "Venezuela":"es-VE",
+        },
        "Dominican Republic":{
            "Google": "es-DO",
            "Whisper": "es",
        },
        "Ecuador":{
            "Google": "es-EC",
            "Whisper": "es",
        },
        "El Salvador":{
            "Google": "es-SV",
            "Whisper": "es",
        },
        "Guatemala":{
            "Google": "es-GT",
            "Whisper": "es",
        },
        "Honduras":{
            "Google": "es-HN",
            "Whisper": "es",
        },
        "Mexico":{
            "Google": "es-MX",
            "Whisper": "es",
        },
        "Nicaragua":{
            "Google": "es-NI",
            "Whisper": "es",
        },
        "Panama":{
            "Google": "es-PA",
            "Whisper": "es",
        },
        "Paraguay":{
            "Google": "es-PY",
            "Whisper": "es",
        },
        "Peru":{
            "Google": "es-PE",
            "Whisper": "es",
        },
        "Puerto Rico":{
            "Google": "es-PR",
            "Whisper": "es",
        },
        "Spain":{
            "Google": "es-ES",
            "Whisper": "es",
        },
        "Uruguay":{
            "Google": "es-UY",
            "Whisper": "es",
        },
        "United States":{
            "Google": "es-US",
            "Whisper": "es",
        },
        "Venezuela":{
            "Google": "es-VE",
            "Whisper": "es",
        },
    },
    "Swedish":{
-        "Sweden":"sv-SE",
+        "Sweden":{
            "Google": "sv-SE",
            "Whisper": "sv",
        },
    },
    "Thai":{
-        "Thailand":"th-TH",
+        "Thailand":{
            "Google": "th-TH",
            "Whisper": "th",
        },
    },
    "Turkish":{
-        "Turkey":"tr-TR",
+        "Turkey":{
            "Google": "tr-TR",
            "Whisper": "tr",
        },
    },
    "Ukrainian":{
-        "Ukraine":"uk-UA",
+        "Ukraine":{
            "Google": "uk-UA",
            "Whisper": "uk",
        },
    },
    "Vietnamese":{
-        "Vietnam":"vi-VN",
+        "Vietnam":{
-    },
+            "Google": "vi-VN",
-    "Zulu":{
+            "Whisper": "vi",
-        "South Africa":"zu-ZA"
+        },
    },
 }
--- a/models/transcription/transcription_transcriber.py
+++ b/models/transcription/transcription_transcriber.py
@@ -5,12 +5,16 @@ from speech_recognition import Recognizer, AudioData, AudioFile
 from datetime import timedelta
 from pyaudiowpatch import get_sample_size, paInt16
 from .transcription_languages import transcription_lang
 from .transcription_whisper import getWhisperModel, checkWhisperWeight
 import torch
 import numpy as np
 PHRASE_TIMEOUT = 3
 MAX_PHRASES = 10
 class AudioTranscriber:
-    def __init__(self, speaker, source, phrase_timeout, max_phrases):
+    def __init__(self, speaker, source, phrase_timeout, max_phrases, root=None, whisper_weight_type=None, ):
        self.speaker = speaker
        self.phrase_timeout = phrase_timeout
        self.max_phrases = max_phrases
@@ -26,23 +30,51 @@ class AudioTranscriber:
                "new_phrase": True,
                "process_data_func": self.processSpeakerData if speaker else self.processSpeakerData
        }
        if whisper_weight_type is not None and root is not None and checkWhisperWeight(root, whisper_weight_type) is True:
            self.whisper_model = getWhisperModel(root, whisper_weight_type)
        else:
            self.whisper_model = None
-    def transcribeAudioQueue(self, audio_queue, language, country):
+    def transcribeAudioQueue(self, audio_queue, language, country, transcription_engine):
        # while True:
        audio, time_spoken = audio_queue.get()
        self.updateLastSampleAndPhraseStatus(audio, time_spoken)
        text = ''
        try:
-            # fd, path = tempfile.mkstemp(suffix=".wav")
+            # Fall back to Google Speech-to-Text when Whisper is unavailable
-            # os.close(fd)
+            if transcription_engine == "Whisper":
                if self.whisper_model is None:
                    transcription_engine = "Google"
            audio_data = self.audio_sources["process_data_func"]()
-            text = self.audio_recognizer.recognize_google(audio_data, language=transcription_lang[language][country])
+            match transcription_engine:
                case "Google":
                    text = self.audio_recognizer.recognize_google(audio_data, language=transcription_lang[language][country][transcription_engine])
                case "Whisper":
                    audio_data = np.frombuffer(audio_data.get_raw_data(convert_rate=16000, convert_width=2), np.int16).flatten().astype(np.float32) / 32768.0
                    if isinstance(audio_data, torch.Tensor):
                        audio_data = audio_data.detach().numpy()
                    segments, _ = self.whisper_model.transcribe(
                        audio_data,
                        beam_size=5,
                        temperature=0.0,
                        log_prob_threshold=-0.8,
                        no_speech_threshold=0.6,
                        language=transcription_lang[language][country][transcription_engine],
                        word_timestamps=False,
                        without_timestamps=True,
                        task="transcribe",
                        vad_filter=False,
                        )
                    for s in segments:
                        if s.avg_logprob < -0.8 or s.no_speech_prob > 0.6:
                            continue
                        text += s.text
        except Exception:
            pass
        finally:
            pass
            # os.unlink(path)
        if text != '':
            self.updateTranscript(text)
--- a/models/transcription/transcription_whisper.py
+++ b/models/transcription/transcription_whisper.py
@@ -0,0 +1,98 @@
 from os import path as os_path, makedirs as os_makedirs
 from requests import get as requests_get
 from typing import Callable
 import huggingface_hub
 from faster_whisper import WhisperModel
 import logging
 # Raise the 'faster_whisper' logger threshold so only CRITICAL records pass.
 logger = logging.getLogger('faster_whisper')
 logger.setLevel(logging.CRITICAL)
 # Weight-type name -> identifier handed to huggingface_hub.hf_hub_url()
 # by downloadWhisperWeight().
 _MODELS = {
    "tiny": "Systran/faster-whisper-tiny",
    "base": "Systran/faster-whisper-base",
    "small": "Systran/faster-whisper-small",
    "medium": "Systran/faster-whisper-medium",
    "large-v1": "Systran/faster-whisper-large-v1",
    "large-v2": "Systran/faster-whisper-large-v2",
    "large-v3": "Systran/faster-whisper-large-v3",
 }
 # File names downloadWhisperWeight() attempts to fetch for a weight type.
 # NOTE(review): both vocabulary.txt and vocabulary.json are listed; presumably
 # a given repository ships only one of them, the other request failing and
 # being reported by downloadFile() - confirm.
 _FILENAMES = [
    "config.json",
    "preprocessor_config.json",
    "model.bin",
    "tokenizer.json",
    "vocabulary.txt",
    "vocabulary.json",
 ]
 def downloadFile(url, path, func=None):
    """Stream the resource at `url` into the file at `path`.

    Args:
        url: Address fetched with an HTTP GET request.
        path: Destination file path, opened in binary write mode.
        func: Optional callable invoked after each written chunk with
            bytes-written-so-far divided by the Content-Length value.

    Best-effort: any exception is printed rather than raised, and nothing
    is returned.
    """
    try:
        # Using the response as a context manager releases the streamed
        # connection even when writing fails part-way through.
        with requests_get(url, stream=True) as res:
            res.raise_for_status()
            file_size = int(res.headers.get('content-length', 0))
            # Without a positive Content-Length the progress ratio would divide
            # by zero; that error used to abort the download after the first
            # chunk and leave a truncated file. Skip reporting instead.
            report_progress = callable(func) and file_size > 0
            total_chunk = 0
            with open(path, 'wb') as file:
                for chunk in res.iter_content(chunk_size=1024*5):
                    file.write(chunk)
                    if report_progress:
                        total_chunk += len(chunk)
                        func(total_chunk/file_size)
    except Exception as e:
        print("error:downloadFile()", e)
 def checkWhisperWeight(root, weight_type):
    """Report whether a Whisper model can be loaded from local files.

    Tries to construct a `WhisperModel` from
    `<root>/weights/whisper/<weight_type>` with `local_files_only=True`.
    Returns True when construction succeeds and False when it raises any
    `Exception`.
    """
    weight_dir = os_path.join(root, "weights", "whisper", weight_type)
    try:
        WhisperModel(
            weight_dir,
            device="cpu",
            device_index=0,
            compute_type="int8",
            cpu_threads=4,
            num_workers=1,
            local_files_only=True,
        )
    except Exception:
        return False
    return True
 def downloadWhisperWeight(root, weight_type, callbackFunc):
    """Download the files of a Whisper weight type unless it already loads.

    Creates `<root>/weights/whisper/<weight_type>` if needed. When
    `checkWhisperWeight` does not return True, every name in `_FILENAMES`
    is fetched into that directory via `downloadFile`, with `callbackFunc`
    passed as its progress callback.
    """
    weight_dir = os_path.join(root, "weights", "whisper", weight_type)
    os_makedirs(weight_dir, exist_ok=True)
    if checkWhisperWeight(root, weight_type) is not True:
        for filename in _FILENAMES:
            print("Downloading", filename, "...")
            destination = os_path.join(weight_dir, filename)
            source_url = huggingface_hub.hf_hub_url(_MODELS[weight_type], filename)
            downloadFile(source_url, destination, func=callbackFunc)
 def getWhisperModel(root, weight_type):
    """Construct and return a `WhisperModel` from `<root>/weights/whisper/<weight_type>`.

    The model is created for CPU with int8 compute type, 4 CPU threads,
    one worker, and `local_files_only=True`.
    """
    weight_dir = os_path.join(root, "weights", "whisper", weight_type)
    model_options = {
        "device": "cpu",
        "device_index": 0,
        "compute_type": "int8",
        "cpu_threads": 4,
        "num_workers": 1,
        "local_files_only": True,
    }
    return WhisperModel(weight_dir, **model_options)
 if __name__ == "__main__":
    # Manual run: download every supported weight type into the current
    # directory, printing each progress value.
    def callback(value):
        print(value)
    for weight_type in ("tiny", "base", "small", "medium", "large-v1", "large-v2", "large-v3"):
        downloadWhisperWeight("./", weight_type, callback)
--- a/models/translation/translation_translator.py
+++ b/models/translation/translation_translator.py
@@ -2,7 +2,7 @@ import os
 from deepl import Translator as deepl_Translator
 from translators import translate_text as other_web_Translator
 from .translation_languages import translation_lang
-from .utils import ctranslate2_weights
+from .translation_utils import ctranslate2_weights
 import ctranslate2
 import transformers
@@ -27,8 +27,8 @@ class Translator():
    def changeCTranslate2Model(self, path, model_type):
        directory_name = ctranslate2_weights[model_type]["directory_name"]
        tokenizer = ctranslate2_weights[model_type]["tokenizer"]
-        weight_path = os.path.join(path, "weight", directory_name)
+        weight_path = os.path.join(path, "weights", "ctranslate2", directory_name)
-        tokenizer_path = os.path.join(path, "weight", directory_name, "tokenizer")
+        tokenizer_path = os.path.join(path, "weights", "ctranslate2", directory_name, "tokenizer")
        self.ctranslate2_translator = ctranslate2.Translator(
            weight_path,
            device="cpu",
@@ -41,7 +41,7 @@ class Translator():
            self.ctranslate2_tokenizer = transformers.AutoTokenizer.from_pretrained(tokenizer, cache_dir=tokenizer_path)
        except Exception as e:
            print("Error: changeCTranslate2Model()", e)
-            tokenizer_path = os.path.join("./weight", directory_name, "tokenizer")
+            tokenizer_path = os.path.join("./weights", "ctranslate2", directory_name, "tokenizer")
            self.ctranslate2_tokenizer = transformers.AutoTokenizer.from_pretrained(tokenizer, cache_dir=tokenizer_path)
    @staticmethod
--- a/models/translation/translation_utils.py
+++ b/models/translation/translation_utils.py
@@ -39,36 +39,36 @@ def calculate_file_hash(file_path, block_size=65536):
    return hash_object.hexdigest()
 def checkCTranslate2Weight(path, weight_type="Small"):
    directory_name = 'weight'
    current_directory = path
    weight_directory_name = ctranslate2_weights[weight_type]["directory_name"]
    hash_data = ctranslate2_weights[weight_type]["hash"]
-    files = ["model.bin", "sentencepiece.model", "shared_vocabulary.txt"]
+    files = [
        "model.bin",
        "sentencepiece.model",
        "shared_vocabulary.txt"
    ]
    # check already downloaded
    already_downloaded = False
-    if all(os_path.exists(os_path.join(current_directory, directory_name, weight_directory_name, file)) for file in files):
+    if all(os_path.exists(os_path.join(path, weight_directory_name, file)) for file in files):
        # check hash
        for file in files:
            original_hash = hash_data[file]
-            current_hash = calculate_file_hash(os_path.join(current_directory, directory_name, weight_directory_name, file))
+            current_hash = calculate_file_hash(os_path.join(path, weight_directory_name, file))
            if original_hash != current_hash:
                break
        already_downloaded = True
    return already_downloaded
-def downloadCTranslate2Weight(path, weight_type="Small", func=None):
+def downloadCTranslate2Weight(root, weight_type="Small", func=None):
    url = ctranslate2_weights[weight_type]["url"]
-    filename = 'weight.zip'
+    filename = "weight.zip"
-    directory_name = 'weight'
+    path = os_path.join(root, "weights", "ctranslate2")
-    current_directory = path
+    os_makedirs(path, exist_ok=True)
    if checkCTranslate2Weight(path, weight_type):
        return
    try:
        os_makedirs(os_path.join(current_directory, directory_name), exist_ok=True)
        print(os_path.join(current_directory, directory_name))
        with tempfile.TemporaryDirectory() as tmp_path:
            res = requests_get(url, stream=True)
            file_size = int(res.headers.get('content-length', 0))
@@ -81,6 +81,6 @@ def downloadCTranslate2Weight(path, weight_type="Small", func=None):
                        func(total_chunk/file_size)
            with ZipFile(os_path.join(tmp_path, filename)) as zf:
-                zf.extractall(os_path.join(current_directory, directory_name))
+                zf.extractall(path)
    except Exception as e:
            print("error:downloadCTranslate2Weight()", e)
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,6 +8,9 @@ pyyaml == 6.0.1
 python-i18n == 0.3.9
 CTkToolTip == 0.8
 pyinstaller==6.2.0
-transformers[torch]
+transformers[torch]==4.37.2
 sentencepiece==0.1.99
-ctranslate2==3.21.0
+ctranslate2==3.24.0
 faster-whisper==0.10.0
 translators @ git+https://github.com/misyaguziya/translators@master
 SpeechRecognition @ git+https://github.com/misyaguziya/custom_speech_recognition@master
--- a/view.py
+++ b/view.py
@@ -29,6 +29,7 @@ class View():
            font_family=config.FONT_FAMILY,
            ui_language=config.UI_LANGUAGE,
            is_reset_button_displayed_for_translation=config.IS_RESET_BUTTON_DISPLAYED_FOR_TRANSLATION,
            is_reset_button_displayed_for_whisper=config.IS_RESET_BUTTON_DISPLAYED_FOR_WHISPER,
        )
        if config.ENABLE_SPEAKER2CHATBOX is False:
@@ -213,6 +214,7 @@ class View():
            VAR_SIDE_MENU_LABEL_TRANSCRIPTION=StringVar(value=i18n.t("config_window.side_menu_labels.transcription")),
            VAR_SECOND_TITLE_TRANSCRIPTION_MIC=StringVar(value=i18n.t("config_window.side_menu_labels.transcription_mic")),
            VAR_SECOND_TITLE_TRANSCRIPTION_SPEAKER=StringVar(value=i18n.t("config_window.side_menu_labels.transcription_speaker")),
            VAR_SECOND_TITLE_TRANSCRIPTION_INTERNAL_MODEL=StringVar(value=i18n.t("config_window.side_menu_labels.transcription_internal_model")),
            VAR_SIDE_MENU_LABEL_OTHERS=StringVar(value=i18n.t("config_window.side_menu_labels.others")),
            VAR_SIDE_MENU_LABEL_ADVANCED_SETTINGS=StringVar(value=i18n.t("config_window.side_menu_labels.advanced_settings")),
@@ -282,7 +284,7 @@ class View():
            VAR_DESC_CTRANSLATE2_WEIGHT_TYPE=StringVar(value=i18n.t("config_window.ctranslate2_weight_type.desc")),
            DICT_CTRANSLATE2_WEIGHT_TYPE=self.getSelectableCtranslate2WeightTypeDict(),
            CALLBACK_SET_CTRANSLATE2_WEIGHT_TYPE=None,
-            VAR_CTRANSLATE2_WEIGHT_TYPE=StringVar(value=self.getSelectableCtranslate2WeightTypeDict()[config.WEIGHT_TYPE]),
+            VAR_CTRANSLATE2_WEIGHT_TYPE=StringVar(value=self.getSelectableCtranslate2WeightTypeDict()[config.CTRANSLATE2_WEIGHT_TYPE]),
            VAR_LABEL_DEEPL_AUTH_KEY=StringVar(value=i18n.t( "config_window.deepl_auth_key.label")),
            VAR_DESC_DEEPL_AUTH_KEY=StringVar(
@@ -383,6 +385,19 @@ class View():
            CALLBACK_FOCUS_OUT_SPEAKER_MAX_PHRASES=self.callbackBindFocusOut_SpeakerMaxPhrases,
            # Transcription Tab (Whisper Internal AI Model)
            VAR_LABEL_USE_WHISPER_FEATURE=StringVar(value=i18n.t("config_window.use_whisper_feature.label")),
            VAR_DESC_USE_WHISPER_FEATURE=StringVar(value=i18n.t("config_window.use_whisper_feature.desc")),
            CALLBACK_SET_USE_WHISPER_FEATURE=None,
            VAR_USE_WHISPER_FEATURE=BooleanVar(value=config.USE_WHISPER_FEATURE),
            VAR_LABEL_WHISPER_WEIGHT_TYPE=StringVar(value=i18n.t("config_window.whisper_weight_type.label")),
            VAR_DESC_WHISPER_WEIGHT_TYPE=StringVar(value=i18n.t("config_window.whisper_weight_type.desc")),
            DICT_WHISPER_WEIGHT_TYPE=self.getSelectableWhisperWeightTypeDict(),
            CALLBACK_SET_WHISPER_WEIGHT_TYPE=None,
            VAR_WHISPER_WEIGHT_TYPE=StringVar(value=self.getSelectableWhisperWeightTypeDict()[config.WHISPER_WEIGHT_TYPE]),
            # Others Tab
            VAR_LABEL_ENABLE_AUTO_CLEAR_MESSAGE_BOX=StringVar(value=i18n.t("config_window.auto_clear_the_message_box.label")),
            VAR_DESC_ENABLE_AUTO_CLEAR_MESSAGE_BOX=None,
@@ -630,6 +645,11 @@ class View():
            self.view_variable.CALLBACK_SET_SPEAKER_PHRASE_TIMEOUT = config_window_registers.get("callback_set_speaker_phrase_timeout", None)
            self.view_variable.CALLBACK_SET_SPEAKER_MAX_PHRASES = config_window_registers.get("callback_set_speaker_max_phrases", None)
            # Transcription Tab (Internal AI Model)
            self.view_variable.CALLBACK_SET_USE_WHISPER_FEATURE = config_window_registers.get("callback_set_use_whisper_feature", None)
            self.view_variable.CALLBACK_SET_WHISPER_WEIGHT_TYPE = config_window_registers.get("callback_set_whisper_weight_type", None)
            # Others Tab
            self.view_variable.CALLBACK_SET_ENABLE_AUTO_CLEAR_MESSAGE_BOX = config_window_registers.get("callback_set_enable_auto_clear_chatbox", None)
            self.view_variable.CALLBACK_SET_ENABLE_SEND_ONLY_TRANSLATED_MESSAGES = config_window_registers.get("callback_set_send_only_translated_messages", None)
@@ -684,6 +704,11 @@ class View():
            )
            self.replaceMicThresholdCheckButton_Disabled()
        if config.USE_WHISPER_FEATURE is True:
            self.openWhisperWeightTypeWidget()
        else:
            self.closeWhisperWeightTypeWidget()
        if config.ENABLE_SPEAKER2CHATBOX is False:
            vrct_gui._changeConfigWindowWidgetsStatus(
                status="disabled",
@@ -925,6 +950,17 @@ class View():
        vrct_gui.update()
        vrct_gui.config_window.lift()
    @staticmethod
    def getSelectableWhisperWeightTypeDict():
        """Build the mapping used by the Whisper weight-type dropdown.

        Returns:
            dict: each value of ``config.SELECTABLE_WHISPER_WEIGHT_TYPE_DICT``
            for the listed weight types, mapped to its localized label from
            ``i18n.t`` with the ``capacity`` string filled in.
        """
        # (weight type, i18n key, capacity string substituted into the label)
        label_specs = (
            ("tiny", "config_window.whisper_weight_type.tiny", "74.5MB"),
            ("base", "config_window.whisper_weight_type.base", "141MB"),
            ("small", "config_window.whisper_weight_type.small", "463MB"),
            ("medium", "config_window.whisper_weight_type.medium", "1.42GB"),
            ("large-v1", "config_window.whisper_weight_type.large_v1", "2.87GB"),
            ("large-v2", "config_window.whisper_weight_type.large_v2", "2.87GB"),
            ("large-v3", "config_window.whisper_weight_type.large_v3", "2.87GB"),
        )
        return {
            config.SELECTABLE_WHISPER_WEIGHT_TYPE_DICT[weight_type]: i18n.t(i18n_key, capacity=capacity)
            for weight_type, i18n_key, capacity in label_specs
        }
 # Open Webpage Functions
    def openWebPage_Booth(self):
@@ -1020,7 +1056,8 @@ class View():
            self.restart_required_configs_pre_data.ui_scaling == config.UI_SCALING and
            self.restart_required_configs_pre_data.font_family == config.FONT_FAMILY and
            self.restart_required_configs_pre_data.ui_language == config.UI_LANGUAGE and
-            self.restart_required_configs_pre_data.is_reset_button_displayed_for_translation == config.IS_RESET_BUTTON_DISPLAYED_FOR_TRANSLATION
+            self.restart_required_configs_pre_data.is_reset_button_displayed_for_translation == config.IS_RESET_BUTTON_DISPLAYED_FOR_TRANSLATION and
            self.restart_required_configs_pre_data.is_reset_button_displayed_for_whisper == config.IS_RESET_BUTTON_DISPLAYED_FOR_WHISPER
        )
        if locale is None:
@@ -1075,7 +1112,7 @@ class View():
        self.view_variable.VAR_CTRANSLATE2_WEIGHT_TYPE.set(self.getSelectableCtranslate2WeightTypeDict()[selected_weight_type])
    def setLatestCTranslate2WeightType(self):
-        selected_weight_type = self.getSelectableCtranslate2WeightTypeDict()[config.WEIGHT_TYPE]
+        selected_weight_type = self.getSelectableCtranslate2WeightTypeDict()[config.CTRANSLATE2_WEIGHT_TYPE]
        self.view_variable.VAR_CTRANSLATE2_WEIGHT_TYPE.set(selected_weight_type)
@@ -1088,6 +1125,23 @@ class View():
        vrct_gui.config_window.sb__ctranslate2_weight_type.grid_remove()
    def openWhisperWeightTypeWidget(self):
        """Grid both the "use Whisper" switch box and the weight-type dropdown box."""
        window = vrct_gui.config_window
        window.sb__use_whisper_feature.grid()
        window.sb__whisper_weight_type.grid()
    def closeWhisperWeightTypeWidget(self):
        """Keep the "use Whisper" switch box gridded but remove the weight-type dropdown box from the grid."""
        window = vrct_gui.config_window
        window.sb__use_whisper_feature.grid()
        window.sb__whisper_weight_type.grid_remove()
    def updateSelectedWhisperWeightType(self, selected_weight_type:str):
        """Set the Whisper weight-type dropdown variable to the label for ``selected_weight_type``.

        Args:
            selected_weight_type: Key into the dict returned by
                ``getSelectableWhisperWeightTypeDict``; an unknown key raises
                ``KeyError``.
        """
        label = self.getSelectableWhisperWeightTypeDict()[selected_weight_type]
        self.view_variable.VAR_WHISPER_WEIGHT_TYPE.set(label)
    def setLatestWhisperWeightType(self):
        """Reset the Whisper weight-type dropdown variable to the value stored in config.

        Looks up ``config.WHISPER_WEIGHT_TYPE`` in the dict returned by
        ``getSelectableWhisperWeightTypeDict`` and writes the resulting label
        into ``VAR_WHISPER_WEIGHT_TYPE``.

        NOTE: this method used to be defined under the copy-pasted name
        ``setLatestCTranslate2WeightType``. Because it appeared later in the
        class body, it replaced the CTranslate2 method of that name, so
        calling ``setLatestCTranslate2WeightType`` updated the Whisper
        variable instead. It now has its own name and no longer shadows it.
        """
        latest_label = self.getSelectableWhisperWeightTypeDict()[config.WHISPER_WEIGHT_TYPE]
        self.view_variable.VAR_WHISPER_WEIGHT_TYPE.set(latest_label)
    def openMicEnergyThresholdWidget(self):
        self.view_variable.VAR_LABEL_MIC_DYNAMIC_ENERGY_THRESHOLD.set(i18n.t("config_window.mic_dynamic_energy_threshold.label_for_manual"))
        self.view_variable.VAR_DESC_MIC_DYNAMIC_ENERGY_THRESHOLD.set(i18n.t("config_window.mic_dynamic_energy_threshold.desc_for_manual"))
--- a/vrct_gui/config_window/widgets/createSideMenuAndSettingsBoxContainers/createSideMenuAndSettingsBoxContainers.py
+++ b/vrct_gui/config_window/widgets/createSideMenuAndSettingsBoxContainers/createSideMenuAndSettingsBoxContainers.py
@@ -7,7 +7,7 @@ from ._createSettingBoxContainer import _createSettingBoxContainer
 from .setting_box_containers.setting_box_appearance import createSettingBox_Appearance
-from .setting_box_containers.setting_box_transcription import createSettingBox_Mic, createSettingBox_Speaker
+from .setting_box_containers.setting_box_transcription import createSettingBox_Mic, createSettingBox_Speaker, createSettingBox_InternalModel
 from .setting_box_containers.setting_box_others import createSettingBox_Others, createSettingBox_Others_SendMessageFormats, createSettingBox_Others_ReceivedMessageFormats, createSettingBox_Others_Additional
 from .setting_box_containers.setting_box_advanced_settings import createSettingBox_AdvancedSettings
 from .setting_box_containers.setting_box_translation import createSettingBox_Translation
@@ -94,6 +94,10 @@ def createSideMenuAndSettingsBoxContainers(config_window, settings, view_variabl
                        "var_section_title": view_variable.VAR_SECOND_TITLE_TRANSCRIPTION_SPEAKER,
                        "setting_box": createSettingBox_Speaker
                    },
                    {
                        "var_section_title": view_variable.VAR_SECOND_TITLE_TRANSCRIPTION_INTERNAL_MODEL,
                        "setting_box": createSettingBox_InternalModel
                    },
                ]
            },
        },
--- a/vrct_gui/config_window/widgets/createSideMenuAndSettingsBoxContainers/setting_box_containers/setting_box_transcription/init.py
+++ b/vrct_gui/config_window/widgets/createSideMenuAndSettingsBoxContainers/setting_box_containers/setting_box_transcription/init.py
@@ -1,2 +1,3 @@
 from .createSettingBox_Mic import createSettingBox_Mic
 from .createSettingBox_Speaker import createSettingBox_Speaker
 from .createSettingBox_InternalModel import createSettingBox_InternalModel
--- a/vrct_gui/config_window/widgets/createSideMenuAndSettingsBoxContainers/setting_box_containers/setting_box_transcription/createSettingBox_InternalModel.py
+++ b/vrct_gui/config_window/widgets/createSideMenuAndSettingsBoxContainers/setting_box_containers/setting_box_transcription/createSettingBox_InternalModel.py
@@ -0,0 +1,37 @@
 from utils import callFunctionIfCallable
 from .._SettingBoxGenerator import _SettingBoxGenerator
 def createSettingBox_InternalModel(setting_box_wrapper, config_window, settings, view_variable):
    """Create the setting boxes of the transcription "internal model" section.

    Two boxes are created through ``_SettingBoxGenerator`` and stored on
    ``config_window``: a switch (``sb__use_whisper_feature``) and a dropdown
    menu (``sb__whisper_weight_type``). Their callbacks are read from
    ``view_variable`` at the time the widget fires and are invoked through
    ``callFunctionIfCallable``.

    Args:
        setting_box_wrapper: Forwarded unchanged to ``_SettingBoxGenerator``.
        config_window: Object that receives the created setting boxes as attributes.
        settings: Forwarded unchanged to ``_SettingBoxGenerator``.
        view_variable: Source of the label/description variables, the
            dropdown values and the ``CALLBACK_SET_*`` callbacks.
    """
    sbg = _SettingBoxGenerator(setting_box_wrapper, config_window, settings, view_variable)
    createSettingBoxSwitch = sbg.createSettingBoxSwitch
    createSettingBoxDropdownMenu = sbg.createSettingBoxDropdownMenu
    # Forward the switch's current value to the registered callback.
    def switchUseWhisperFeatureCallback(switch_widget):
        callFunctionIfCallable(view_variable.CALLBACK_SET_USE_WHISPER_FEATURE, switch_widget.get())
    # Forward the value chosen in the dropdown to the registered callback.
    def optionmenuWhisperWeightTypeCallback(value):
        callFunctionIfCallable(view_variable.CALLBACK_SET_WHISPER_WEIGHT_TYPE, value)
    # Grid row counter; incremented after each setting box is placed.
    row=0
    config_window.sb__use_whisper_feature = createSettingBoxSwitch(
        for_var_label_text=view_variable.VAR_LABEL_USE_WHISPER_FEATURE,
        for_var_desc_text=view_variable.VAR_DESC_USE_WHISPER_FEATURE,
        switch_attr_name="sb__switch_use_whisper_feature",
        # The switch attribute is resolved when the command runs, not here.
        # NOTE(review): presumably createSettingBoxSwitch stores the widget on
        # config_window under switch_attr_name -- confirm in _SettingBoxGenerator.
        command=lambda: switchUseWhisperFeatureCallback(config_window.sb__switch_use_whisper_feature),
        variable=view_variable.VAR_USE_WHISPER_FEATURE
    )
    config_window.sb__use_whisper_feature.grid(row=row, pady=0)
    row+=1
    config_window.sb__whisper_weight_type = createSettingBoxDropdownMenu(
        for_var_label_text=view_variable.VAR_LABEL_WHISPER_WEIGHT_TYPE,
        for_var_desc_text=view_variable.VAR_DESC_WHISPER_WEIGHT_TYPE,
        optionmenu_attr_name="sb__optionmenu_whisper_weight_type",
        dropdown_menu_values=view_variable.DICT_WHISPER_WEIGHT_TYPE,
        command=lambda value: optionmenuWhisperWeightTypeCallback(value),
        variable=view_variable.VAR_WHISPER_WEIGHT_TYPE,
    )
    config_window.sb__whisper_weight_type.grid(row=row, pady=0)
    row+=1