Merge branch 'faster-whisper' into develop

2024-02-07 22:50:26 +09:00
parent a22e4b9b91 61a6eb792b
commit d4164d7c58
17 changed files with 758 additions and 142 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -6,7 +6,7 @@ VRCT.spec
 *.pyc
 logs/
 .venv/
-weight/
+weights/
 .vscode
 error.log
 *.exe
--- a/config.py
+++ b/config.py
@@ -98,6 +98,10 @@ class Config:
    def SELECTABLE_CTRANSLATE2_WEIGHT_TYPE_DICT(self):
        return self._SELECTABLE_CTRANSLATE2_WEIGHT_TYPE_DICT

+    @property
+    def SELECTABLE_WHISPER_WEIGHT_TYPE_DICT(self):
+        return self._SELECTABLE_WHISPER_WEIGHT_TYPE_DICT
+
    @property
    def MAX_MIC_ENERGY_THRESHOLD(self):
        return self._MAX_MIC_ENERGY_THRESHOLD
@@ -224,6 +228,15 @@ class Config:
        if isinstance(value, bool):
            self._IS_RESET_BUTTON_DISPLAYED_FOR_TRANSLATION = value

+    @property
+    def IS_RESET_BUTTON_DISPLAYED_FOR_WHISPER(self):
+        return self._IS_RESET_BUTTON_DISPLAYED_FOR_WHISPER
+
+    @IS_RESET_BUTTON_DISPLAYED_FOR_WHISPER.setter
+    def IS_RESET_BUTTON_DISPLAYED_FOR_WHISPER(self, value):
+        if isinstance(value, bool):
+            self._IS_RESET_BUTTON_DISPLAYED_FOR_WHISPER = value
+
    # Save Json Data
    ## Main Window
    @property
@@ -281,6 +294,17 @@ class Config:
            self._SELECTED_TAB_TARGET_LANGUAGES = value
            saveJson(self.PATH_CONFIG, inspect.currentframe().f_code.co_name, value)

+    @property
+    @json_serializable('SELECTED_TRANSCRIPTION_ENGINE')
+    def SELECTED_TRANSCRIPTION_ENGINE(self):
+        return self._SELECTED_TRANSCRIPTION_ENGINE
+
+    @SELECTED_TRANSCRIPTION_ENGINE.setter
+    def SELECTED_TRANSCRIPTION_ENGINE(self, value):
+        if isinstance(value, str):
+            self._SELECTED_TRANSCRIPTION_ENGINE = value
+            saveJson(self.PATH_CONFIG, inspect.currentframe().f_code.co_name, value)
+
    @property
    @json_serializable('IS_MAIN_WINDOW_SIDEBAR_COMPACT_MODE')
    def IS_MAIN_WINDOW_SIDEBAR_COMPACT_MODE(self):
@@ -587,15 +611,37 @@ class Config:
            saveJson(self.PATH_CONFIG, inspect.currentframe().f_code.co_name, value)

    @property
-    @json_serializable('WEIGHT_TYPE')
-    def WEIGHT_TYPE(self):
-        return self._WEIGHT_TYPE
+    @json_serializable('USE_WHISPER_FEATURE')
+    def USE_WHISPER_FEATURE(self):
+        return self._USE_WHISPER_FEATURE

-    @WEIGHT_TYPE.setter
-    def WEIGHT_TYPE(self, value):
+    @USE_WHISPER_FEATURE.setter
+    def USE_WHISPER_FEATURE(self, value):
+        if isinstance(value, bool):
+            self._USE_WHISPER_FEATURE = value
+            saveJson(self.PATH_CONFIG, inspect.currentframe().f_code.co_name, value)
+
+    @property
+    @json_serializable('CTRANSLATE2_WEIGHT_TYPE')
+    def CTRANSLATE2_WEIGHT_TYPE(self):
+        return self._CTRANSLATE2_WEIGHT_TYPE
+
+    @CTRANSLATE2_WEIGHT_TYPE.setter
+    def CTRANSLATE2_WEIGHT_TYPE(self, value):
        # if isinstance(value, str) and value in self.SELECTABLE_CTRANSLATE2_WEIGHT_TYPE_DICT:
        if isinstance(value, str):
-            self._WEIGHT_TYPE = value
+            self._CTRANSLATE2_WEIGHT_TYPE = value
+            saveJson(self.PATH_CONFIG, inspect.currentframe().f_code.co_name, value)
+
+    @property
+    @json_serializable('WHISPER_WEIGHT_TYPE')
+    def WHISPER_WEIGHT_TYPE(self):
+        return self._WHISPER_WEIGHT_TYPE
+
+    @WHISPER_WEIGHT_TYPE.setter
+    def WHISPER_WEIGHT_TYPE(self, value):
+        if isinstance(value, str):
+            self._WHISPER_WEIGHT_TYPE = value
            saveJson(self.PATH_CONFIG, inspect.currentframe().f_code.co_name, value)

    @property
@@ -774,6 +820,18 @@ class Config:
            "Small": "Small",
            "Large": "Large",
        }
+
+        self._SELECTABLE_WHISPER_WEIGHT_TYPE_DICT = {
+            # {Save json str}: {i18n_placeholder} pairs
+            "tiny": "tiny",
+            "base": "base",
+            "small": "small",
+            "medium": "medium",
+            "large-v1": "large-v1",
+            "large-v2": "large-v2",
+            "large-v3": "large-v3",
+        }
+
        self._MAX_MIC_ENERGY_THRESHOLD = 2000
        self._MAX_SPEAKER_ENERGY_THRESHOLD = 4000

@@ -791,6 +849,7 @@ class Config:
        self._SENT_MESSAGES_LOG = []
        self._CURRENT_SENT_MESSAGES_LOG_INDEX = 0
        self._IS_RESET_BUTTON_DISPLAYED_FOR_TRANSLATION = False
+        self._IS_RESET_BUTTON_DISPLAYED_FOR_WHISPER = False

        # Save Json Data
        ## Main Window
@@ -815,6 +874,7 @@ class Config:
            "2":"English\n(United States)",
            "3":"English\n(United States)",
        }
+        self._SELECTED_TRANSCRIPTION_ENGINE = "Google"
        self._IS_MAIN_WINDOW_SIDEBAR_COMPACT_MODE = False

        ## Config Window
@@ -851,7 +911,9 @@ class Config:
            "DeepL_API": None,
        }
        self._USE_TRANSLATION_FEATURE = True
-        self._WEIGHT_TYPE = "Small"
+        self._CTRANSLATE2_WEIGHT_TYPE = "Small"
+        self._USE_WHISPER_FEATURE = False
+        self._WHISPER_WEIGHT_TYPE = "base"
        self._SEND_MESSAGE_FORMAT = "[message]"
        self._SEND_MESSAGE_FORMAT_WITH_T = "[message]([translation])"
        self._RECEIVED_MESSAGE_FORMAT = "[message]"
--- a/controller.py
+++ b/controller.py
@@ -530,8 +530,8 @@ def callbackSetUseTranslationFeature(value):

 def callbackSetCtranslate2WeightType(value):
    print("callbackSetCtranslate2WeightType", value)
-    config.WEIGHT_TYPE = str(value)
-    view.updateSelectedCtranslate2WeightType(config.WEIGHT_TYPE)
+    config.CTRANSLATE2_WEIGHT_TYPE = str(value)
+    view.updateSelectedCtranslate2WeightType(config.CTRANSLATE2_WEIGHT_TYPE)
    view.setWidgetsStatus_changeWeightType_Pending()
    if model.checkCTranslatorCTranslate2ModelWeight():
        config.IS_RESET_BUTTON_DISPLAYED_FOR_TRANSLATION = False
@@ -792,6 +792,41 @@ def callbackSetSpeakerMaxPhrases(value):
    except Exception:
        view.showErrorMessage_SpeakerMaxPhrases()

+# Transcription (Internal AI Model)
+def _selectTranscriptionEngineByWhisperWeight():
+    """Select Whisper when its weights can be loaded, otherwise select Google.
+
+    IS_RESET_BUTTON_DISPLAYED_FOR_WHISPER is set to True only when the
+    weights for the configured type are not available.
+    """
+    if model.checkTranscriptionWhisperModelWeight() is True:
+        config.IS_RESET_BUTTON_DISPLAYED_FOR_WHISPER = False
+        config.SELECTED_TRANSCRIPTION_ENGINE = "Whisper"
+    else:
+        config.IS_RESET_BUTTON_DISPLAYED_FOR_WHISPER = True
+        config.SELECTED_TRANSCRIPTION_ENGINE = "Google"
+
+def callbackSetUserWhisperFeature(value):
+    """Store the Whisper on/off setting and update the widgets and engine."""
+    print("callbackSetUserWhisperFeature", value)
+    config.USE_WHISPER_FEATURE = value
+    if config.USE_WHISPER_FEATURE is True:
+        view.openWhisperWeightTypeWidget()
+        _selectTranscriptionEngineByWhisperWeight()
+    else:
+        view.closeWhisperWeightTypeWidget()
+        # Turning the feature off must also stop selecting Whisper; otherwise
+        # the engine stays on "Whisper" until createMainWindow resets it.
+        config.SELECTED_TRANSCRIPTION_ENGINE = "Google"
+    view.showRestartButtonIfRequired()
+
+def callbackSetWhisperWeightType(value):
+    """Store the chosen Whisper weight type and re-select the engine for it."""
+    print("callbackSetWhisperWeightType", value)
+    config.WHISPER_WEIGHT_TYPE = str(value)
+    view.updateSelectedWhisperWeightType(config.WHISPER_WEIGHT_TYPE)
+    _selectTranscriptionEngineByWhisperWeight()
+    view.showRestartButtonIfRequired()

 # Others Tab
 def callbackSetEnableAutoClearMessageBox(value):
@@ -923,6 +950,12 @@ def createMainWindow(splash):
    # set Translation Engine
    updateTranslationEngineAndEngineList()

+    # set Transcription Engine
+    if config.USE_WHISPER_FEATURE is True:
+        config.SELECTED_TRANSCRIPTION_ENGINE = "Whisper"
+    else:
+        config.SELECTED_TRANSCRIPTION_ENGINE = "Google"
+
    # set word filter
    model.addKeywords()

@@ -1020,6 +1053,10 @@ def createMainWindow(splash):
            "callback_set_speaker_phrase_timeout": callbackSetSpeakerPhraseTimeout,
            "callback_set_speaker_max_phrases": callbackSetSpeakerMaxPhrases,

+            # Transcription Tab (Internal AI Model)
+            "callback_set_use_whisper_feature": callbackSetUserWhisperFeature,
+            "callback_set_whisper_weight_type": callbackSetWhisperWeightType,
+
            # Others Tab
            "callback_set_enable_auto_clear_chatbox": callbackSetEnableAutoClearMessageBox,
            "callback_set_send_only_translated_messages": callbackSetEnableSendOnlyTranslatedMessages,
--- a/install.bat
+++ b/install.bat
@@ -1,4 +1,2 @@
 python.exe -m pip install --upgrade pip
-pip install -r requirements.txt
-pip install git+https://github.com/misyaguziya/translators
-pip install git+https://github.com/misyaguziya/custom_speech_recognition
+pip install -r requirements.txt
--- a/locales/en.yml
+++ b/locales/en.yml
@@ -79,6 +79,7 @@ config_window:
    transcription: Transcription
    transcription_mic: Mic
    transcription_speaker: Speaker
+    transcription_internal_model: Internal Model
    others: Others
    others_send_message_formats: Message Formats (Send)
    others_received_message_formats: Message Formats (Received)
@@ -125,6 +126,21 @@ config_window:
    small: "Basic model (%{capacity})"
    large: "High accuracy model (%{capacity})"

+  use_whisper_feature:
+    label: Use Whisper Feature
+    desc: Description
+
+  whisper_weight_type:
+    label: Select Whisper Model
+    desc: Description
+    tiny: "tiny model (%{capacity})"
+    base: "base model (%{capacity})"
+    small: "small model (%{capacity})"
+    medium: "medium model (%{capacity})"
+    large_v1: "large_v1 model (%{capacity})"
+    large_v2: "large_v2 model (%{capacity})"
+    large_v3: "large_v3 model (%{capacity})"
+
  deepl_auth_key:
    label: DeepL Auth Key
    desc: Please select %{translator} on the main screen with DeepL_API when using. ※Some languages may not be supported.
--- a/main.py
+++ b/main.py
@@ -8,11 +8,16 @@ if __name__ == "__main__":
        splash.showSplash()

        from config import config
-        from models.translation.utils import downloadCTranslate2Weight
+        from models.translation.translation_utils import downloadCTranslate2Weight
        if config.USE_TRANSLATION_FEATURE is True:
-            downloadCTranslate2Weight(config.PATH_LOCAL, config.WEIGHT_TYPE, splash.updateDownloadProgress)
+            downloadCTranslate2Weight(config.PATH_LOCAL, config.CTRANSLATE2_WEIGHT_TYPE, splash.updateDownloadProgress)
+
+        from models.transcription.transcription_whisper import downloadWhisperWeight
+        if config.USE_WHISPER_FEATURE is True:
+            downloadWhisperWeight(config.PATH_LOCAL, config.WHISPER_WEIGHT_TYPE, splash.updateDownloadProgress)

        splash.toProgress(0)
+
        import controller
        controller.createMainWindow(splash)
        splash.destroySplash()
--- a/model.py
+++ b/model.py
@@ -23,7 +23,8 @@ from models.transcription.transcription_transcriber import AudioTranscriber
 from models.xsoverlay.notification import xsoverlayForVRCT
 from models.translation.translation_languages import translation_lang
 from models.transcription.transcription_languages import transcription_lang
-from models.translation.utils import checkCTranslate2Weight
+from models.translation.translation_utils import checkCTranslate2Weight
+from models.transcription.transcription_whisper import checkWhisperWeight
 from config import config

 class threadFnc(Thread):
@@ -65,14 +66,17 @@ class Model:
        self.speaker_energy_plot_progressbar = None
        self.translator = Translator()
        if config.USE_TRANSLATION_FEATURE is True:
-            self.translator.changeCTranslate2Model(config.PATH_LOCAL, config.WEIGHT_TYPE)
+            self.translator.changeCTranslate2Model(config.PATH_LOCAL, config.CTRANSLATE2_WEIGHT_TYPE)
        self.keyword_processor = KeywordProcessor()

    def checkCTranslatorCTranslate2ModelWeight(self):
-        return checkCTranslate2Weight(config.PATH_LOCAL, config.WEIGHT_TYPE)
+        return checkCTranslate2Weight(config.PATH_LOCAL, config.CTRANSLATE2_WEIGHT_TYPE)

    def changeTranslatorCTranslate2Model(self):
-        self.translator.changeCTranslate2Model(config.PATH_LOCAL, config.WEIGHT_TYPE)
+        self.translator.changeCTranslate2Model(config.PATH_LOCAL, config.CTRANSLATE2_WEIGHT_TYPE)
+
+    def checkTranscriptionWhisperModelWeight(self):
+        return checkWhisperWeight(config.PATH_LOCAL, config.WHISPER_WEIGHT_TYPE)

    def resetKeywordProcessor(self):
        del self.keyword_processor
@@ -335,9 +339,11 @@ class Model:
            source=self.mic_audio_recorder.source,
            phrase_timeout=phase_timeout,
            max_phrases=config.INPUT_MIC_MAX_PHRASES,
+            root=config.PATH_LOCAL,
+            whisper_weight_type=config.WHISPER_WEIGHT_TYPE,
        )
        def sendMicTranscript():
-            mic_transcriber.transcribeAudioQueue(mic_audio_queue, config.SOURCE_LANGUAGE, config.SOURCE_COUNTRY)
+            mic_transcriber.transcribeAudioQueue(mic_audio_queue, config.SOURCE_LANGUAGE, config.SOURCE_COUNTRY, config.SELECTED_TRANSCRIPTION_ENGINE)
            message = mic_transcriber.getTranscript()
            try:
                fnc(message)
@@ -416,9 +422,11 @@ class Model:
            source=self.speaker_audio_recorder.source,
            phrase_timeout=phase_timeout,
            max_phrases=config.INPUT_SPEAKER_MAX_PHRASES,
+            root=config.PATH_LOCAL,
+            whisper_weight_type=config.WHISPER_WEIGHT_TYPE,
        )
        def sendSpeakerTranscript():
-            speaker_transcriber.transcribeAudioQueue(speaker_audio_queue, config.TARGET_LANGUAGE, config.TARGET_COUNTRY)
+            speaker_transcriber.transcribeAudioQueue(speaker_audio_queue, config.TARGET_LANGUAGE, config.TARGET_COUNTRY, config.SELECTED_TRANSCRIPTION_ENGINE)
            message = speaker_transcriber.getTranscript()
            try:
                fnc(message)
--- a/models/transcription/transcription_languages.py
+++ b/models/transcription/transcription_languages.py
@@ -1,177 +1,438 @@
 transcription_lang = {
    "Afrikaans":{
-        "South Africa":"af-ZA",
+        "South Africa":{
+            "Google": "af-ZA",
+            "Whisper": "af",
+        },
    },
    "Arabic":{
-        "Algeria":"ar-DZ",
-        "Bahrain":"ar-BH",
-        "Egypt":"ar-EG",
-        "Israel":"ar-IL",
-        "Iraq":"ar-IQ",
-        "Jordan":"ar-JO",
-        "Kuwait":"ar-KW",
-        "Lebanon":"ar-LB",
-        "Morocco":"ar-MA",
-        "Oman":"ar-OM",
-        "State of Palestine":"ar-PS",
-        "Qatar":"ar-QA",
-        "Saudi Arabia":"ar-SA",
-        "Tunisia":"ar-TN",
-        "United Arab Emirates":"ar-AE",
+        "Algeria":{
+            "Google": "ar-DZ",
+            "Whisper": "ar",
+        },
+        "Bahrain":{
+            "Google": "ar-BH",
+            "Whisper": "ar",
+        },
+        "Egypt":{
+            "Google": "ar-EG",
+            "Whisper": "ar",
+        },
+        "Israel":{
+            "Google": "ar-IL",
+            "Whisper": "ar",
+        },
+        "Iraq":{
+            "Google": "ar-IQ",
+            "Whisper": "ar",
+        },
+        "Jordan":{
+            "Google": "ar-JO",
+            "Whisper": "ar",
+        },
+        "Kuwait":{
+            "Google": "ar-KW",
+            "Whisper": "ar",
+        },
+        "Lebanon":{
+            "Google": "ar-LB",
+            "Whisper": "ar",
+        },
+        "Morocco":{
+            "Google": "ar-MA",
+            "Whisper": "ar",
+        },
+        "Oman":{
+            "Google": "ar-OM",
+            "Whisper": "ar",
+        },
+        "State of Palestine":{
+            "Google": "ar-PS",
+            "Whisper": "ar",
+        },
+        "Qatar":{
+            "Google": "ar-QA",
+            "Whisper": "ar",
+        },
+        "Saudi Arabia":{
+            "Google": "ar-SA",
+            "Whisper": "ar",
+        },
+        "Tunisia":{
+            "Google": "ar-TN",
+            "Whisper": "ar",
+        },
+        "United Arab Emirates":{
+            "Google": "ar-AE",
+            "Whisper": "ar",
+        },
    },
    "Basque":{
-        "Spain":"eu-ES",
+        "Spain":{
+            "Google": "eu-ES",
+            "Whisper": "eu",
+        },
    },
    "Bulgarian":{
-        "Bulgaria":"bg-BG",
+        "Bulgaria":{
+            "Google": "bg-BG",
+            "Whisper": "bg",
+        },
    },
    "Catalan":{
-        "Spain":"ca-ES",
+        "Spain":{
+            "Google": "ca-ES",
+            "Whisper": "ca",
+        },
    },
    "Chinese":{
-        "Mandarin (Simplified, China)":"cmn-Hans-CN",
-        "Mandarin (Simplified, Hong Kong)":"cmn-Hans-HK",
-        "Mandarin (Traditional, Taiwan)":"cmn-Hant-TW",
-        "Cantonese (Traditional Hong Kong)":"yue-Hant-HK",
+        "Mandarin (Simplified, China)":{
+            "Google": "cmn-Hans-CN",
+            "Whisper": "zh",
+        },
+        "Mandarin (Simplified, Hong Kong)":{
+            "Google": "cmn-Hans-HK",
+            "Whisper": "zh",
+        },
+        "Mandarin (Traditional, Taiwan)":{
+            "Google": "cmn-Hant-TW",
+            "Whisper": "zh",
+        },
+        "Cantonese (Traditional Hong Kong)":{
+            "Google": "yue-Hant-HK",
+            "Whisper": "yue",
+        },
    },
    "Croatian":{
-        "Croatia":"hr-HR",
+        "Croatia":{
+            "Google": "hr-HR",
+            "Whisper": "hr",
+        },
    },
    "Czech":{
-        "Czech Republic":"cs-CZ",
+        "Czech Republic":{
+            "Google": "cs-CZ",
+            "Whisper": "cs",
+        },
    },
    "Danish":{
-        "Denmark":"da-DK",
+        "Denmark":{
+            "Google": "da-DK",
+            "Whisper": "da",
+        },
    },
    "Dutch":{
-        "Netherlands":"nl-NL",
+        "Netherlands":{
+            "Google": "nl-NL",
+            "Whisper": "nl",
+        },
    },
    "English": {
-        "United States":"en-US",
-        "United Kingdom":"en-GB",
-        "Australia":"en-AU",
-        "Canada":"en-CA",
-        "India":"en-IN",
-        "Ireland":"en-IE",
-        "New Zealand":"en-NZ",
-        "Philippines":"en-PH",
-        "South Africa":"en-ZA",
+        "United States":{
+            "Google": "en-US",
+            "Whisper": "en",
+        },
+        "United Kingdom":{
+            "Google": "en-GB",
+            "Whisper": "en",
+        },
+        "Australia":{
+            "Google": "en-AU",
+            "Whisper": "en",
+        },
+        "Canada":{
+            "Google": "en-CA",
+            "Whisper": "en",
+        },
+        "India":{
+            "Google": "en-IN",
+            "Whisper": "en",
+        },
+        "Ireland":{
+            "Google": "en-IE",
+            "Whisper": "en",
+        },
+        "New Zealand":{
+            "Google": "en-NZ",
+            "Whisper": "en",
+        },
+        "Philippines":{
+            "Google": "en-PH",
+            "Whisper": "en",
+        },
+        "South Africa":{
+            "Google": "en-ZA",
+            "Whisper": "en",
+        },
    },
    "Filipino":{
-        "Philippines":"fil-PH",
+        "Philippines":{
+            "Google": "fil-PH",
+            "Whisper": "tl",
+        },
    },
    "Finnish":{
-        "Finland":"fi-FI",
+        "Finland":{
+            "Google": "fi-FI",
+            "Whisper": "fi",
+        },
    },
    "French":{
-        "France":"fr-FR",
+        "France":{
+            "Google": "fr-FR",
+            "Whisper": "fr",
+        },
    },
    "Galician":{
-        "Spain":"gl-ES",
+        "Spain":{
+            "Google": "gl-ES",
+            "Whisper": "gl",
+        },
    },
    "German":{
-        "Germany":"de-DE",
+        "Germany":{
+            "Google": "de-DE",
+            "Whisper": "de",
+        },
    },
    "Greek":{
-        "Greece":"el-GR",
+        "Greece":{
+            "Google": "el-GR",
+            "Whisper": "el",
+        },
    },
    "Hebrew":{
-        "Israel":"he-IL",
+        "Israel":{
+            "Google": "he-IL",
+            "Whisper": "he",
+        },
    },
    "Hindi": {
-        "India":"hi-IN",
+        "India":{
+            "Google": "hi-IN",
+            "Whisper": "hi",
+        },
    },
    "Hungarian":{
-        "Hungary":"hu-HU",
+        "Hungary":{
+            "Google": "hu-HU",
+            "Whisper": "hu",
+        },
    },
    "Indonesian":{
-        "Indonesia":"id-ID",
+        "Indonesia":{
+            "Google": "id-ID",
+            "Whisper": "id",
+        },
    },
    "Icelandic":{
-        "Iceland":"is-IS",
+        "Iceland":{
+            "Google": "is-IS",
+            "Whisper": "is",
+        },
    },
    "Italian":{
-        "Italy":"it-IT",
-        "Switzerland":"it-CH",
+        "Italy":{
+            "Google": "it-IT",
+            "Whisper": "it",
+        },
+        "Switzerland":{
+            "Google": "it-CH",
+            "Whisper": "it",
+        },
    },
    "Japanese":{
-        "Japan":"ja-JP",
+        "Japan":{
+            "Google": "ja-JP",
+            "Whisper": "ja",
+        },
    },
    "Korean":{
-        "South Korea":"ko-KR",
+        "South Korea":{
+            "Google": "ko-KR",
+            "Whisper": "ko",
+        },
    },
    "Lithuanian":{
-        "Lithuania":"lt-LT",
+        "Lithuania":{
+            "Google": "lt-LT",
+            "Whisper": "lt",
+        },
    },
    "Malay":{
-        "Malaysia":"ms-MY",
+        "Malaysia":{
+            "Google": "ms-MY",
+            "Whisper": "ms",
+        },
    },
    "Norwegian":{
-        "Norway":"nb-NO",
+        "Norway":{
+            "Google": "nb-NO",
+            "Whisper": "no",
+        },
    },
    "Persian":{
-        "Iran":"fa-IR",
+        "Iran":{
+            "Google": "fa-IR",
+            "Whisper": "fa",
+        },
    },
    "Polish":{
-        "Poland":"pl-PL",
+        "Poland":{
+            "Google": "pl-PL",
+            "Whisper": "pl",
+        },
    },
    "Portuguese":{
-        "Brazil":"pt-BR",
-        "Portugal":"pt-PT",
+        "Brazil":{
+            "Google": "pt-BR",
+            "Whisper": "pt",
+        },
+        "Portugal":{
+            "Google": "pt-PT",
+            "Whisper": "pt",
+        },
    },
    "Romanian":{
-        "Romania":"ro-RO",
+        "Romania":{
+            "Google": "ro-RO",
+            "Whisper": "ro",
+        },
    },
    "Russian":{
-        "Russia":"ru-RU",
+        "Russia":{
+            "Google": "ru-RU",
+            "Whisper": "ru",
+        },
    },
    "Serbian":{
-        "Serbia":"sr-RS",
+        "Serbia":{
+            "Google": "sr-RS",
+            "Whisper": "sr",
+        },
    },
    "Slovak":{
-        "Slovakia":"sk-SK",
+        "Slovakia":{
+            "Google": "sk-SK",
+            "Whisper": "sk",
+        },
    },
    "Slovenian":{
-        "Slovenia":"sl-SI",
+        "Slovenia":{
+            "Google": "sl-SI",
+            "Whisper": "sl",
+        },
    },
    "Spanish":{
-        "Argentina":"es-AR",
-        "Bolivia":"es-BO",
-        "Chile":"es-CL",
-        "Colombia":"es-CO",
-        "Costa Rica":"es-CR",
-        "Dominican Republic":"es-DO",
-        "Ecuador":"es-EC",
-        "El Salvador":"es-SV",
-        "Guatemala":"es-GT",
-        "Honduras":"es-HN",
-        "Mexico":"es-MX",
-        "Nicaragua":"es-NI",
-        "Panama":"es-PA",
-        "Paraguay":"es-PY",
-        "Peru":"es-PE",
-        "Puerto Rico":"es-PR",
-        "Spain":"es-ES",
-        "Uruguay":"es-UY",
-        "United States":"es-US",
-        "Venezuela":"es-VE",
+        "Argentina":{
+            "Google": "es-AR",
+            "Whisper": "es",
+        },
+        "Bolivia":{
+            "Google": "es-BO",
+            "Whisper": "es",
+        },
+        "Chile":{
+            "Google": "es-CL",
+            "Whisper": "es",
+        },
+        "Colombia":{
+            "Google": "es-CO",
+            "Whisper": "es",
+        },
+        "Costa Rica":{
+            "Google": "es-CR",
+            "Whisper": "es",
+        },
+        "Dominican Republic":{
+            "Google": "es-DO",
+            "Whisper": "es",
+        },
+        "Ecuador":{
+            "Google": "es-EC",
+            "Whisper": "es",
+        },
+        "El Salvador":{
+            "Google": "es-SV",
+            "Whisper": "es",
+        },
+        "Guatemala":{
+            "Google": "es-GT",
+            "Whisper": "es",
+        },
+        "Honduras":{
+            "Google": "es-HN",
+            "Whisper": "es",
+        },
+        "Mexico":{
+            "Google": "es-MX",
+            "Whisper": "es",
+        },
+        "Nicaragua":{
+            "Google": "es-NI",
+            "Whisper": "es",
+        },
+        "Panama":{
+            "Google": "es-PA",
+            "Whisper": "es",
+        },
+        "Paraguay":{
+            "Google": "es-PY",
+            "Whisper": "es",
+        },
+        "Peru":{
+            "Google": "es-PE",
+            "Whisper": "es",
+        },
+        "Puerto Rico":{
+            "Google": "es-PR",
+            "Whisper": "es",
+        },
+        "Spain":{
+            "Google": "es-ES",
+            "Whisper": "es",
+        },
+        "Uruguay":{
+            "Google": "es-UY",
+            "Whisper": "es",
+        },
+        "United States":{
+            "Google": "es-US",
+            "Whisper": "es",
+        },
+        "Venezuela":{
+            "Google": "es-VE",
+            "Whisper": "es",
+        },
    },
    "Swedish":{
-        "Sweden":"sv-SE",
+        "Sweden":{
+            "Google": "sv-SE",
+            "Whisper": "sv",
+        },
    },
    "Thai":{
-        "Thailand":"th-TH",
+        "Thailand":{
+            "Google": "th-TH",
+            "Whisper": "th",
+        },
    },
    "Turkish":{
-        "Turkey":"tr-TR",
+        "Turkey":{
+            "Google": "tr-TR",
+            "Whisper": "tr",
+        },
    },
    "Ukrainian":{
-        "Ukraine":"uk-UA",
+        "Ukraine":{
+            "Google": "uk-UA",
+            "Whisper": "uk",
+        },
    },
    "Vietnamese":{
-        "Vietnam":"vi-VN",
-    },
-    "Zulu":{
-        "South Africa":"zu-ZA"
+        "Vietnam":{
+            "Google": "vi-VN",
+            "Whisper": "vi",
+        },
    },
 }
--- a/models/transcription/transcription_transcriber.py
+++ b/models/transcription/transcription_transcriber.py
@@ -5,12 +5,16 @@ from speech_recognition import Recognizer, AudioData, AudioFile
 from datetime import timedelta
 from pyaudiowpatch import get_sample_size, paInt16
 from .transcription_languages import transcription_lang
+from .transcription_whisper import getWhisperModel, checkWhisperWeight
+
+import torch
+import numpy as np

 PHRASE_TIMEOUT = 3
 MAX_PHRASES = 10

 class AudioTranscriber:
-    def __init__(self, speaker, source, phrase_timeout, max_phrases):
+    def __init__(self, speaker, source, phrase_timeout, max_phrases, root=None, whisper_weight_type=None, ):
        self.speaker = speaker
        self.phrase_timeout = phrase_timeout
        self.max_phrases = max_phrases
@@ -26,23 +30,60 @@ class AudioTranscriber:
                "new_phrase": True,
                "process_data_func": self.processSpeakerData if speaker else self.processSpeakerData
        }
+        # Load the model once: checkWhisperWeight() itself builds a full model,
+        # so checking first and then loading would construct it twice.
+        self.whisper_model = None
+        if whisper_weight_type is not None and root is not None:
+            try:
+                self.whisper_model = getWhisperModel(root, whisper_weight_type)
+            except Exception:
+                pass

-    def transcribeAudioQueue(self, audio_queue, language, country):
-        # while True:
+    def transcribeAudioQueue(self, audio_queue, language, country, transcription_engine):
+        """Transcribe the next queued audio item with the selected engine.
+
+        Supported engines are "Google" and "Whisper"; "Whisper" falls back to
+        "Google" when no Whisper model was loaded, and any other value yields
+        no transcript. Recognition errors are swallowed.
+        """
        audio, time_spoken = audio_queue.get()
        self.updateLastSampleAndPhraseStatus(audio, time_spoken)

        text = ''
        try:
-            # fd, path = tempfile.mkstemp(suffix=".wav")
-            # os.close(fd)
+            # Fall back to Google Speech-to-Text when Whisper is unavailable.
+            if transcription_engine == "Whisper" and self.whisper_model is None:
+                transcription_engine = "Google"
+
            audio_data = self.audio_sources["process_data_func"]()
-            text = self.audio_recognizer.recognize_google(audio_data, language=transcription_lang[language][country])
+            match transcription_engine:
+                case "Google":
+                    text = self.audio_recognizer.recognize_google(audio_data, language=transcription_lang[language][country][transcription_engine])
+                case "Whisper":
+                    # Convert to 16 kHz 16-bit PCM and scale to float32 in [-1.0, 1.0).
+                    audio_data = np.frombuffer(audio_data.get_raw_data(convert_rate=16000, convert_width=2), np.int16).flatten().astype(np.float32) / 32768.0
+                    segments, _ = self.whisper_model.transcribe(
+                        audio_data,
+                        beam_size=5,
+                        temperature=0.0,
+                        log_prob_threshold=-0.8,
+                        no_speech_threshold=0.6,
+                        language=transcription_lang[language][country][transcription_engine],
+                        word_timestamps=False,
+                        without_timestamps=True,
+                        task="transcribe",
+                        vad_filter=False,
+                        )
+                    # Skip segments that fail the same thresholds passed above.
+                    for s in segments:
+                        if s.avg_logprob < -0.8 or s.no_speech_prob > 0.6:
+                            continue
+                        text += s.text
+
        except Exception:
            pass
        finally:
            pass
-            # os.unlink(path)

        if text != '':
            self.updateTranscript(text)
--- a/models/transcription/transcription_whisper.py
+++ b/models/transcription/transcription_whisper.py
@@ -0,0 +1,106 @@
+from os import path as os_path, makedirs as os_makedirs
+from requests import get as requests_get
+from typing import Callable
+import huggingface_hub
+from faster_whisper import WhisperModel
+import logging
+logger = logging.getLogger('faster_whisper')
+logger.setLevel(logging.CRITICAL)

+# Hugging Face repository id for each selectable Whisper weight type.
+_MODELS = {
+    "tiny": "Systran/faster-whisper-tiny",
+    "base": "Systran/faster-whisper-base",
+    "small": "Systran/faster-whisper-small",
+    "medium": "Systran/faster-whisper-medium",
+    "large-v1": "Systran/faster-whisper-large-v1",
+    "large-v2": "Systran/faster-whisper-large-v2",
+    "large-v3": "Systran/faster-whisper-large-v3",
+}

+# Files requested from the repository for every weight type.
+_FILENAMES = [
+    "config.json",
+    "preprocessor_config.json",
+    "model.bin",
+    "tokenizer.json",
+    "vocabulary.txt",
+    "vocabulary.json",
+]

+def _getWeightPath(root, weight_type):
+    """Return the directory below root that holds the given weight type."""
+    return os_path.join(root, "weights", "whisper", weight_type)

+def downloadFile(url, path, func=None):
+    """Stream url into the file at path.

+    func, when callable, receives the downloaded fraction (bytes written /
+    content-length) after each chunk. Any failure is printed, not raised.
+    """
+    try:
+        res = requests_get(url, stream=True)
+        res.raise_for_status()
+        file_size = int(res.headers.get('content-length', 0))
+        total_chunk = 0
+        with open(os_path.join(path), 'wb') as file:
+            for chunk in res.iter_content(chunk_size=1024*5):
+                file.write(chunk)
+                total_chunk += len(chunk)
+                # Without a content-length header file_size is 0; skip the
+                # progress report instead of dividing by zero, which would
+                # abort the download after the first chunk.
+                if callable(func) and file_size > 0:
+                    func(total_chunk/file_size)
+    except Exception as e:
+        print("error:downloadFile()", e)

+def checkWhisperWeight(root, weight_type):
+    """Return True when the weights for weight_type below root can be loaded."""
+    try:
+        getWhisperModel(root, weight_type)
+    except Exception:
+        return False
+    return True

+def downloadWhisperWeight(root, weight_type, callbackFunc):
+    """Download the files of weight_type below root unless they already load.

+    callbackFunc is forwarded to downloadFile as the progress callback.
+    """
+    path = _getWeightPath(root, weight_type)
+    os_makedirs(path, exist_ok=True)
+    if checkWhisperWeight(root, weight_type) is True:
+        return

+    if weight_type not in _MODELS:
+        # No repository is known for this name; report it the way
+        # downloadFile reports errors instead of raising KeyError.
+        print("error:downloadWhisperWeight() unknown weight type", weight_type)
+        return

+    for filename in _FILENAMES:
+        print("Downloading", filename, "...")
+        file_path = os_path.join(path, filename)
+        url = huggingface_hub.hf_hub_url(_MODELS[weight_type], filename)
+        downloadFile(url, file_path, func=callbackFunc)

+def getWhisperModel(root, weight_type):
+    """Load the WhisperModel stored below root for weight_type (CPU, int8, local files only)."""
+    return WhisperModel(
+        _getWeightPath(root, weight_type),
+        device="cpu",
+        device_index=0,
+        compute_type="int8",
+        cpu_threads=4,
+        num_workers=1,
+        local_files_only=True,
+    )

+if __name__ == "__main__":
+    def callback(value):
+        print(value)

+    # Fetch every known weight type, in the order _MODELS lists them.
+    for weight_type in _MODELS:
+        downloadWhisperWeight("./", weight_type, callback)
--- a/models/translation/translation_translator.py
+++ b/models/translation/translation_translator.py
@@ -2,7 +2,7 @@ import os
 from deepl import Translator as deepl_Translator
 from translators import translate_text as other_web_Translator
 from .translation_languages import translation_lang
-from .utils import ctranslate2_weights
+from .translation_utils import ctranslate2_weights

 import ctranslate2
 import transformers
@@ -27,8 +27,8 @@ class Translator():
    def changeCTranslate2Model(self, path, model_type):
        directory_name = ctranslate2_weights[model_type]["directory_name"]
        tokenizer = ctranslate2_weights[model_type]["tokenizer"]
-        weight_path = os.path.join(path, "weight", directory_name)
-        tokenizer_path = os.path.join(path, "weight", directory_name, "tokenizer")
+        weight_path = os.path.join(path, "weights", "ctranslate2", directory_name)
+        tokenizer_path = os.path.join(path, "weights", "ctranslate2", directory_name, "tokenizer")
        self.ctranslate2_translator = ctranslate2.Translator(
            weight_path,
            device="cpu",
@@ -41,7 +41,7 @@ class Translator():
            self.ctranslate2_tokenizer = transformers.AutoTokenizer.from_pretrained(tokenizer, cache_dir=tokenizer_path)
        except Exception as e:
            print("Error: changeCTranslate2Model()", e)
-            tokenizer_path = os.path.join("./weight", directory_name, "tokenizer")
+            tokenizer_path = os.path.join("./weights", "ctranslate2", directory_name, "tokenizer")
            self.ctranslate2_tokenizer = transformers.AutoTokenizer.from_pretrained(tokenizer, cache_dir=tokenizer_path)

    @staticmethod
--- a/models/translation/translation_utils.py
+++ b/models/translation/translation_utils.py
@@ -39,36 +39,36 @@ def calculate_file_hash(file_path, block_size=65536):
    return hash_object.hexdigest()

 def checkCTranslate2Weight(path, weight_type="Small"):
-    directory_name = 'weight'
-    current_directory = path
    weight_directory_name = ctranslate2_weights[weight_type]["directory_name"]
    hash_data = ctranslate2_weights[weight_type]["hash"]
-    files = ["model.bin", "sentencepiece.model", "shared_vocabulary.txt"]
+    files = [
+        "model.bin",
+        "sentencepiece.model",
+        "shared_vocabulary.txt"
+    ]

    # check already downloaded
    already_downloaded = False
-    if all(os_path.exists(os_path.join(current_directory, directory_name, weight_directory_name, file)) for file in files):
+    if all(os_path.exists(os_path.join(path, weight_directory_name, file)) for file in files):
        # check hash
        for file in files:
            original_hash = hash_data[file]
-            current_hash = calculate_file_hash(os_path.join(current_directory, directory_name, weight_directory_name, file))
+            current_hash = calculate_file_hash(os_path.join(path, weight_directory_name, file))
            if original_hash != current_hash:
-                break
+                return False  # a hash mismatch must not fall through to the "already downloaded" assignment below
        already_downloaded = True
    return already_downloaded

-def downloadCTranslate2Weight(path, weight_type="Small", func=None):
+def downloadCTranslate2Weight(root, weight_type="Small", func=None):
    url = ctranslate2_weights[weight_type]["url"]
-    filename = 'weight.zip'
-    directory_name = 'weight'
-    current_directory = path
+    filename = "weight.zip"
+    path = os_path.join(root, "weights", "ctranslate2")
+    os_makedirs(path, exist_ok=True)

    if checkCTranslate2Weight(path, weight_type):
        return

    try:
-        os_makedirs(os_path.join(current_directory, directory_name), exist_ok=True)
-        print(os_path.join(current_directory, directory_name))
        with tempfile.TemporaryDirectory() as tmp_path:
            res = requests_get(url, stream=True)
            file_size = int(res.headers.get('content-length', 0))
@@ -81,6 +81,6 @@ def downloadCTranslate2Weight(path, weight_type="Small", func=None):
                        func(total_chunk/file_size)

            with ZipFile(os_path.join(tmp_path, filename)) as zf:
-                zf.extractall(os_path.join(current_directory, directory_name))
+                zf.extractall(path)
    except Exception as e:
            print("error:downloadCTranslate2Weight()", e)
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,6 +8,9 @@ pyyaml == 6.0.1
 python-i18n == 0.3.9
 CTkToolTip == 0.8
 pyinstaller==6.2.0
-transformers[torch]
+transformers[torch]==4.37.2
 sentencepiece==0.1.99
-ctranslate2==3.21.0
+ctranslate2==3.24.0
+faster-whisper==0.10.0
+translators @ git+https://github.com/misyaguziya/translators@master
+SpeechRecognition @ git+https://github.com/misyaguziya/custom_speech_recognition@master
--- a/view.py
+++ b/view.py
@@ -29,6 +29,7 @@ class View():
            font_family=config.FONT_FAMILY,
            ui_language=config.UI_LANGUAGE,
            is_reset_button_displayed_for_translation=config.IS_RESET_BUTTON_DISPLAYED_FOR_TRANSLATION,
+            is_reset_button_displayed_for_whisper=config.IS_RESET_BUTTON_DISPLAYED_FOR_WHISPER,
        )

        if config.ENABLE_SPEAKER2CHATBOX is False:
@@ -213,6 +214,7 @@ class View():
            VAR_SIDE_MENU_LABEL_TRANSCRIPTION=StringVar(value=i18n.t("config_window.side_menu_labels.transcription")),
            VAR_SECOND_TITLE_TRANSCRIPTION_MIC=StringVar(value=i18n.t("config_window.side_menu_labels.transcription_mic")),
            VAR_SECOND_TITLE_TRANSCRIPTION_SPEAKER=StringVar(value=i18n.t("config_window.side_menu_labels.transcription_speaker")),
+            VAR_SECOND_TITLE_TRANSCRIPTION_INTERNAL_MODEL=StringVar(value=i18n.t("config_window.side_menu_labels.transcription_internal_model")),
            VAR_SIDE_MENU_LABEL_OTHERS=StringVar(value=i18n.t("config_window.side_menu_labels.others")),
            VAR_SIDE_MENU_LABEL_ADVANCED_SETTINGS=StringVar(value=i18n.t("config_window.side_menu_labels.advanced_settings")),

@@ -282,7 +284,7 @@ class View():
            VAR_DESC_CTRANSLATE2_WEIGHT_TYPE=StringVar(value=i18n.t("config_window.ctranslate2_weight_type.desc")),
            DICT_CTRANSLATE2_WEIGHT_TYPE=self.getSelectableCtranslate2WeightTypeDict(),
            CALLBACK_SET_CTRANSLATE2_WEIGHT_TYPE=None,
-            VAR_CTRANSLATE2_WEIGHT_TYPE=StringVar(value=self.getSelectableCtranslate2WeightTypeDict()[config.WEIGHT_TYPE]),
+            VAR_CTRANSLATE2_WEIGHT_TYPE=StringVar(value=self.getSelectableCtranslate2WeightTypeDict()[config.CTRANSLATE2_WEIGHT_TYPE]),

            VAR_LABEL_DEEPL_AUTH_KEY=StringVar(value=i18n.t( "config_window.deepl_auth_key.label")),
            VAR_DESC_DEEPL_AUTH_KEY=StringVar(
@@ -383,6 +385,19 @@ class View():
            CALLBACK_FOCUS_OUT_SPEAKER_MAX_PHRASES=self.callbackBindFocusOut_SpeakerMaxPhrases,


+            # Transcription Tab (Whisper Internal AI Model)
+            VAR_LABEL_USE_WHISPER_FEATURE=StringVar(value=i18n.t("config_window.use_whisper_feature.label")),
+            VAR_DESC_USE_WHISPER_FEATURE=StringVar(value=i18n.t("config_window.use_whisper_feature.desc")),
+            CALLBACK_SET_USE_WHISPER_FEATURE=None,
+            VAR_USE_WHISPER_FEATURE=BooleanVar(value=config.USE_WHISPER_FEATURE),
+
+            VAR_LABEL_WHISPER_WEIGHT_TYPE=StringVar(value=i18n.t("config_window.whisper_weight_type.label")),
+            VAR_DESC_WHISPER_WEIGHT_TYPE=StringVar(value=i18n.t("config_window.whisper_weight_type.desc")),
+            DICT_WHISPER_WEIGHT_TYPE=self.getSelectableWhisperWeightTypeDict(),
+            CALLBACK_SET_WHISPER_WEIGHT_TYPE=None,
+            VAR_WHISPER_WEIGHT_TYPE=StringVar(value=self.getSelectableWhisperWeightTypeDict()[config.WHISPER_WEIGHT_TYPE]),
+
+
            # Others Tab
            VAR_LABEL_ENABLE_AUTO_CLEAR_MESSAGE_BOX=StringVar(value=i18n.t("config_window.auto_clear_the_message_box.label")),
            VAR_DESC_ENABLE_AUTO_CLEAR_MESSAGE_BOX=None,
@@ -630,6 +645,11 @@ class View():
            self.view_variable.CALLBACK_SET_SPEAKER_PHRASE_TIMEOUT = config_window_registers.get("callback_set_speaker_phrase_timeout", None)
            self.view_variable.CALLBACK_SET_SPEAKER_MAX_PHRASES = config_window_registers.get("callback_set_speaker_max_phrases", None)

+            # Transcription Tab (Internal AI Model)
+            self.view_variable.CALLBACK_SET_USE_WHISPER_FEATURE = config_window_registers.get("callback_set_use_whisper_feature", None)
+            self.view_variable.CALLBACK_SET_WHISPER_WEIGHT_TYPE = config_window_registers.get("callback_set_whisper_weight_type", None)
+
+
            # Others Tab
            self.view_variable.CALLBACK_SET_ENABLE_AUTO_CLEAR_MESSAGE_BOX = config_window_registers.get("callback_set_enable_auto_clear_chatbox", None)
            self.view_variable.CALLBACK_SET_ENABLE_SEND_ONLY_TRANSLATED_MESSAGES = config_window_registers.get("callback_set_send_only_translated_messages", None)
@@ -684,6 +704,11 @@ class View():
            )
            self.replaceMicThresholdCheckButton_Disabled()

+        if config.USE_WHISPER_FEATURE is True:
+            self.openWhisperWeightTypeWidget()
+        else:
+            self.closeWhisperWeightTypeWidget()
+
        if config.ENABLE_SPEAKER2CHATBOX is False:
            vrct_gui._changeConfigWindowWidgetsStatus(
                status="disabled",
@@ -925,6 +950,17 @@ class View():
        vrct_gui.update()
        vrct_gui.config_window.lift()

+    @staticmethod
+    def getSelectableWhisperWeightTypeDict():
+        return {  # config weight-type value -> localized label, with a size string passed as `capacity`
+            config.SELECTABLE_WHISPER_WEIGHT_TYPE_DICT["tiny"]: i18n.t("config_window.whisper_weight_type.tiny", capacity="74.5MB"),
+            config.SELECTABLE_WHISPER_WEIGHT_TYPE_DICT["base"]: i18n.t("config_window.whisper_weight_type.base", capacity="141MB"),
+            config.SELECTABLE_WHISPER_WEIGHT_TYPE_DICT["small"]: i18n.t("config_window.whisper_weight_type.small", capacity="463MB"),
+            config.SELECTABLE_WHISPER_WEIGHT_TYPE_DICT["medium"]: i18n.t("config_window.whisper_weight_type.medium", capacity="1.42GB"),
+            config.SELECTABLE_WHISPER_WEIGHT_TYPE_DICT["large-v1"]: i18n.t("config_window.whisper_weight_type.large_v1", capacity="2.87GB"),
+            config.SELECTABLE_WHISPER_WEIGHT_TYPE_DICT["large-v2"]: i18n.t("config_window.whisper_weight_type.large_v2", capacity="2.87GB"),
+            config.SELECTABLE_WHISPER_WEIGHT_TYPE_DICT["large-v3"]: i18n.t("config_window.whisper_weight_type.large_v3", capacity="2.87GB"),
+        }

 # Open Webpage Functions
    def openWebPage_Booth(self):
@@ -1020,7 +1056,8 @@ class View():
            self.restart_required_configs_pre_data.ui_scaling == config.UI_SCALING and
            self.restart_required_configs_pre_data.font_family == config.FONT_FAMILY and
            self.restart_required_configs_pre_data.ui_language == config.UI_LANGUAGE and
-            self.restart_required_configs_pre_data.is_reset_button_displayed_for_translation == config.IS_RESET_BUTTON_DISPLAYED_FOR_TRANSLATION
+            self.restart_required_configs_pre_data.is_reset_button_displayed_for_translation == config.IS_RESET_BUTTON_DISPLAYED_FOR_TRANSLATION and
+            self.restart_required_configs_pre_data.is_reset_button_displayed_for_whisper == config.IS_RESET_BUTTON_DISPLAYED_FOR_WHISPER
        )

        if locale is None:
@@ -1075,7 +1112,7 @@ class View():
        self.view_variable.VAR_CTRANSLATE2_WEIGHT_TYPE.set(self.getSelectableCtranslate2WeightTypeDict()[selected_weight_type])

    def setLatestCTranslate2WeightType(self):
-        selected_weight_type = self.getSelectableCtranslate2WeightTypeDict()[config.WEIGHT_TYPE]
+        selected_weight_type = self.getSelectableCtranslate2WeightTypeDict()[config.CTRANSLATE2_WEIGHT_TYPE]
        self.view_variable.VAR_CTRANSLATE2_WEIGHT_TYPE.set(selected_weight_type)


@@ -1088,6 +1125,23 @@ class View():
        vrct_gui.config_window.sb__ctranslate2_weight_type.grid_remove()


+    def openWhisperWeightTypeWidget(self):  # grid both the Whisper switch row and the weight-type row
+        vrct_gui.config_window.sb__use_whisper_feature.grid()
+        vrct_gui.config_window.sb__whisper_weight_type.grid()
+
+    def closeWhisperWeightTypeWidget(self):  # keep the Whisper switch row gridded; remove only the weight-type row
+        vrct_gui.config_window.sb__use_whisper_feature.grid()
+        vrct_gui.config_window.sb__whisper_weight_type.grid_remove()
+
+
+    def updateSelectedWhisperWeightType(self, selected_weight_type:str):  # set the dropdown variable to the label mapped from selected_weight_type
+        self.view_variable.VAR_WHISPER_WEIGHT_TYPE.set(self.getSelectableWhisperWeightTypeDict()[selected_weight_type])
+
+    def setLatestWhisperWeightType(self):
+        # Re-sync the Whisper dropdown variable with config.WHISPER_WEIGHT_TYPE; named for Whisper so it no longer overrides the CTranslate2 method.
+        self.view_variable.VAR_WHISPER_WEIGHT_TYPE.set(self.getSelectableWhisperWeightTypeDict()[config.WHISPER_WEIGHT_TYPE])
+
+
    def openMicEnergyThresholdWidget(self):
        self.view_variable.VAR_LABEL_MIC_DYNAMIC_ENERGY_THRESHOLD.set(i18n.t("config_window.mic_dynamic_energy_threshold.label_for_manual"))
        self.view_variable.VAR_DESC_MIC_DYNAMIC_ENERGY_THRESHOLD.set(i18n.t("config_window.mic_dynamic_energy_threshold.desc_for_manual"))
--- a/vrct_gui/config_window/widgets/createSideMenuAndSettingsBoxContainers/createSideMenuAndSettingsBoxContainers.py
+++ b/vrct_gui/config_window/widgets/createSideMenuAndSettingsBoxContainers/createSideMenuAndSettingsBoxContainers.py
@@ -7,7 +7,7 @@ from ._createSettingBoxContainer import _createSettingBoxContainer


 from .setting_box_containers.setting_box_appearance import createSettingBox_Appearance
-from .setting_box_containers.setting_box_transcription import createSettingBox_Mic, createSettingBox_Speaker
+from .setting_box_containers.setting_box_transcription import createSettingBox_Mic, createSettingBox_Speaker, createSettingBox_InternalModel
 from .setting_box_containers.setting_box_others import createSettingBox_Others, createSettingBox_Others_SendMessageFormats, createSettingBox_Others_ReceivedMessageFormats, createSettingBox_Others_Additional
 from .setting_box_containers.setting_box_advanced_settings import createSettingBox_AdvancedSettings
 from .setting_box_containers.setting_box_translation import createSettingBox_Translation
@@ -94,6 +94,10 @@ def createSideMenuAndSettingsBoxContainers(config_window, settings, view_variabl
                        "var_section_title": view_variable.VAR_SECOND_TITLE_TRANSCRIPTION_SPEAKER,
                        "setting_box": createSettingBox_Speaker
                    },
+                    {
+                        "var_section_title": view_variable.VAR_SECOND_TITLE_TRANSCRIPTION_INTERNAL_MODEL,
+                        "setting_box": createSettingBox_InternalModel
+                    },
                ]
            },
        },
--- a/vrct_gui/config_window/widgets/createSideMenuAndSettingsBoxContainers/setting_box_containers/setting_box_transcription/__init__.py
+++ b/vrct_gui/config_window/widgets/createSideMenuAndSettingsBoxContainers/setting_box_containers/setting_box_transcription/__init__.py
@@ -1,2 +1,3 @@
 from .createSettingBox_Mic import createSettingBox_Mic
-from .createSettingBox_Speaker import createSettingBox_Speaker
+from .createSettingBox_Speaker import createSettingBox_Speaker
+from .createSettingBox_InternalModel import createSettingBox_InternalModel
--- a/vrct_gui/config_window/widgets/createSideMenuAndSettingsBoxContainers/setting_box_containers/setting_box_transcription/createSettingBox_InternalModel.py
+++ b/vrct_gui/config_window/widgets/createSideMenuAndSettingsBoxContainers/setting_box_containers/setting_box_transcription/createSettingBox_InternalModel.py
@@ -0,0 +1,37 @@
+from utils import callFunctionIfCallable
+
+from .._SettingBoxGenerator import _SettingBoxGenerator
+
+def createSettingBox_InternalModel(setting_box_wrapper, config_window, settings, view_variable):
+    """Build the use-Whisper switch row and the Whisper weight-type dropdown row."""
+    generator = _SettingBoxGenerator(setting_box_wrapper, config_window, settings, view_variable)
+
+    def onUseWhisperFeatureSwitched():
+        # Resolved at call time; presumably set by the generator via switch_attr_name — confirm.
+        switch_widget = config_window.sb__switch_use_whisper_feature
+        callFunctionIfCallable(view_variable.CALLBACK_SET_USE_WHISPER_FEATURE, switch_widget.get())
+
+    def onWhisperWeightTypeSelected(value):
+        callFunctionIfCallable(view_variable.CALLBACK_SET_WHISPER_WEIGHT_TYPE, value)
+
+
+    # Row 0: switch bound to VAR_USE_WHISPER_FEATURE.
+    config_window.sb__use_whisper_feature = generator.createSettingBoxSwitch(
+        for_var_label_text=view_variable.VAR_LABEL_USE_WHISPER_FEATURE,
+        for_var_desc_text=view_variable.VAR_DESC_USE_WHISPER_FEATURE,
+        switch_attr_name="sb__switch_use_whisper_feature",
+        command=onUseWhisperFeatureSwitched,
+        variable=view_variable.VAR_USE_WHISPER_FEATURE
+    )
+    config_window.sb__use_whisper_feature.grid(row=0, pady=0)
+
+    # Row 1: dropdown bound to VAR_WHISPER_WEIGHT_TYPE.
+    config_window.sb__whisper_weight_type = generator.createSettingBoxDropdownMenu(
+        for_var_label_text=view_variable.VAR_LABEL_WHISPER_WEIGHT_TYPE,
+        for_var_desc_text=view_variable.VAR_DESC_WHISPER_WEIGHT_TYPE,
+        optionmenu_attr_name="sb__optionmenu_whisper_weight_type",
+        dropdown_menu_values=view_variable.DICT_WHISPER_WEIGHT_TYPE,
+        command=onWhisperWeightTypeSelected,
+        variable=view_variable.VAR_WHISPER_WEIGHT_TYPE,
+    )
+    config_window.sb__whisper_weight_type.grid(row=1, pady=0)