From b8b04f1be4a99affbe5dee9171139ad5023b9dcd Mon Sep 17 00:00:00 2001 From: misyaguziya Date: Sat, 1 Jul 2023 23:16:56 +0900 Subject: [PATCH] add Hyperparameter phrase_timeout, max_phrases --- VRCT.py | 38 +++++++++++++++++++++++++++++++------- audio_transcriber.py | 8 +++++--- window_config.py | 25 +++++++++++++++---------- 3 files changed, 51 insertions(+), 20 deletions(-) diff --git a/VRCT.py b/VRCT.py index 9e85d309..b9c990ba 100644 --- a/VRCT.py +++ b/VRCT.py @@ -44,16 +44,19 @@ class App(customtkinter.CTk): ## Transcription Send self.CHOICE_MIC_DEVICE = audio_utils.get_default_input_device()["name"] self.INPUT_MIC_VOICE_LANGUAGE = list(languages.transcription_lang.keys())[0] - self.INPUT_MIC_ENERGY_THRESHOLD = 1000 - self.INPUT_MIC_DYNAMIC_ENERGY_THRESHOLD = False + self.INPUT_MIC_ENERGY_THRESHOLD = 300 + self.INPUT_MIC_DYNAMIC_ENERGY_THRESHOLD = True self.INPUT_MIC_RECORD_TIMEOUT = 3 + self.INPUT_MIC_PHRASE_TIMEOUT = 3 + self.INPUT_MIC_MAX_PHRASES = 10 ## Transcription Receive self.CHOICE_SPEAKER_DEVICE = audio_utils.get_default_output_device()["name"] self.INPUT_SPEAKER_VOICE_LANGUAGE = list(languages.transcription_lang.keys())[1] - self.INPUT_SPEAKER_ENERGY_THRESHOLD = 1000 - self.INPUT_SPEAKER_DYNAMIC_ENERGY_THRESHOLD = False + self.INPUT_SPEAKER_ENERGY_THRESHOLD = 300 + self.INPUT_SPEAKER_DYNAMIC_ENERGY_THRESHOLD = True self.INPUT_SPEAKER_RECORD_TIMEOUT = 3 - + self.INPUT_SPEAKER_PHRASE_TIMEOUT = 3 + self.INPUT_SPEAKER_MAX_PHRASES = 10 ## Parameter self.OSC_IP_ADDRESS = "127.0.0.1" self.OSC_PORT = 9000 @@ -131,6 +134,13 @@ class App(customtkinter.CTk): if "INPUT_MIC_RECORD_TIMEOUT" in config.keys(): if type(config["INPUT_MIC_RECORD_TIMEOUT"]) is int: self.INPUT_MIC_RECORD_TIMEOUT = config["INPUT_MIC_RECORD_TIMEOUT"] + if "INPUT_MIC_PHRASE_TIMEOUT" in config.keys(): + if type(config["INPUT_MIC_PHRASE_TIMEOUT"]) is int: + self.INPUT_MIC_PHRASE_TIMEOUT = config["INPUT_MIC_PHRASE_TIMEOUT"] + if "INPUT_MIC_MAX_PHRASES" in config.keys(): + if 
type(config["INPUT_MIC_MAX_PHRASES"]) is int: + self.INPUT_MIC_MAX_PHRASES = config["INPUT_MIC_MAX_PHRASES"] + if "CHOICE_SPEAKER_DEVICE" in config.keys(): if config["CHOICE_SPEAKER_DEVICE"] in [device["name"] for device in audio_utils.get_output_device_list()]: self.CHOICE_SPEAKER_DEVICE = config["CHOICE_SPEAKER_DEVICE"] @@ -146,6 +156,12 @@ class App(customtkinter.CTk): if "INPUT_SPEAKER_RECORD_TIMEOUT" in config.keys(): if type(config["INPUT_SPEAKER_RECORD_TIMEOUT"]) is int: self.INPUT_SPEAKER_RECORD_TIMEOUT = config["INPUT_SPEAKER_RECORD_TIMEOUT"] + if "INPUT_SPEAKER_PHRASE_TIMEOUT" in config.keys(): + if type(config["INPUT_SPEAKER_PHRASE_TIMEOUT"]) is int: + self.INPUT_SPEAKER_PHRASE_TIMEOUT = config["INPUT_SPEAKER_PHRASE_TIMEOUT"] + if "INPUT_SPEAKER_MAX_PHRASES" in config.keys(): + if type(config["INPUT_SPEAKER_MAX_PHRASES"]) is int: + self.INPUT_SPEAKER_MAX_PHRASES = config["INPUT_SPEAKER_MAX_PHRASES"] # Parameter if "OSC_IP_ADDRESS" in config.keys(): @@ -184,11 +200,15 @@ class App(customtkinter.CTk): "INPUT_MIC_ENERGY_THRESHOLD": self.INPUT_MIC_ENERGY_THRESHOLD, "INPUT_MIC_DYNAMIC_ENERGY_THRESHOLD": self.INPUT_MIC_DYNAMIC_ENERGY_THRESHOLD, "INPUT_MIC_RECORD_TIMEOUT": self.INPUT_MIC_RECORD_TIMEOUT, + "INPUT_MIC_PHRASE_TIMEOUT": self.INPUT_MIC_PHRASE_TIMEOUT, + "INPUT_MIC_MAX_PHRASES": self.INPUT_MIC_MAX_PHRASES, "CHOICE_SPEAKER_DEVICE": self.CHOICE_SPEAKER_DEVICE, "INPUT_SPEAKER_VOICE_LANGUAGE": self.INPUT_SPEAKER_VOICE_LANGUAGE, "INPUT_SPEAKER_ENERGY_THRESHOLD": self.INPUT_SPEAKER_ENERGY_THRESHOLD, "INPUT_SPEAKER_DYNAMIC_ENERGY_THRESHOLD": self.INPUT_SPEAKER_DYNAMIC_ENERGY_THRESHOLD, "INPUT_SPEAKER_RECORD_TIMEOUT": self.INPUT_SPEAKER_RECORD_TIMEOUT, + "INPUT_SPEAKER_PHRASE_TIMEOUT": self.INPUT_SPEAKER_PHRASE_TIMEOUT, + "INPUT_SPEAKER_MAX_PHRASES": self.INPUT_SPEAKER_MAX_PHRASES, "OSC_IP_ADDRESS": self.OSC_IP_ADDRESS, "OSC_PORT": self.OSC_PORT, "AUTH_KEYS": self.AUTH_KEYS, @@ -423,7 +443,9 @@ class App(customtkinter.CTk): self.mic_transcriber = 
audio_transcriber.AudioTranscriber( speaker=False, source=self.mic_audio_recorder.source, - language=languages.transcription_lang[self.INPUT_MIC_VOICE_LANGUAGE] + language=languages.transcription_lang[self.INPUT_MIC_VOICE_LANGUAGE], + phrase_timeout=self.INPUT_MIC_PHRASE_TIMEOUT, + max_phrases=self.INPUT_MIC_MAX_PHRASES, ) self.mic_transcribe = utils.thread_fnc(self.mic_transcriber.transcribe_audio_queue, args=(self.mic_audio_queue,)) self.mic_transcribe.daemon = True @@ -461,7 +483,9 @@ class App(customtkinter.CTk): self.spk_transcriber = audio_transcriber.AudioTranscriber( speaker=True, source=self.spk_audio_recorder.source, - language=languages.transcription_lang[self.INPUT_SPEAKER_VOICE_LANGUAGE] + language=languages.transcription_lang[self.INPUT_SPEAKER_VOICE_LANGUAGE], + phrase_timeout=self.INPUT_SPEAKER_PHRASE_TIMEOUT, + max_phrases=self.INPUT_SPEAKER_MAX_PHRASES, ) self.spk_transcribe = utils.thread_fnc(self.spk_transcriber.transcribe_audio_queue, args=(self.spk_audio_queue,)) self.spk_transcribe.daemon = True diff --git a/audio_transcriber.py b/audio_transcriber.py index 299617af..695ee55f 100644 --- a/audio_transcriber.py +++ b/audio_transcriber.py @@ -9,9 +9,11 @@ PHRASE_TIMEOUT = 3 MAX_PHRASES = 10 class AudioTranscriber: - def __init__(self, speaker, source, language): + def __init__(self, speaker, source, language, phrase_timeout, max_phrases): self.speaker = speaker self.language = language + self.phrase_timeout = phrase_timeout + self.max_phrases = max_phrases self.transcript_data = [] self.transcript_changed_event = threading.Event() self.audio_recognizer = sr.Recognizer() @@ -47,7 +49,7 @@ class AudioTranscriber: def update_last_sample_and_phrase_status(self, data, time_spoken): source_info = self.audio_sources - if source_info["last_spoken"] and time_spoken - source_info["last_spoken"] > timedelta(seconds=PHRASE_TIMEOUT): + if source_info["last_spoken"] and time_spoken - source_info["last_spoken"] > timedelta(seconds=self.phrase_timeout): 
source_info["last_sample"] = bytes() source_info["new_phrase"] = True else: @@ -78,7 +80,7 @@ class AudioTranscriber: transcript = self.transcript_data if source_info["new_phrase"] or len(transcript) == 0: - if len(transcript) > MAX_PHRASES: + if len(transcript) > self.max_phrases: transcript.pop(-1) transcript.insert(0, text) else: diff --git a/window_config.py b/window_config.py index 9b7cf946..95aa0c4c 100644 --- a/window_config.py +++ b/window_config.py @@ -106,13 +106,16 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel): # tab Translation ## optionmenu translation translator + row = 0 + padx = 5 + pady = 1 self.label_translation_translator = customtkinter.CTkLabel( self.tabview_config.tab("Translation"), text="Select Translator:", fg_color="transparent", font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY), ) - self.label_translation_translator.grid(row=0, column=0, columnspan=1, padx=5, pady=5, sticky="nsw") + self.label_translation_translator.grid(row=row, column=0, columnspan=1, padx=padx, pady=pady, sticky="nsw") self.optionmenu_translation_translator = customtkinter.CTkOptionMenu( self.tabview_config.tab("Translation"), values=list(self.parent.translator.translator_status.keys()), @@ -120,17 +123,18 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel): variable=customtkinter.StringVar(value=self.parent.CHOICE_TRANSLATOR), font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY), ) - self.optionmenu_translation_translator.grid(row=0, column=1, columnspan=3 ,padx=5, pady=5, sticky="nsew") + self.optionmenu_translation_translator.grid(row=row, column=1, columnspan=3 ,padx=padx, pady=pady, sticky="nsew") self.optionmenu_translation_translator._dropdown_menu.configure(font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY)) ## optionmenu translation input language + row +=1 self.label_translation_input_language = customtkinter.CTkLabel( self.tabview_config.tab("Translation"), text="Send Language:", fg_color="transparent", 
font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY) ) - self.label_translation_input_language.grid(row=1, column=0, columnspan=1, padx=5, pady=5, sticky="nsw") + self.label_translation_input_language.grid(row=row, column=0, columnspan=1, padx=padx, pady=pady, sticky="nsw") ## select translation input source language self.optionmenu_translation_input_source_language = customtkinter.CTkOptionMenu( @@ -140,7 +144,7 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel): variable=customtkinter.StringVar(value=self.parent.INPUT_SOURCE_LANG), font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY), ) - self.optionmenu_translation_input_source_language.grid(row=1, column=1, columnspan=1, padx=5, pady=5, sticky="nsew") + self.optionmenu_translation_input_source_language.grid(row=row, column=1, columnspan=1, padx=padx, pady=pady, sticky="nsew") self.optionmenu_translation_input_source_language._dropdown_menu.configure(font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY)) ## label translation input arrow @@ -150,7 +154,7 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel): fg_color="transparent", font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY) ) - self.label_translation_input_arrow.grid(row=1, column=2, columnspan=1, padx=5, pady=5, sticky="nsew") + self.label_translation_input_arrow.grid(row=row, column=2, columnspan=1, padx=padx, pady=pady, sticky="nsew") ## select translation input target language self.optionmenu_translation_input_target_language = customtkinter.CTkOptionMenu( @@ -160,17 +164,18 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel): variable=customtkinter.StringVar(value=self.parent.INPUT_TARGET_LANG), font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY), ) - self.optionmenu_translation_input_target_language.grid(row=1, column=3, columnspan=1, padx=5, pady=5, sticky="nsew") + self.optionmenu_translation_input_target_language.grid(row=row, column=3, columnspan=1, padx=padx, pady=pady, sticky="nsew") 
self.optionmenu_translation_input_target_language._dropdown_menu.configure(font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY)) ## optionmenu translation output language + row +=1 self.label_translation_output_language = customtkinter.CTkLabel( self.tabview_config.tab("Translation"), text="Receive Language:", fg_color="transparent", font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY) ) - self.label_translation_output_language.grid(row=2, column=0, columnspan=1, padx=5, pady=5, sticky="nsw") + self.label_translation_output_language.grid(row=row, column=0, columnspan=1, padx=padx, pady=pady, sticky="nsw") ## select translation output source language self.optionmenu_translation_output_source_language = customtkinter.CTkOptionMenu( @@ -180,7 +185,7 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel): variable=customtkinter.StringVar(value=self.parent.OUTPUT_SOURCE_LANG), font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY), ) - self.optionmenu_translation_output_source_language.grid(row=2, column=1, columnspan=1, padx=5, pady=5, sticky="nsew") + self.optionmenu_translation_output_source_language.grid(row=row, column=1, columnspan=1, padx=padx, pady=pady, sticky="nsew") self.optionmenu_translation_output_source_language._dropdown_menu.configure(font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY)) ## label translation output arrow @@ -190,7 +195,7 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel): fg_color="transparent", font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY) ) - self.label_translation_output_arrow.grid(row=2, column=2, columnspan=1, padx=5, pady=5, sticky="nsew") + self.label_translation_output_arrow.grid(row=row, column=2, columnspan=1, padx=padx, pady=pady, sticky="nsew") ## select translation output target language self.optionmenu_translation_output_target_language = customtkinter.CTkOptionMenu( @@ -200,7 +205,7 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel): 
variable=customtkinter.StringVar(value=self.parent.OUTPUT_TARGET_LANG), font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY), ) - self.optionmenu_translation_output_target_language.grid(row=2, column=3, columnspan=1, padx=5, pady=5, sticky="nsew") + self.optionmenu_translation_output_target_language.grid(row=row, column=3, columnspan=1, padx=padx, pady=pady, sticky="nsew") self.optionmenu_translation_output_target_language._dropdown_menu.configure(font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY)) # tab Transcription