add Hyperparameter

phrase_timeout, max_phrases
This commit is contained in:
misyaguziya
2023-07-01 23:16:56 +09:00
parent b4c8d79734
commit b8b04f1be4
3 changed files with 51 additions and 20 deletions

38
VRCT.py
View File

@@ -44,16 +44,19 @@ class App(customtkinter.CTk):
## Transcription Send
self.CHOICE_MIC_DEVICE = audio_utils.get_default_input_device()["name"]
self.INPUT_MIC_VOICE_LANGUAGE = list(languages.transcription_lang.keys())[0]
self.INPUT_MIC_ENERGY_THRESHOLD = 1000
self.INPUT_MIC_DYNAMIC_ENERGY_THRESHOLD = False
self.INPUT_MIC_ENERGY_THRESHOLD = 300
self.INPUT_MIC_DYNAMIC_ENERGY_THRESHOLD = True
self.INPUT_MIC_RECORD_TIMEOUT = 3
self.INPUT_MIC_PHRASE_TIMEOUT = 3
self.INPUT_MIC_MAX_PHRASES = 10
## Transcription Receive
self.CHOICE_SPEAKER_DEVICE = audio_utils.get_default_output_device()["name"]
self.INPUT_SPEAKER_VOICE_LANGUAGE = list(languages.transcription_lang.keys())[1]
self.INPUT_SPEAKER_ENERGY_THRESHOLD = 1000
self.INPUT_SPEAKER_DYNAMIC_ENERGY_THRESHOLD = False
self.INPUT_SPEAKER_ENERGY_THRESHOLD = 300
self.INPUT_SPEAKER_DYNAMIC_ENERGY_THRESHOLD = True
self.INPUT_SPEAKER_RECORD_TIMEOUT = 3
self.INPUT_SPEAKER_PHRASE_TIMEOUT = 3
self.INPUT_SPEAKER_MAX_PHRASES = 10
## Parameter
self.OSC_IP_ADDRESS = "127.0.0.1"
self.OSC_PORT = 9000
@@ -131,6 +134,13 @@ class App(customtkinter.CTk):
if "INPUT_MIC_RECORD_TIMEOUT" in config.keys():
if type(config["INPUT_MIC_RECORD_TIMEOUT"]) is int:
self.INPUT_MIC_RECORD_TIMEOUT = config["INPUT_MIC_RECORD_TIMEOUT"]
if "INPUT_MIC_PHRASE_TIMEOUT" in config.keys():
if type(config["INPUT_MIC_PHRASE_TIMEOUT"]) is int:
self.INPUT_MIC_PHRASE_TIMEOUT = config["INPUT_MIC_PHRASE_TIMEOUT"]
if "INPUT_MIC_MAX_PHRASES" in config.keys():
if type(config["INPUT_MIC_MAX_PHRASES"]) is int:
self.INPUT_MIC_MAX_PHRASES = config["INPUT_MIC_MAX_PHRASES"]
if "CHOICE_SPEAKER_DEVICE" in config.keys():
if config["CHOICE_SPEAKER_DEVICE"] in [device["name"] for device in audio_utils.get_output_device_list()]:
self.CHOICE_SPEAKER_DEVICE = config["CHOICE_SPEAKER_DEVICE"]
@@ -146,6 +156,12 @@ class App(customtkinter.CTk):
if "INPUT_SPEAKER_RECORD_TIMEOUT" in config.keys():
if type(config["INPUT_SPEAKER_RECORD_TIMEOUT"]) is int:
self.INPUT_SPEAKER_RECORD_TIMEOUT = config["INPUT_SPEAKER_RECORD_TIMEOUT"]
if "INPUT_SPEAKER_PHRASE_TIMEOUT" in config.keys():
if type(config["INPUT_SPEAKER_PHRASE_TIMEOUT"]) is int:
self.INPUT_SPEAKER_PHRASE_TIMEOUT = config["INPUT_SPEAKER_PHRASE_TIMEOUT"]
if "INPUT_SPEAKER_MAX_PHRASES" in config.keys():
if type(config["INPUT_SPEAKER_MAX_PHRASES"]) is int:
self.INPUT_MIC_MAX_PHRASES = config["INPUT_SPEAKER_MAX_PHRASES"]
# Parameter
if "OSC_IP_ADDRESS" in config.keys():
@@ -184,11 +200,15 @@ class App(customtkinter.CTk):
"INPUT_MIC_ENERGY_THRESHOLD": self.INPUT_MIC_ENERGY_THRESHOLD,
"INPUT_MIC_DYNAMIC_ENERGY_THRESHOLD": self.INPUT_MIC_DYNAMIC_ENERGY_THRESHOLD,
"INPUT_MIC_RECORD_TIMEOUT": self.INPUT_MIC_RECORD_TIMEOUT,
"INPUT_MIC_PHRASE_TIMEOUT": self.INPUT_MIC_PHRASE_TIMEOUT,
"INPUT_MIC_MAX_PHRASES": self.INPUT_MIC_MAX_PHRASES,
"CHOICE_SPEAKER_DEVICE": self.CHOICE_SPEAKER_DEVICE,
"INPUT_SPEAKER_VOICE_LANGUAGE": self.INPUT_SPEAKER_VOICE_LANGUAGE,
"INPUT_SPEAKER_ENERGY_THRESHOLD": self.INPUT_SPEAKER_ENERGY_THRESHOLD,
"INPUT_SPEAKER_DYNAMIC_ENERGY_THRESHOLD": self.INPUT_SPEAKER_DYNAMIC_ENERGY_THRESHOLD,
"INPUT_SPEAKER_RECORD_TIMEOUT": self.INPUT_SPEAKER_RECORD_TIMEOUT,
"INPUT_SPEAKER_PHRASE_TIMEOUT": self.INPUT_SPEAKER_PHRASE_TIMEOUT,
"INPUT_SPEAKER_MAX_PHRASES": self.INPUT_SPEAKER_MAX_PHRASES,
"OSC_IP_ADDRESS": self.OSC_IP_ADDRESS,
"OSC_PORT": self.OSC_PORT,
"AUTH_KEYS": self.AUTH_KEYS,
@@ -423,7 +443,9 @@ class App(customtkinter.CTk):
self.mic_transcriber = audio_transcriber.AudioTranscriber(
speaker=False,
source=self.mic_audio_recorder.source,
language=languages.transcription_lang[self.INPUT_MIC_VOICE_LANGUAGE]
language=languages.transcription_lang[self.INPUT_MIC_VOICE_LANGUAGE],
phrase_timeout=self.INPUT_MIC_PHRASE_TIMEOUT,
max_phrases=self.INPUT_MIC_MAX_PHRASES,
)
self.mic_transcribe = utils.thread_fnc(self.mic_transcriber.transcribe_audio_queue, args=(self.mic_audio_queue,))
self.mic_transcribe.daemon = True
@@ -461,7 +483,9 @@ class App(customtkinter.CTk):
self.spk_transcriber = audio_transcriber.AudioTranscriber(
speaker=True,
source=self.spk_audio_recorder.source,
language=languages.transcription_lang[self.INPUT_SPEAKER_VOICE_LANGUAGE]
language=languages.transcription_lang[self.INPUT_SPEAKER_VOICE_LANGUAGE],
phrase_timeout=self.INPUT_SPEAKER_PHRASE_TIMEOUT,
max_phrases=self.INPUT_SPEAKER_MAX_PHRASES,
)
self.spk_transcribe = utils.thread_fnc(self.spk_transcriber.transcribe_audio_queue, args=(self.spk_audio_queue,))
self.spk_transcribe.daemon = True

View File

@@ -9,9 +9,11 @@ PHRASE_TIMEOUT = 3
MAX_PHRASES = 10
class AudioTranscriber:
def __init__(self, speaker, source, language):
def __init__(self, speaker, source, language, phrase_timeout, max_phrases):
self.speaker = speaker
self.language = language
self.phrase_timeout = phrase_timeout
self.max_phrases = max_phrases
self.transcript_data = []
self.transcript_changed_event = threading.Event()
self.audio_recognizer = sr.Recognizer()
@@ -47,7 +49,7 @@ class AudioTranscriber:
def update_last_sample_and_phrase_status(self, data, time_spoken):
source_info = self.audio_sources
if source_info["last_spoken"] and time_spoken - source_info["last_spoken"] > timedelta(seconds=PHRASE_TIMEOUT):
if source_info["last_spoken"] and time_spoken - source_info["last_spoken"] > timedelta(seconds=self.phrase_timeout):
source_info["last_sample"] = bytes()
source_info["new_phrase"] = True
else:
@@ -78,7 +80,7 @@ class AudioTranscriber:
transcript = self.transcript_data
if source_info["new_phrase"] or len(transcript) == 0:
if len(transcript) > MAX_PHRASES:
if len(transcript) > self.max_phrases:
transcript.pop(-1)
transcript.insert(0, text)
else:

View File

@@ -106,13 +106,16 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel):
# tab Translation
## optionmenu translation translator
row = 0
padx = 5
pady = 1
self.label_translation_translator = customtkinter.CTkLabel(
self.tabview_config.tab("Translation"),
text="Select Translator:",
fg_color="transparent",
font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY),
)
self.label_translation_translator.grid(row=0, column=0, columnspan=1, padx=5, pady=5, sticky="nsw")
self.label_translation_translator.grid(row=row, column=0, columnspan=1, padx=padx, pady=pady, sticky="nsw")
self.optionmenu_translation_translator = customtkinter.CTkOptionMenu(
self.tabview_config.tab("Translation"),
values=list(self.parent.translator.translator_status.keys()),
@@ -120,17 +123,18 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel):
variable=customtkinter.StringVar(value=self.parent.CHOICE_TRANSLATOR),
font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY),
)
self.optionmenu_translation_translator.grid(row=0, column=1, columnspan=3 ,padx=5, pady=5, sticky="nsew")
self.optionmenu_translation_translator.grid(row=row, column=1, columnspan=3 ,padx=padx, pady=pady, sticky="nsew")
self.optionmenu_translation_translator._dropdown_menu.configure(font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY))
## optionmenu translation input language
row +=1
self.label_translation_input_language = customtkinter.CTkLabel(
self.tabview_config.tab("Translation"),
text="Send Language:",
fg_color="transparent",
font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY)
)
self.label_translation_input_language.grid(row=1, column=0, columnspan=1, padx=5, pady=5, sticky="nsw")
self.label_translation_input_language.grid(row=row, column=0, columnspan=1, padx=padx, pady=pady, sticky="nsw")
## select translation input source language
self.optionmenu_translation_input_source_language = customtkinter.CTkOptionMenu(
@@ -140,7 +144,7 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel):
variable=customtkinter.StringVar(value=self.parent.INPUT_SOURCE_LANG),
font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY),
)
self.optionmenu_translation_input_source_language.grid(row=1, column=1, columnspan=1, padx=5, pady=5, sticky="nsew")
self.optionmenu_translation_input_source_language.grid(row=row, column=1, columnspan=1, padx=padx, pady=pady, sticky="nsew")
self.optionmenu_translation_input_source_language._dropdown_menu.configure(font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY))
## label translation input arrow
@@ -150,7 +154,7 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel):
fg_color="transparent",
font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY)
)
self.label_translation_input_arrow.grid(row=1, column=2, columnspan=1, padx=5, pady=5, sticky="nsew")
self.label_translation_input_arrow.grid(row=row, column=2, columnspan=1, padx=padx, pady=pady, sticky="nsew")
## select translation input target language
self.optionmenu_translation_input_target_language = customtkinter.CTkOptionMenu(
@@ -160,17 +164,18 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel):
variable=customtkinter.StringVar(value=self.parent.INPUT_TARGET_LANG),
font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY),
)
self.optionmenu_translation_input_target_language.grid(row=1, column=3, columnspan=1, padx=5, pady=5, sticky="nsew")
self.optionmenu_translation_input_target_language.grid(row=row, column=3, columnspan=1, padx=padx, pady=pady, sticky="nsew")
self.optionmenu_translation_input_target_language._dropdown_menu.configure(font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY))
## optionmenu translation output language
row +=1
self.label_translation_output_language = customtkinter.CTkLabel(
self.tabview_config.tab("Translation"),
text="Receive Language:",
fg_color="transparent",
font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY)
)
self.label_translation_output_language.grid(row=2, column=0, columnspan=1, padx=5, pady=5, sticky="nsw")
self.label_translation_output_language.grid(row=row, column=0, columnspan=1, padx=padx, pady=pady, sticky="nsw")
## select translation output source language
self.optionmenu_translation_output_source_language = customtkinter.CTkOptionMenu(
@@ -180,7 +185,7 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel):
variable=customtkinter.StringVar(value=self.parent.OUTPUT_SOURCE_LANG),
font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY),
)
self.optionmenu_translation_output_source_language.grid(row=2, column=1, columnspan=1, padx=5, pady=5, sticky="nsew")
self.optionmenu_translation_output_source_language.grid(row=row, column=1, columnspan=1, padx=padx, pady=pady, sticky="nsew")
self.optionmenu_translation_output_source_language._dropdown_menu.configure(font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY))
## label translation output arrow
@@ -190,7 +195,7 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel):
fg_color="transparent",
font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY)
)
self.label_translation_output_arrow.grid(row=2, column=2, columnspan=1, padx=5, pady=5, sticky="nsew")
self.label_translation_output_arrow.grid(row=row, column=2, columnspan=1, padx=padx, pady=pady, sticky="nsew")
## select translation output target language
self.optionmenu_translation_output_target_language = customtkinter.CTkOptionMenu(
@@ -200,7 +205,7 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel):
variable=customtkinter.StringVar(value=self.parent.OUTPUT_TARGET_LANG),
font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY),
)
self.optionmenu_translation_output_target_language.grid(row=2, column=3, columnspan=1, padx=5, pady=5, sticky="nsew")
self.optionmenu_translation_output_target_language.grid(row=row, column=3, columnspan=1, padx=padx, pady=pady, sticky="nsew")
self.optionmenu_translation_output_target_language._dropdown_menu.configure(font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY))
# tab Transcription