add Hyperparameter
phrase_timeout, max_phrases
This commit is contained in:
38
VRCT.py
38
VRCT.py
@@ -44,16 +44,19 @@ class App(customtkinter.CTk):
|
||||
## Transcription Send
|
||||
self.CHOICE_MIC_DEVICE = audio_utils.get_default_input_device()["name"]
|
||||
self.INPUT_MIC_VOICE_LANGUAGE = list(languages.transcription_lang.keys())[0]
|
||||
self.INPUT_MIC_ENERGY_THRESHOLD = 1000
|
||||
self.INPUT_MIC_DYNAMIC_ENERGY_THRESHOLD = False
|
||||
self.INPUT_MIC_ENERGY_THRESHOLD = 300
|
||||
self.INPUT_MIC_DYNAMIC_ENERGY_THRESHOLD = True
|
||||
self.INPUT_MIC_RECORD_TIMEOUT = 3
|
||||
self.INPUT_MIC_PHRASE_TIMEOUT = 3
|
||||
self.INPUT_MIC_MAX_PHRASES = 10
|
||||
## Transcription Receive
|
||||
self.CHOICE_SPEAKER_DEVICE = audio_utils.get_default_output_device()["name"]
|
||||
self.INPUT_SPEAKER_VOICE_LANGUAGE = list(languages.transcription_lang.keys())[1]
|
||||
self.INPUT_SPEAKER_ENERGY_THRESHOLD = 1000
|
||||
self.INPUT_SPEAKER_DYNAMIC_ENERGY_THRESHOLD = False
|
||||
self.INPUT_SPEAKER_ENERGY_THRESHOLD = 300
|
||||
self.INPUT_SPEAKER_DYNAMIC_ENERGY_THRESHOLD = True
|
||||
self.INPUT_SPEAKER_RECORD_TIMEOUT = 3
|
||||
|
||||
self.INPUT_SPEAKER_PHRASE_TIMEOUT = 3
|
||||
self.INPUT_SPEAKER_MAX_PHRASES = 10
|
||||
## Parameter
|
||||
self.OSC_IP_ADDRESS = "127.0.0.1"
|
||||
self.OSC_PORT = 9000
|
||||
@@ -131,6 +134,13 @@ class App(customtkinter.CTk):
|
||||
if "INPUT_MIC_RECORD_TIMEOUT" in config.keys():
|
||||
if type(config["INPUT_MIC_RECORD_TIMEOUT"]) is int:
|
||||
self.INPUT_MIC_RECORD_TIMEOUT = config["INPUT_MIC_RECORD_TIMEOUT"]
|
||||
if "INPUT_MIC_PHRASE_TIMEOUT" in config.keys():
|
||||
if type(config["INPUT_MIC_PHRASE_TIMEOUT"]) is int:
|
||||
self.INPUT_MIC_PHRASE_TIMEOUT = config["INPUT_MIC_PHRASE_TIMEOUT"]
|
||||
if "INPUT_MIC_MAX_PHRASES" in config.keys():
|
||||
if type(config["INPUT_MIC_MAX_PHRASES"]) is int:
|
||||
self.INPUT_MIC_MAX_PHRASES = config["INPUT_MIC_MAX_PHRASES"]
|
||||
|
||||
if "CHOICE_SPEAKER_DEVICE" in config.keys():
|
||||
if config["CHOICE_SPEAKER_DEVICE"] in [device["name"] for device in audio_utils.get_output_device_list()]:
|
||||
self.CHOICE_SPEAKER_DEVICE = config["CHOICE_SPEAKER_DEVICE"]
|
||||
@@ -146,6 +156,12 @@ class App(customtkinter.CTk):
|
||||
if "INPUT_SPEAKER_RECORD_TIMEOUT" in config.keys():
|
||||
if type(config["INPUT_SPEAKER_RECORD_TIMEOUT"]) is int:
|
||||
self.INPUT_SPEAKER_RECORD_TIMEOUT = config["INPUT_SPEAKER_RECORD_TIMEOUT"]
|
||||
if "INPUT_SPEAKER_PHRASE_TIMEOUT" in config.keys():
|
||||
if type(config["INPUT_SPEAKER_PHRASE_TIMEOUT"]) is int:
|
||||
self.INPUT_SPEAKER_PHRASE_TIMEOUT = config["INPUT_SPEAKER_PHRASE_TIMEOUT"]
|
||||
if "INPUT_SPEAKER_MAX_PHRASES" in config.keys():
|
||||
if type(config["INPUT_SPEAKER_MAX_PHRASES"]) is int:
|
||||
# Apply the saved value to the speaker setting (the mic setting has its own key).
self.INPUT_SPEAKER_MAX_PHRASES = config["INPUT_SPEAKER_MAX_PHRASES"]
|
||||
|
||||
# Parameter
|
||||
if "OSC_IP_ADDRESS" in config.keys():
|
||||
@@ -184,11 +200,15 @@ class App(customtkinter.CTk):
|
||||
"INPUT_MIC_ENERGY_THRESHOLD": self.INPUT_MIC_ENERGY_THRESHOLD,
|
||||
"INPUT_MIC_DYNAMIC_ENERGY_THRESHOLD": self.INPUT_MIC_DYNAMIC_ENERGY_THRESHOLD,
|
||||
"INPUT_MIC_RECORD_TIMEOUT": self.INPUT_MIC_RECORD_TIMEOUT,
|
||||
"INPUT_MIC_PHRASE_TIMEOUT": self.INPUT_MIC_PHRASE_TIMEOUT,
|
||||
"INPUT_MIC_MAX_PHRASES": self.INPUT_MIC_MAX_PHRASES,
|
||||
"CHOICE_SPEAKER_DEVICE": self.CHOICE_SPEAKER_DEVICE,
|
||||
"INPUT_SPEAKER_VOICE_LANGUAGE": self.INPUT_SPEAKER_VOICE_LANGUAGE,
|
||||
"INPUT_SPEAKER_ENERGY_THRESHOLD": self.INPUT_SPEAKER_ENERGY_THRESHOLD,
|
||||
"INPUT_SPEAKER_DYNAMIC_ENERGY_THRESHOLD": self.INPUT_SPEAKER_DYNAMIC_ENERGY_THRESHOLD,
|
||||
"INPUT_SPEAKER_RECORD_TIMEOUT": self.INPUT_SPEAKER_RECORD_TIMEOUT,
|
||||
"INPUT_SPEAKER_PHRASE_TIMEOUT": self.INPUT_SPEAKER_PHRASE_TIMEOUT,
|
||||
"INPUT_SPEAKER_MAX_PHRASES": self.INPUT_SPEAKER_MAX_PHRASES,
|
||||
"OSC_IP_ADDRESS": self.OSC_IP_ADDRESS,
|
||||
"OSC_PORT": self.OSC_PORT,
|
||||
"AUTH_KEYS": self.AUTH_KEYS,
|
||||
@@ -423,7 +443,9 @@ class App(customtkinter.CTk):
|
||||
self.mic_transcriber = audio_transcriber.AudioTranscriber(
|
||||
speaker=False,
|
||||
source=self.mic_audio_recorder.source,
|
||||
language=languages.transcription_lang[self.INPUT_MIC_VOICE_LANGUAGE]
|
||||
language=languages.transcription_lang[self.INPUT_MIC_VOICE_LANGUAGE],
|
||||
phrase_timeout=self.INPUT_MIC_PHRASE_TIMEOUT,
|
||||
max_phrases=self.INPUT_MIC_MAX_PHRASES,
|
||||
)
|
||||
self.mic_transcribe = utils.thread_fnc(self.mic_transcriber.transcribe_audio_queue, args=(self.mic_audio_queue,))
|
||||
self.mic_transcribe.daemon = True
|
||||
@@ -461,7 +483,9 @@ class App(customtkinter.CTk):
|
||||
self.spk_transcriber = audio_transcriber.AudioTranscriber(
|
||||
speaker=True,
|
||||
source=self.spk_audio_recorder.source,
|
||||
language=languages.transcription_lang[self.INPUT_SPEAKER_VOICE_LANGUAGE]
|
||||
language=languages.transcription_lang[self.INPUT_SPEAKER_VOICE_LANGUAGE],
|
||||
phrase_timeout=self.INPUT_SPEAKER_PHRASE_TIMEOUT,
|
||||
max_phrases=self.INPUT_SPEAKER_MAX_PHRASES,
|
||||
)
|
||||
self.spk_transcribe = utils.thread_fnc(self.spk_transcriber.transcribe_audio_queue, args=(self.spk_audio_queue,))
|
||||
self.spk_transcribe.daemon = True
|
||||
|
||||
@@ -9,9 +9,11 @@ PHRASE_TIMEOUT = 3
|
||||
MAX_PHRASES = 10
|
||||
|
||||
class AudioTranscriber:
|
||||
def __init__(self, speaker, source, language):
|
||||
def __init__(self, speaker, source, language, phrase_timeout, max_phrases):
|
||||
self.speaker = speaker
|
||||
self.language = language
|
||||
self.phrase_timeout = phrase_timeout
|
||||
self.max_phrases = max_phrases
|
||||
self.transcript_data = []
|
||||
self.transcript_changed_event = threading.Event()
|
||||
self.audio_recognizer = sr.Recognizer()
|
||||
@@ -47,7 +49,7 @@ class AudioTranscriber:
|
||||
|
||||
def update_last_sample_and_phrase_status(self, data, time_spoken):
|
||||
source_info = self.audio_sources
|
||||
if source_info["last_spoken"] and time_spoken - source_info["last_spoken"] > timedelta(seconds=PHRASE_TIMEOUT):
|
||||
if source_info["last_spoken"] and time_spoken - source_info["last_spoken"] > timedelta(seconds=self.phrase_timeout):
|
||||
source_info["last_sample"] = bytes()
|
||||
source_info["new_phrase"] = True
|
||||
else:
|
||||
@@ -78,7 +80,7 @@ class AudioTranscriber:
|
||||
transcript = self.transcript_data
|
||||
|
||||
if source_info["new_phrase"] or len(transcript) == 0:
|
||||
if len(transcript) > MAX_PHRASES:
|
||||
if len(transcript) > self.max_phrases:
|
||||
transcript.pop(-1)
|
||||
transcript.insert(0, text)
|
||||
else:
|
||||
|
||||
@@ -106,13 +106,16 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel):
|
||||
|
||||
# tab Translation
|
||||
## optionmenu translation translator
|
||||
row = 0
|
||||
padx = 5
|
||||
pady = 1
|
||||
self.label_translation_translator = customtkinter.CTkLabel(
|
||||
self.tabview_config.tab("Translation"),
|
||||
text="Select Translator:",
|
||||
fg_color="transparent",
|
||||
font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY),
|
||||
)
|
||||
self.label_translation_translator.grid(row=0, column=0, columnspan=1, padx=5, pady=5, sticky="nsw")
|
||||
self.label_translation_translator.grid(row=row, column=0, columnspan=1, padx=padx, pady=pady, sticky="nsw")
|
||||
self.optionmenu_translation_translator = customtkinter.CTkOptionMenu(
|
||||
self.tabview_config.tab("Translation"),
|
||||
values=list(self.parent.translator.translator_status.keys()),
|
||||
@@ -120,17 +123,18 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel):
|
||||
variable=customtkinter.StringVar(value=self.parent.CHOICE_TRANSLATOR),
|
||||
font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY),
|
||||
)
|
||||
self.optionmenu_translation_translator.grid(row=0, column=1, columnspan=3 ,padx=5, pady=5, sticky="nsew")
|
||||
self.optionmenu_translation_translator.grid(row=row, column=1, columnspan=3 ,padx=padx, pady=pady, sticky="nsew")
|
||||
self.optionmenu_translation_translator._dropdown_menu.configure(font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY))
|
||||
|
||||
## optionmenu translation input language
|
||||
row +=1
|
||||
self.label_translation_input_language = customtkinter.CTkLabel(
|
||||
self.tabview_config.tab("Translation"),
|
||||
text="Send Language:",
|
||||
fg_color="transparent",
|
||||
font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY)
|
||||
)
|
||||
self.label_translation_input_language.grid(row=1, column=0, columnspan=1, padx=5, pady=5, sticky="nsw")
|
||||
self.label_translation_input_language.grid(row=row, column=0, columnspan=1, padx=padx, pady=pady, sticky="nsw")
|
||||
|
||||
## select translation input source language
|
||||
self.optionmenu_translation_input_source_language = customtkinter.CTkOptionMenu(
|
||||
@@ -140,7 +144,7 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel):
|
||||
variable=customtkinter.StringVar(value=self.parent.INPUT_SOURCE_LANG),
|
||||
font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY),
|
||||
)
|
||||
self.optionmenu_translation_input_source_language.grid(row=1, column=1, columnspan=1, padx=5, pady=5, sticky="nsew")
|
||||
self.optionmenu_translation_input_source_language.grid(row=row, column=1, columnspan=1, padx=padx, pady=pady, sticky="nsew")
|
||||
self.optionmenu_translation_input_source_language._dropdown_menu.configure(font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY))
|
||||
|
||||
## label translation input arrow
|
||||
@@ -150,7 +154,7 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel):
|
||||
fg_color="transparent",
|
||||
font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY)
|
||||
)
|
||||
self.label_translation_input_arrow.grid(row=1, column=2, columnspan=1, padx=5, pady=5, sticky="nsew")
|
||||
self.label_translation_input_arrow.grid(row=row, column=2, columnspan=1, padx=padx, pady=pady, sticky="nsew")
|
||||
|
||||
## select translation input target language
|
||||
self.optionmenu_translation_input_target_language = customtkinter.CTkOptionMenu(
|
||||
@@ -160,17 +164,18 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel):
|
||||
variable=customtkinter.StringVar(value=self.parent.INPUT_TARGET_LANG),
|
||||
font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY),
|
||||
)
|
||||
self.optionmenu_translation_input_target_language.grid(row=1, column=3, columnspan=1, padx=5, pady=5, sticky="nsew")
|
||||
self.optionmenu_translation_input_target_language.grid(row=row, column=3, columnspan=1, padx=padx, pady=pady, sticky="nsew")
|
||||
self.optionmenu_translation_input_target_language._dropdown_menu.configure(font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY))
|
||||
|
||||
## optionmenu translation output language
|
||||
row +=1
|
||||
self.label_translation_output_language = customtkinter.CTkLabel(
|
||||
self.tabview_config.tab("Translation"),
|
||||
text="Receive Language:",
|
||||
fg_color="transparent",
|
||||
font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY)
|
||||
)
|
||||
self.label_translation_output_language.grid(row=2, column=0, columnspan=1, padx=5, pady=5, sticky="nsw")
|
||||
self.label_translation_output_language.grid(row=row, column=0, columnspan=1, padx=padx, pady=pady, sticky="nsw")
|
||||
|
||||
## select translation output source language
|
||||
self.optionmenu_translation_output_source_language = customtkinter.CTkOptionMenu(
|
||||
@@ -180,7 +185,7 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel):
|
||||
variable=customtkinter.StringVar(value=self.parent.OUTPUT_SOURCE_LANG),
|
||||
font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY),
|
||||
)
|
||||
self.optionmenu_translation_output_source_language.grid(row=2, column=1, columnspan=1, padx=5, pady=5, sticky="nsew")
|
||||
self.optionmenu_translation_output_source_language.grid(row=row, column=1, columnspan=1, padx=padx, pady=pady, sticky="nsew")
|
||||
self.optionmenu_translation_output_source_language._dropdown_menu.configure(font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY))
|
||||
|
||||
## label translation output arrow
|
||||
@@ -190,7 +195,7 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel):
|
||||
fg_color="transparent",
|
||||
font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY)
|
||||
)
|
||||
self.label_translation_output_arrow.grid(row=2, column=2, columnspan=1, padx=5, pady=5, sticky="nsew")
|
||||
self.label_translation_output_arrow.grid(row=row, column=2, columnspan=1, padx=padx, pady=pady, sticky="nsew")
|
||||
|
||||
## select translation output target language
|
||||
self.optionmenu_translation_output_target_language = customtkinter.CTkOptionMenu(
|
||||
@@ -200,7 +205,7 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel):
|
||||
variable=customtkinter.StringVar(value=self.parent.OUTPUT_TARGET_LANG),
|
||||
font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY),
|
||||
)
|
||||
self.optionmenu_translation_output_target_language.grid(row=2, column=3, columnspan=1, padx=5, pady=5, sticky="nsew")
|
||||
self.optionmenu_translation_output_target_language.grid(row=row, column=3, columnspan=1, padx=padx, pady=pady, sticky="nsew")
|
||||
self.optionmenu_translation_output_target_language._dropdown_menu.configure(font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY))
|
||||
|
||||
# tab Transcription
|
||||
|
||||
Reference in New Issue
Block a user