Merge branch 'transcription' into develop

This commit is contained in:
misyaguziya
2023-06-15 09:40:21 +09:00
4 changed files with 458 additions and 229 deletions

282
VRCT.py
View File

@@ -19,7 +19,8 @@ class App(customtkinter.CTk):
self.PATH_CONFIG = "./config.json" self.PATH_CONFIG = "./config.json"
## main window ## main window
self.ENABLE_TRANSLATION = False self.ENABLE_TRANSLATION = False
self.ENABLE_TRANSCRIPTION = False self.ENABLE_TRANSCRIPTION_SEND = False
self.ENABLE_TRANSCRIPTION_RECEIVE = False
self.ENABLE_FOREGROUND = False self.ENABLE_FOREGROUND = False
## UI ## UI
self.TRANSPARENCY = 100 self.TRANSPARENCY = 100
@@ -30,17 +31,19 @@ class App(customtkinter.CTk):
self.CHOICE_TRANSLATOR = "DeepL(web)" self.CHOICE_TRANSLATOR = "DeepL(web)"
self.INPUT_SOURCE_LANG = "JA" self.INPUT_SOURCE_LANG = "JA"
self.INPUT_TARGET_LANG = "EN" self.INPUT_TARGET_LANG = "EN"
self.OUTPUT_SOURCE_LANG = "JA" self.OUTPUT_SOURCE_LANG = "EN"
self.OUTPUT_TARGET_LANG = "EN" self.OUTPUT_TARGET_LANG = "JA"
## Transcription ## Transcription
self.CHOICE_MIC_DEVICE = None self.CHOICE_MIC_DEVICE = None
self.INPUT_MIC_VOICE_LANGUAGE = "ja-JP" self.INPUT_MIC_VOICE_LANGUAGE = "ja-JP"
self.ENABLE_MIC_IS_DYNAMIC = False self.INPUT_MIC_IS_DYNAMIC = False
self.MIC_THRESHOLD = 300 self.INPUT_MIC_THRESHOLD = 300
self.CHOICE_SPEAKER_DEVICE = None self.CHOICE_SPEAKER_DEVICE = None
self.INPUT_SPEAKER_VOICE_LANGUAGE = "ja-JP" self.INPUT_SPEAKER_VOICE_LANGUAGE = "en-US"
self.ENABLE_SPEAKER_IS_DYNAMIC = False self.INPUT_SPEAKER_SAMPLING_RATE = 16000
self.SPEAKER_THRESHOLD = 300 self.INPUT_SPEAKER_INTERVAL = 3
self.INPUT_SPEAKER_BUFFER_SIZE = 4096
## Parameter ## Parameter
self.OSC_IP_ADDRESS = "127.0.0.1" self.OSC_IP_ADDRESS = "127.0.0.1"
self.OSC_PORT = 9000 self.OSC_PORT = 9000
@@ -59,8 +62,10 @@ class App(customtkinter.CTk):
# main window # main window
if "ENABLE_TRANSLATION" in config.keys(): if "ENABLE_TRANSLATION" in config.keys():
self.ENABLE_TRANSLATION = config["ENABLE_TRANSLATION"] self.ENABLE_TRANSLATION = config["ENABLE_TRANSLATION"]
if "ENABLE_TRANSCRIPTION" in config.keys(): if "ENABLE_TRANSCRIPTION_SEND" in config.keys():
self.ENABLE_TRANSCRIPTION = config["ENABLE_TRANSCRIPTION"] self.ENABLE_TRANSCRIPTION_SEND = config["ENABLE_TRANSCRIPTION_SEND"]
if "ENABLE_TRANSCRIPTION_RECEIVE" in config.keys():
self.ENABLE_TRANSCRIPTION_RECEIVE = config["ENABLE_TRANSCRIPTION_RECEIVE"]
if "ENABLE_FOREGROUND" in config.keys(): if "ENABLE_FOREGROUND" in config.keys():
self.ENABLE_FOREGROUND = config["ENABLE_FOREGROUND"] self.ENABLE_FOREGROUND = config["ENABLE_FOREGROUND"]
@@ -91,18 +96,20 @@ class App(customtkinter.CTk):
self.CHOICE_MIC_DEVICE = config["CHOICE_MIC_DEVICE"] self.CHOICE_MIC_DEVICE = config["CHOICE_MIC_DEVICE"]
if "INPUT_MIC_VOICE_LANGUAGE" in config.keys(): if "INPUT_MIC_VOICE_LANGUAGE" in config.keys():
self.INPUT_MIC_VOICE_LANGUAGE = config["INPUT_MIC_VOICE_LANGUAGE"] self.INPUT_MIC_VOICE_LANGUAGE = config["INPUT_MIC_VOICE_LANGUAGE"]
if "ENABLE_MIC_IS_DYNAMIC" in config.keys(): if "INPUT_MIC_IS_DYNAMIC" in config.keys():
self.ENABLE_MIC_IS_DYNAMIC = config["ENABLE_MIC_IS_DYNAMIC"] self.INPUT_MIC_IS_DYNAMIC = config["INPUT_MIC_IS_DYNAMIC"]
if "MIC_THRESHOLD" in config.keys(): if "INPUT_MIC_THRESHOLD" in config.keys():
self.MIC_THRESHOLD = config["MIC_THRESHOLD"] self.INPUT_MIC_THRESHOLD = config["INPUT_MIC_THRESHOLD"]
if "CHOICE_SPEAKER_DEVICE" in config.keys(): if "CHOICE_SPEAKER_DEVICE" in config.keys():
self.CHOICE_SPEAKER_DEVICE = config["CHOICE_SPEAKER_DEVICE"] self.CHOICE_SPEAKER_DEVICE = config["CHOICE_SPEAKER_DEVICE"]
if "INPUT_SPEAKER_VOICE_LANGUAGE" in config.keys(): if "INPUT_SPEAKER_VOICE_LANGUAGE" in config.keys():
self.INPUT_SPEAKER_VOICE_LANGUAGE = config["INPUT_SPEAKER_VOICE_LANGUAGE"] self.INPUT_SPEAKER_VOICE_LANGUAGE = config["INPUT_SPEAKER_VOICE_LANGUAGE"]
if "ENABLE_SPEAKER_IS_DYNAMIC" in config.keys(): if "INPUT_SPEAKER_SAMPLING_RATE" in config.keys():
self.ENABLE_SPEAKER_IS_DYNAMIC = config["ENABLE_SPEAKER_IS_DYNAMIC"] self.INPUT_SPEAKER_SAMPLING_RATE = config["INPUT_SPEAKER_SAMPLING_RATE"]
if "SPEAKER_THRESHOLD" in config.keys(): if "INPUT_SPEAKER_INTERVAL" in config.keys():
self.SPEAKER_THRESHOLD = config["SPEAKER_THRESHOLD"] self.INPUT_SPEAKER_INTERVAL = config["INPUT_SPEAKER_INTERVAL"]
if "INPUT_SPEAKER_BUFFER_SIZE" in config.keys():
self.INPUT_SPEAKER_BUFFER_SIZE = config["INPUT_SPEAKER_BUFFER_SIZE"]
# Parameter # Parameter
if "OSC_IP_ADDRESS" in config.keys(): if "OSC_IP_ADDRESS" in config.keys():
@@ -117,7 +124,8 @@ class App(customtkinter.CTk):
with open(self.PATH_CONFIG, 'w') as fp: with open(self.PATH_CONFIG, 'w') as fp:
config = { config = {
"ENABLE_TRANSLATION": self.ENABLE_TRANSLATION, "ENABLE_TRANSLATION": self.ENABLE_TRANSLATION,
"ENABLE_TRANSCRIPTION": self.ENABLE_TRANSCRIPTION, "ENABLE_TRANSCRIPTION_SEND": self.ENABLE_TRANSCRIPTION_SEND,
"ENABLE_TRANSCRIPTION_RECEIVE": self.ENABLE_TRANSCRIPTION_RECEIVE,
"ENABLE_FOREGROUND": self.ENABLE_FOREGROUND, "ENABLE_FOREGROUND": self.ENABLE_FOREGROUND,
"TRANSPARENCY": self.TRANSPARENCY, "TRANSPARENCY": self.TRANSPARENCY,
"APPEARANCE_THEME": self.APPEARANCE_THEME, "APPEARANCE_THEME": self.APPEARANCE_THEME,
@@ -130,12 +138,13 @@ class App(customtkinter.CTk):
"OUTPUT_TARGET_LANG": self.OUTPUT_TARGET_LANG, "OUTPUT_TARGET_LANG": self.OUTPUT_TARGET_LANG,
"CHOICE_MIC_DEVICE": self.CHOICE_MIC_DEVICE, "CHOICE_MIC_DEVICE": self.CHOICE_MIC_DEVICE,
"INPUT_MIC_VOICE_LANGUAGE": self.INPUT_MIC_VOICE_LANGUAGE, "INPUT_MIC_VOICE_LANGUAGE": self.INPUT_MIC_VOICE_LANGUAGE,
"ENABLE_MIC_IS_DYNAMIC": self.ENABLE_MIC_IS_DYNAMIC, "INPUT_MIC_IS_DYNAMIC": self.INPUT_MIC_IS_DYNAMIC,
"MIC_THRESHOLD": self.MIC_THRESHOLD, "INPUT_MIC_THRESHOLD": self.INPUT_MIC_THRESHOLD,
"CHOICE_SPEAKER_DEVICE": self.CHOICE_SPEAKER_DEVICE, "CHOICE_SPEAKER_DEVICE": self.CHOICE_SPEAKER_DEVICE,
"INPUT_SPEAKER_VOICE_LANGUAGE": self.INPUT_SPEAKER_VOICE_LANGUAGE, "INPUT_SPEAKER_VOICE_LANGUAGE": self.INPUT_SPEAKER_VOICE_LANGUAGE,
"ENABLE_SPEAKER_IS_DYNAMIC": self.ENABLE_SPEAKER_IS_DYNAMIC, "INPUT_SPEAKER_SAMPLING_RATE": self.INPUT_SPEAKER_SAMPLING_RATE,
"SPEAKER_THRESHOLD": self.SPEAKER_THRESHOLD, "INPUT_SPEAKER_INTERVAL": self.INPUT_SPEAKER_INTERVAL,
"INPUT_SPEAKER_BUFFER_SIZE": self.INPUT_SPEAKER_BUFFER_SIZE,
"OSC_IP_ADDRESS": self.OSC_IP_ADDRESS, "OSC_IP_ADDRESS": self.OSC_IP_ADDRESS,
"OSC_PORT": self.OSC_PORT, "OSC_PORT": self.OSC_PORT,
"AUTH_KEYS": self.AUTH_KEYS, "AUTH_KEYS": self.AUTH_KEYS,
@@ -146,8 +155,8 @@ class App(customtkinter.CTk):
# init main window # init main window
self.iconbitmap(os.path.join(os.path.dirname(__file__), "img", "app.ico")) self.iconbitmap(os.path.join(os.path.dirname(__file__), "img", "app.ico"))
self.title("VRCT") self.title("VRCT")
self.geometry(f"{400}x{140}") self.geometry(f"{400}x{170}")
self.minsize(400, 140) self.minsize(400, 170)
self.grid_columnconfigure(1, weight=1) self.grid_columnconfigure(1, weight=1)
self.grid_rowconfigure(0, weight=1) self.grid_rowconfigure(0, weight=1)
@@ -167,16 +176,27 @@ class App(customtkinter.CTk):
) )
self.checkbox_translation.grid(row=0, column=0, columnspan=2 ,padx=10, pady=(5, 5), sticky="we") self.checkbox_translation.grid(row=0, column=0, columnspan=2 ,padx=10, pady=(5, 5), sticky="we")
# add checkbox transcription # add checkbox transcription send
self.checkbox_transcription = customtkinter.CTkCheckBox( self.checkbox_transcription_send = customtkinter.CTkCheckBox(
self.sidebar_frame, self.sidebar_frame,
text="Transcription", text="TranscriptionSend",
onvalue=True, onvalue=True,
offvalue=False, offvalue=False,
command=self.checkbox_transcription_callback, command=self.checkbox_transcription_send_callback,
font=customtkinter.CTkFont(family=self.FONT_FAMILY) font=customtkinter.CTkFont(family=self.FONT_FAMILY)
) )
self.checkbox_transcription.grid(row=1, column=0, columnspan=2 ,padx=10, pady=(5, 5), sticky="we") self.checkbox_transcription_send.grid(row=1, column=0, columnspan=2 ,padx=10, pady=(5, 5), sticky="we")
# add checkbox transcription receive
self.checkbox_transcription_receive = customtkinter.CTkCheckBox(
self.sidebar_frame,
text="TranscriptionReceive",
onvalue=True,
offvalue=False,
command=self.checkbox_transcription_receive_callback,
font=customtkinter.CTkFont(family=self.FONT_FAMILY)
)
self.checkbox_transcription_receive.grid(row=2, column=0, columnspan=2 ,padx=10, pady=(5, 5), sticky="we")
# add checkbox foreground # add checkbox foreground
self.checkbox_foreground = customtkinter.CTkCheckBox( self.checkbox_foreground = customtkinter.CTkCheckBox(
@@ -187,7 +207,7 @@ class App(customtkinter.CTk):
command=self.checkbox_foreground_callback, command=self.checkbox_foreground_callback,
font=customtkinter.CTkFont(family=self.FONT_FAMILY) font=customtkinter.CTkFont(family=self.FONT_FAMILY)
) )
self.checkbox_foreground.grid(row=2, column=0, columnspan=2 ,padx=10, pady=(5, 5), sticky="we") self.checkbox_foreground.grid(row=3, column=0, columnspan=2 ,padx=10, pady=(5, 5), sticky="we")
# add button information # add button information
self.button_information = customtkinter.CTkButton( self.button_information = customtkinter.CTkButton(
@@ -215,23 +235,25 @@ class App(customtkinter.CTk):
self.tabview_logs = customtkinter.CTkTabview(master=self) self.tabview_logs = customtkinter.CTkTabview(master=self)
self.tabview_logs.add("send") self.tabview_logs.add("send")
self.tabview_logs.add("receive") self.tabview_logs.add("receive")
self.tabview_logs.add("system")
self.tabview_logs.grid(row=0, column=1, padx=5, pady=0, sticky="nsew") self.tabview_logs.grid(row=0, column=1, padx=5, pady=0, sticky="nsew")
self.tabview_logs._segmented_button.grid(sticky="W") self.tabview_logs._segmented_button.grid(sticky="W")
self.tabview_logs.tab("send").grid_rowconfigure(0, weight=1) self.tabview_logs.tab("send").grid_rowconfigure(0, weight=1)
self.tabview_logs.tab("send").grid_columnconfigure(0, weight=1) self.tabview_logs.tab("send").grid_columnconfigure(0, weight=1)
self.tabview_logs.tab("receive").grid_rowconfigure(0, weight=1) self.tabview_logs.tab("receive").grid_rowconfigure(0, weight=1)
self.tabview_logs.tab("receive").grid_columnconfigure(0, weight=1) self.tabview_logs.tab("receive").grid_columnconfigure(0, weight=1)
self.tabview_logs.configure(state='disabled') self.tabview_logs.tab("system").grid_rowconfigure(0, weight=1)
self.tabview_logs.tab("system").grid_columnconfigure(0, weight=1)
# add textbox message log # add textbox message send log
self.textbox_message_log = customtkinter.CTkTextbox( self.textbox_message_send_log = customtkinter.CTkTextbox(
self.tabview_logs.tab("send"), self.tabview_logs.tab("send"),
font=customtkinter.CTkFont(family=self.FONT_FAMILY) font=customtkinter.CTkFont(family=self.FONT_FAMILY)
) )
self.textbox_message_log.grid(row=0, column=0, padx=0, pady=0, sticky="nsew") self.textbox_message_send_log.grid(row=0, column=0, padx=0, pady=0, sticky="nsew")
self.textbox_message_log.configure(state='disabled') self.textbox_message_send_log.configure(state='disabled')
# add textbox message log # add textbox message receive log
self.textbox_message_receive_log = customtkinter.CTkTextbox( self.textbox_message_receive_log = customtkinter.CTkTextbox(
self.tabview_logs.tab("receive"), self.tabview_logs.tab("receive"),
font=customtkinter.CTkFont(family=self.FONT_FAMILY) font=customtkinter.CTkFont(family=self.FONT_FAMILY)
@@ -239,6 +261,14 @@ class App(customtkinter.CTk):
self.textbox_message_receive_log.grid(row=0, column=0, padx=0, pady=0, sticky="nsew") self.textbox_message_receive_log.grid(row=0, column=0, padx=0, pady=0, sticky="nsew")
self.textbox_message_receive_log.configure(state='disabled') self.textbox_message_receive_log.configure(state='disabled')
# add textbox message system log
self.textbox_message_system_log = customtkinter.CTkTextbox(
self.tabview_logs.tab("system"),
font=customtkinter.CTkFont(family=self.FONT_FAMILY)
)
self.textbox_message_system_log.grid(row=0, column=0, padx=0, pady=0, sticky="nsew")
self.textbox_message_system_log.configure(state='disabled')
# add entry message box # add entry message box
self.entry_message_box = customtkinter.CTkEntry( self.entry_message_box = customtkinter.CTkEntry(
self, self,
@@ -252,14 +282,12 @@ class App(customtkinter.CTk):
self.translator = translation.Translator() self.translator = translation.Translator()
if self.translator.authentication(self.CHOICE_TRANSLATOR, self.AUTH_KEYS[self.CHOICE_TRANSLATOR]) is False: if self.translator.authentication(self.CHOICE_TRANSLATOR, self.AUTH_KEYS[self.CHOICE_TRANSLATOR]) is False:
# error update Auth key # error update Auth key
self.textbox_message_log.configure(state='normal') utils.print_textbox(self.textbox_message_system_log, "[error] Auth Key or language setting is incorrect")
self.textbox_message_log.insert("end", f"[ERROR] Auth Keyを設定してないか間違っています\n")
self.textbox_message_log.configure(state='disabled')
self.textbox_message_log.see("end")
## set transcription instance ## set transcription instance
self.vr = transcription.VoiceRecognizer() self.vr = transcription.VoiceRecognizer()
self.CHOICE_MIC_DEVICE = self.CHOICE_MIC_DEVICE if self.CHOICE_MIC_DEVICE is not None else list(self.vr.input_device_dict.keys())[0] self.CHOICE_MIC_DEVICE = self.CHOICE_MIC_DEVICE if self.CHOICE_MIC_DEVICE is not None else self.vr.search_default_device()[0]
self.CHOICE_SPEAKER_DEVICE = self.CHOICE_SPEAKER_DEVICE if self.CHOICE_SPEAKER_DEVICE is not None else self.vr.search_default_device()[1]
## set checkbox enable translation ## set checkbox enable translation
if self.ENABLE_TRANSLATION: if self.ENABLE_TRANSLATION:
@@ -268,19 +296,30 @@ class App(customtkinter.CTk):
else: else:
self.checkbox_translation.deselect() self.checkbox_translation.deselect()
## set checkbox enable transcription ## set checkbox enable transcription send
if self.ENABLE_TRANSCRIPTION: self.th_vr_listen_mic = None
self.checkbox_transcription.select() self.th_vr_recognize_mic = None
if self.ENABLE_TRANSCRIPTION_SEND:
self.checkbox_transcription_send.select()
self.checkbox_transcription_send_callback()
else: else:
self.checkbox_transcription.deselect() self.checkbox_transcription_send.deselect()
self.checkbox_transcription_callback()
## set checkbox enable transcription receive
self.th_vr_listen_spk = None
self.th_vr_recognize_spk = None
if self.ENABLE_TRANSCRIPTION_RECEIVE:
self.checkbox_transcription_receive.select()
self.checkbox_transcription_receive_callback()
else:
self.checkbox_transcription_receive.deselect()
## set set checkbox enable foreground ## set set checkbox enable foreground
if self.ENABLE_FOREGROUND: if self.ENABLE_FOREGROUND:
self.checkbox_foreground.select() self.checkbox_foreground.select()
self.checkbox_foreground_callback()
else: else:
self.checkbox_foreground.deselect() self.checkbox_foreground.deselect()
self.checkbox_foreground_callback()
## set bind entry message box ## set bind entry message box
self.entry_message_box.bind("<Return>", self.entry_message_box_press_key_enter) self.entry_message_box.bind("<Return>", self.entry_message_box_press_key_enter)
@@ -298,6 +337,9 @@ class App(customtkinter.CTk):
customtkinter.set_appearance_mode(self.APPEARANCE_THEME) customtkinter.set_appearance_mode(self.APPEARANCE_THEME)
customtkinter.set_default_color_theme("blue") customtkinter.set_default_color_theme("blue")
# delete window
self.protocol("WM_DELETE_WINDOW", self.delete_window)
def button_config_callback(self): def button_config_callback(self):
if self.config_window is None or not self.config_window.winfo_exists(): if self.config_window is None or not self.config_window.winfo_exists():
self.config_window = window_config.ToplevelWindowConfig(self) self.config_window = window_config.ToplevelWindowConfig(self)
@@ -310,46 +352,73 @@ class App(customtkinter.CTk):
def checkbox_translation_callback(self): def checkbox_translation_callback(self):
self.ENABLE_TRANSLATION = self.checkbox_translation.get() self.ENABLE_TRANSLATION = self.checkbox_translation.get()
self.textbox_message_log.configure(state='normal')
if self.ENABLE_TRANSLATION: if self.ENABLE_TRANSLATION:
self.textbox_message_log.insert("end", f"[INFO] start translation\n") utils.print_textbox(self.textbox_message_system_log, "[info] Start translation")
else: else:
self.textbox_message_log.insert("end", f"[INFO] stop translation\n") utils.print_textbox(self.textbox_message_system_log, "[info] Stop translation")
self.textbox_message_log.configure(state='disabled')
self.textbox_message_log.see("end")
utils.save_json(self.PATH_CONFIG, "ENABLE_TRANSLATION", self.ENABLE_TRANSLATION) utils.save_json(self.PATH_CONFIG, "ENABLE_TRANSLATION", self.ENABLE_TRANSLATION)
def checkbox_transcription_callback(self): def checkbox_transcription_send_callback(self):
self.ENABLE_TRANSCRIPTION = self.checkbox_transcription.get() self.ENABLE_TRANSCRIPTION_SEND = self.checkbox_transcription_send.get()
if self.ENABLE_TRANSCRIPTION is True: if self.ENABLE_TRANSCRIPTION_SEND is True:
utils.print_textbox(self.textbox_message_system_log, "[info] Start sending transcription from your voice")
# start threading # start threading
th = threading.Thread(target = self.voice_input) self.vr.set_mic(
th.start() device_name=self.CHOICE_MIC_DEVICE,
utils.save_json(self.PATH_CONFIG, "ENABLE_TRANSCRIPTION", self.ENABLE_TRANSCRIPTION) threshold=int(self.INPUT_MIC_THRESHOLD),
is_dynamic=self.INPUT_MIC_IS_DYNAMIC,
)
self.vr.init_mic()
self.th_vr_listen_mic = utils.thread_fnc(self.vr_listen_mic)
self.th_vr_recognize_mic = utils.thread_fnc(self.vr_recognize_mic)
self.th_vr_listen_mic.start()
self.th_vr_recognize_mic.start()
else:
if isinstance(self.th_vr_listen_mic, utils.thread_fnc):
self.th_vr_listen_mic.stop()
if isinstance(self.th_vr_recognize_mic, utils.thread_fnc):
self.th_vr_recognize_mic.stop()
def voice_input(self): utils.print_textbox(self.textbox_message_system_log, "[info] Stop sending transcription from your voice")
self.vr.set_mic(self.CHOICE_MIC_DEVICE) utils.save_json(self.PATH_CONFIG, "ENABLE_TRANSCRIPTION_SEND", self.ENABLE_TRANSCRIPTION_SEND)
self.vr.init_mic(threshold=self.MIC_THRESHOLD, is_dynamic=self.ENABLE_MIC_IS_DYNAMIC)
# start voice_input def checkbox_transcription_receive_callback(self):
if self.checkbox_transcription.get() is True: self.ENABLE_TRANSCRIPTION_RECEIVE = self.checkbox_transcription_receive.get()
self.textbox_message_log.configure(state='normal') if self.ENABLE_TRANSCRIPTION_RECEIVE is True:
self.textbox_message_log.insert("end", f"[INFO] start transcription\n") utils.print_textbox(self.textbox_message_system_log, "[info] Start transcription of speaker's voice")
self.textbox_message_log.configure(state='disabled') # start threading
self.textbox_message_log.see("end") self.vr.set_spk(
device_name=self.CHOICE_SPEAKER_DEVICE,
sample_rate=int(self.INPUT_SPEAKER_SAMPLING_RATE),
interval=int(self.INPUT_SPEAKER_INTERVAL),
buffer_size=int(self.INPUT_SPEAKER_BUFFER_SIZE),
)
self.vr.init_spk()
self.th_vr_listen_spk = utils.thread_fnc(self.vr_listen_spk)
self.th_vr_recognize_spk = utils.thread_fnc(self.vr_recognize_spk)
self.th_vr_listen_spk.start()
self.th_vr_recognize_spk.start()
else:
if isinstance(self.th_vr_listen_spk, utils.thread_fnc):
self.th_vr_listen_spk.stop()
if isinstance(self.th_vr_recognize_spk, utils.thread_fnc):
self.th_vr_recognize_spk.stop()
while self.checkbox_transcription.get() is True: utils.print_textbox(self.textbox_message_system_log, "[info] Stop transcription of speaker's voice")
message = self.vr.listen_voice(language=self.INPUT_MIC_VOICE_LANGUAGE) utils.save_json(self.PATH_CONFIG, "ENABLE_TRANSCRIPTION_RECEIVE", self.ENABLE_TRANSCRIPTION_RECEIVE)
def vr_listen_mic(self):
self.vr.listen_mic()
def vr_recognize_mic(self):
message = self.vr.recognize_mic(language=self.INPUT_MIC_VOICE_LANGUAGE)
if len(message) > 0: if len(message) > 0:
# translate # translate
if self.checkbox_translation.get() is False: if self.checkbox_translation.get() is False:
chat_message = f"{message}" voice_message = f"{message}"
elif (self.translator.translator_status[self.CHOICE_TRANSLATOR] is False) or (self.INPUT_SOURCE_LANG == "None") or (self.INPUT_TARGET_LANG == "None"): elif self.translator.translator_status[self.CHOICE_TRANSLATOR] is False:
self.textbox_message_log.configure(state='normal') utils.print_textbox(self.textbox_message_system_log, "[error] Auth Key or language setting is incorrect")
self.textbox_message_log.insert("end", f"[ERROR] Auth Keyもしくは言語の設定が間違っています\n") voice_message = f"{message}"
self.textbox_message_log.configure(state='disabled')
self.textbox_message_log.see("end")
chat_message = f"{message}"
else: else:
result = self.translator.translate( result = self.translator.translate(
translator_name=self.CHOICE_TRANSLATOR, translator_name=self.CHOICE_TRANSLATOR,
@@ -357,20 +426,36 @@ class App(customtkinter.CTk):
target_language=self.INPUT_TARGET_LANG, target_language=self.INPUT_TARGET_LANG,
message=message message=message
) )
chat_message = self.MESSAGE_FORMAT.replace("[message]", message).replace("[translation]", result) voice_message = self.MESSAGE_FORMAT.replace("[message]", message).replace("[translation]", result)
# send OSC message # send OSC message
osc_tools.send_message(chat_message, self.OSC_IP_ADDRESS, self.OSC_PORT) osc_tools.send_message(voice_message, self.OSC_IP_ADDRESS, self.OSC_PORT)
# update textbox message log # update textbox message log
self.textbox_message_log.configure(state='normal') utils.print_textbox(self.textbox_message_send_log, f"[voice] {voice_message}")
self.textbox_message_log.insert("end", f"[VOICE] {chat_message}\n")
self.textbox_message_log.configure(state='disabled') def vr_listen_spk(self):
self.textbox_message_log.see("end") self.vr.listen_spk()
self.textbox_message_log.configure(state='normal')
self.textbox_message_log.insert("end", f"[INFO] stop transcription\n") def vr_recognize_spk(self):
self.textbox_message_log.configure(state='disabled') message = self.vr.recognize_spk(language=self.INPUT_SPEAKER_VOICE_LANGUAGE)
self.textbox_message_log.see("end") if len(message) > 0:
# translate
if self.checkbox_translation.get() is False:
voice_message = f"{message}"
elif self.translator.translator_status[self.CHOICE_TRANSLATOR] is False:
utils.print_textbox(self.textbox_message_system_log, "[error] Auth Key or language setting is incorrect")
voice_message = f"{message}"
else:
result = self.translator.translate(
translator_name=self.CHOICE_TRANSLATOR,
source_language=self.OUTPUT_SOURCE_LANG,
target_language=self.OUTPUT_TARGET_LANG,
message=message
)
voice_message = self.MESSAGE_FORMAT.replace("[message]", message).replace("[translation]", result)
# send OSC message
# osc_tools.send_message(voice_message, self.OSC_IP_ADDRESS, self.OSC_PORT)
# update textbox message receive log
utils.print_textbox(self.textbox_message_receive_log, f"[voice] {voice_message}")
def checkbox_foreground_callback(self): def checkbox_foreground_callback(self):
self.ENABLE_FOREGROUND = self.checkbox_foreground.get() self.ENABLE_FOREGROUND = self.checkbox_foreground.get()
@@ -392,11 +477,8 @@ class App(customtkinter.CTk):
# translate # translate
if self.checkbox_translation.get() is False: if self.checkbox_translation.get() is False:
chat_message = f"{message}" chat_message = f"{message}"
elif (self.translator.translator_status[self.CHOICE_TRANSLATOR] is False) or (self.INPUT_SOURCE_LANG == "None") or (self.INPUT_TARGET_LANG == "None"): elif self.translator.translator_status[self.CHOICE_TRANSLATOR] is False:
self.textbox_message_log.configure(state='normal') utils.print_textbox(self.textbox_message_system_log, "[error] Auth Key or language setting is incorrect")
self.textbox_message_log.insert("end", f"[ERROR] Auth Keyもしくは言語の設定が間違っています\n")
self.textbox_message_log.configure(state='disabled')
self.textbox_message_log.see("end")
chat_message = f"{message}" chat_message = f"{message}"
else: else:
result = self.translator.translate( result = self.translator.translate(
@@ -411,10 +493,7 @@ class App(customtkinter.CTk):
osc_tools.send_message(chat_message, self.OSC_IP_ADDRESS, self.OSC_PORT) osc_tools.send_message(chat_message, self.OSC_IP_ADDRESS, self.OSC_PORT)
# update textbox message log # update textbox message log
self.textbox_message_log.configure(state='normal') utils.print_textbox(self.textbox_message_send_log, f"[chat] {chat_message}")
self.textbox_message_log.insert("end", f"[CHAT] {chat_message}\n")
self.textbox_message_log.configure(state='disabled')
self.textbox_message_log.see("end")
# delete message in entry message box # delete message in entry message box
# self.entry_message_box.delete(0, customtkinter.END) # self.entry_message_box.delete(0, customtkinter.END)
@@ -431,6 +510,13 @@ class App(customtkinter.CTk):
if self.ENABLE_FOREGROUND: if self.ENABLE_FOREGROUND:
self.attributes("-topmost", True) self.attributes("-topmost", True)
def delete_window(self):
    """Ask the worker threads to stop, then close the main window.

    Used as the handler for the ``WM_DELETE_WINDOW`` protocol.  Only threads
    that expose a callable ``stop`` attribute are signalled; any other live
    thread (for example one started by a library) is left alone instead of
    raising ``AttributeError`` and aborting the shutdown.
    """
    main_thread = threading.main_thread()
    for thread in threading.enumerate():
        if thread is main_thread:
            continue
        # Cooperative workers provide stop(); plain Thread objects do not.
        stop = getattr(thread, "stop", None)
        if callable(stop):
            stop()
    self.destroy()
if __name__ == "__main__": if __name__ == "__main__":
app = App() app = App()
app.mainloop() app.mainloop()

View File

@@ -1,12 +1,15 @@
import pyaudio import io
import queue
import numpy as np
import soundcard as sc
import soundfile as sf
import sounddevice as sd
import speech_recognition as sr import speech_recognition as sr
# VoiceRecognizer # VoiceRecognizer
class VoiceRecognizer(): class VoiceRecognizer():
def __init__(self): def __init__(self):
self.input_device_dict = self.search_input_device()
self.r = sr.Recognizer() self.r = sr.Recognizer()
self.mic = None
self.languages = [ self.languages = [
"ja-JP","en-US","en-GB","af-ZA","ar-DZ","ar-BH","ar-EG","ar-IL","ar-IQ","ar-JO","ar-KW","ar-LB","ar-MA", "ja-JP","en-US","en-GB","af-ZA","ar-DZ","ar-BH","ar-EG","ar-IL","ar-IQ","ar-JO","ar-KW","ar-LB","ar-MA",
"ar-OM","ar-PS","ar-QA","ar-SA","ar-TN","ar-AE","eu-ES","bg-BG","ca-ES","cmn-Hans-CN","cmn-Hans-HK", "ar-OM","ar-PS","ar-QA","ar-SA","ar-TN","ar-AE","eu-ES","bg-BG","ca-ES","cmn-Hans-CN","cmn-Hans-HK",
@@ -17,54 +20,135 @@ class VoiceRecognizer():
"es-NI","es-PA","es-PY","es-PE","es-PR","es-ES","es-UY","es-US","es-VE","sv-SE","th-TH","tr-TR","uk-UA", "es-NI","es-PA","es-PY","es-PE","es-PR","es-ES","es-UY","es-US","es-VE","sv-SE","th-TH","tr-TR","uk-UA",
"vi-VN","zu-ZA" "vi-VN","zu-ZA"
] ]
self.mic_device_name = None
self.mic_threshold = 50
self.mic_is_dynamic = False
self.mic_queue = queue.Queue()
self.spk_device_name = None
self.spk_sample_rate = 16000
self.spk_interval = 3
self.spk_buffer_size = 4096
self.spk_audio = np.empty(self.spk_sample_rate * self.spk_interval + self.spk_buffer_size, dtype=np.float32)
self.n = 0
self.spk_queue = queue.Queue()
def search_input_device(self): def search_input_device(self):
pa = pyaudio.PyAudio() device_list = sd.query_devices()
input_device_dict = {} input_device_list = []
mic_cnt = 1 for device in device_list:
for i in range(pa.get_device_count()): if device["max_input_channels"] > 0:
device = pa.get_device_info_by_index(i) input_device_list.append({"name": device["name"], "index": device["index"]})
try:
device["name"] = device["name"].encode('shift_jis').decode('utf-8') return input_device_list
except:
device["name"] = device["name"].encode('utf-8').decode('utf-8') def search_output_device(self):
if device["maxInputChannels"] > 0: device_list = sc.all_speakers()
input_device_dict[f'No.{mic_cnt}:{device["name"]}'] = device["index"] output_device_list = []
mic_cnt += 1
pa.terminate() for device in device_list:
return input_device_dict output_device_list.append(str(device.name))
return output_device_list
def search_default_device(self):
    """Return ``(mic_name, speaker_name)`` for the default audio devices.

    The microphone name is looked up in the sounddevice device list using
    the first entry of ``sd.default.device``; the speaker name comes from
    soundcard's default speaker.
    """
    devices = sd.query_devices()
    default_input_index = sd.default.device[0]
    default_speaker = sc.default_speaker()
    return devices[default_input_index]["name"], str(default_speaker.name)
def set_mic(self, device_name, threshold=50, is_dynamic=False): def set_mic(self, device_name, threshold=50, is_dynamic=False):
if device_name in [v for v in self.input_device_dict.keys()]: input_device_list = self.search_input_device()
index = self.input_device_dict[device_name] self.mic_device_name = [device["index"] for device in input_device_list if device["name"] == device_name][0]
self.mic = sr.Microphone(device_index=index) self.mic_threshold = threshold
self.r.energy_threshold = threshold self.mic_is_dynamic = is_dynamic
if is_dynamic:
def init_mic(self):
self.r.energy_threshold = self.mic_threshold
if self.mic_is_dynamic:
with self.mic as source: with self.mic as source:
self.r.adjust_for_ambient_noise(source, 3.0) self.r.adjust_for_ambient_noise(source, 3.0)
return True
else:
return False
def init_mic(self, threshold=50, is_dynamic=False): def listen_mic(self):
if isinstance(self.mic, sr.Microphone): with sr.Microphone(device_index=self.mic_device_name) as source:
self.r.energy_threshold = threshold
if is_dynamic:
with self.mic as source:
self.r.adjust_for_ambient_noise(source, 3.0)
return True
else:
return False
def listen_voice(self, language):
if self.mic != None:
with self.mic as source:
audio = self.r.listen(source) audio = self.r.listen(source)
self.mic_queue.put(audio)
def recognize_mic(self, language):
try: try:
audio = self.mic_queue.get()
text = self.r.recognize_google(audio, language=language) text = self.r.recognize_google(audio, language=language)
return text
except: except:
return "" text = ""
else: return text
return False
def set_spk(self, device_name=None, sample_rate=16000, interval=3, buffer_size=4096):
    """Store the speaker (loopback) capture settings on the instance.

    Args:
        device_name: Name of the speaker to capture.  ``None`` selects the
            current default speaker, resolved at call time rather than when
            the method is defined.
        sample_rate: Value stored as ``spk_sample_rate``.
        interval: Value stored as ``spk_interval``.
        buffer_size: Value stored as ``spk_buffer_size``.
    """
    if device_name is None:
        # Resolved lazily: a default evaluated in the signature would query
        # the audio backend at import time and never notice a device change.
        device_name = str(sc.default_speaker().name)
    self.spk_device_name = device_name
    self.spk_sample_rate = sample_rate
    self.spk_interval = interval
    self.spk_buffer_size = buffer_size
def init_spk(self):
    """Allocate a fresh speaker capture buffer and reset the fill counter.

    The buffer holds ``spk_sample_rate * spk_interval + spk_buffer_size``
    float32 samples; its contents are uninitialised until recorded into.
    """
    capacity = self.spk_sample_rate * self.spk_interval + self.spk_buffer_size
    self.n = 0
    self.spk_audio = np.empty(capacity, dtype=np.float32)
def listen_spk(self):
    """Record one segment of speaker audio and push it onto ``spk_queue``.

    Records from the configured speaker (opened with ``include_loopback=True``)
    until at least ``spk_sample_rate * spk_interval`` samples are buffered.
    The buffer is then cut at the point of lowest smoothed energy inside its
    last fifth (presumably so the cut falls in a quiet moment - confirm).
    Everything before the cut is queued, and the remainder is carried over
    into a new buffer for the next call.
    """
    buf = self.spk_audio
    filled = self.n
    loopback = sc.get_microphone(id=self.spk_device_name, include_loopback=True)
    with loopback.recorder(samplerate=self.spk_sample_rate, channels=1) as source:
        while filled < self.spk_sample_rate * self.spk_interval:
            chunk = source.record(self.spk_buffer_size)
            buf[filled:filled + len(chunk)] = chunk.reshape(-1)
            filled += len(chunk)

        # Search only the last fifth of the recording for the cut point.
        cut = filled * 4 // 5
        # 100-sample moving average of the squared signal.
        energy = np.convolve(buf[cut:filled] ** 2, np.ones(100) / 100, 'same')
        cut += energy.argmin()

        snapshot = buf.copy()
        self.spk_queue.put(buf[:cut])

        # Start a new buffer with the samples that came after the cut.
        leftover = filled - cut
        buf = np.empty(self.spk_sample_rate * self.spk_interval + self.spk_buffer_size, dtype=np.float32)
        buf[:leftover] = snapshot[cut:filled]
        filled = leftover

    self.spk_audio = buf
    self.n = filled
def recognize_spk(self, language, timeout=1.0):
    """Transcribe the next queued speaker segment.

    Args:
        language: Language code passed to ``recognize_google``.
        timeout: Seconds to wait for a queued segment.  ``None`` waits
            indefinitely (the previous behaviour); the finite default lets a
            polling worker thread notice a stop request between calls.

    Returns:
        The recognised text, or ``""`` when no segment arrived in time or
        the segment could not be transcribed.
    """
    try:
        samples = self.spk_queue.get(timeout=timeout)
    except queue.Empty:
        return ""

    try:
        # Wrap the raw samples in an in-memory WAV so they can be read back
        # through sr.AudioFile.
        with io.BytesIO() as memory_file:
            sf.write(file=memory_file, data=samples, format="WAV", samplerate=self.spk_sample_rate)
            memory_file.seek(0)
            with sr.AudioFile(memory_file) as source:
                audio = self.r.record(source)
        return self.r.recognize_google(audio, language=language)
    except Exception:
        # Best effort: a failed recognition yields no text.  Unlike a bare
        # ``except:``, KeyboardInterrupt and SystemExit still propagate.
        return ""
if __name__ == "__main__":
    # Manual smoke test: transcribe whatever the default speaker plays and
    # print the recognised text.
    import time
    import threading

    vr = VoiceRecognizer()
    _, spk_name = vr.search_default_device()
    # set_spk() takes capture settings only; the language belongs to
    # recognize_spk().
    vr.set_spk(device_name=spk_name)
    vr.init_spk()

    def vr_listen_spk():
        while True:
            vr.listen_spk()

    def vr_recognize_spk():
        while True:
            text = vr.recognize_spk(language="ja-JP")
            print(text)

    # Daemon threads so that interrupting the main thread ends the process.
    th_vr_listen_spk = threading.Thread(target=vr_listen_spk, daemon=True)
    th_vr_recognize_spk = threading.Thread(target=vr_recognize_spk, daemon=True)
    th_vr_listen_spk.start()
    th_vr_recognize_spk.start()

    while True:
        time.sleep(60)

View File

@@ -1,4 +1,6 @@
import json import json
import datetime
import threading
def save_json(path, key, value): def save_json(path, key, value):
with open(path, "r") as fp: with open(path, "r") as fp:
@@ -6,3 +8,26 @@ def save_json(path, key, value):
json_data[key] = value json_data[key] = value
with open(path, "w") as fp: with open(path, "w") as fp:
json.dump(json_data, fp, indent=4) json.dump(json_data, fp, indent=4)
def print_textbox(textbox, message):
    """Append ``message`` to ``textbox`` as a ``[HH:MM:SS]``-prefixed line.

    The textbox is switched to the 'normal' state for the insert, set back
    to 'disabled', and then scrolled to the end.
    """
    timestamp = datetime.datetime.now().strftime('%H:%M:%S')
    line = f"[{timestamp}]{message}\n"

    textbox.configure(state='normal')
    textbox.insert("end", line)
    textbox.configure(state='disabled')
    textbox.see("end")
class thread_fnc(threading.Thread):
    """Thread that calls ``fnc`` in a loop until :meth:`stop` is requested.

    Stopping is cooperative: the flag is checked only between calls, so a
    call to ``fnc`` that is blocking is not interrupted.

    Args:
        fnc: Zero-argument callable invoked repeatedly by :meth:`run`.
        *args, **kwargs: Forwarded to ``threading.Thread``.
    """

    def __init__(self, fnc, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.fnc = fnc
        # Deliberately not named ``_stop``: threading.Thread has used that
        # name for a private method, and shadowing it with an Event makes
        # join() fail with "'Event' object is not callable".
        self._stop_event = threading.Event()

    def stop(self):
        """Request that the loop in :meth:`run` exits after the current call."""
        self._stop_event.set()

    def stopped(self):
        """Return True once :meth:`stop` has been called."""
        return self._stop_event.is_set()

    def run(self):
        """Call ``fnc`` repeatedly until a stop has been requested."""
        while not self.stopped():
            self.fnc()

View File

@@ -158,18 +158,17 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel):
self.tabview_config.tab("Translation"), self.tabview_config.tab("Translation"),
text="Output Language:", text="Output Language:",
fg_color="transparent", fg_color="transparent",
font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY, overstrike=True) font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY)
) )
self.label_translation_output_language.grid(row=2, column=0, columnspan=1, padx=5, pady=5, sticky="nsw") self.label_translation_output_language.grid(row=2, column=0, columnspan=1, padx=5, pady=5, sticky="nsw")
## select translation output source language ## select translation output source language
self.optionmenu_translation_output_source_language = customtkinter.CTkOptionMenu( self.optionmenu_translation_output_source_language = customtkinter.CTkOptionMenu(
self.tabview_config.tab("Translation"), self.tabview_config.tab("Translation"),
# command=self.optionmenu_translation_output_source_language_callback, command=self.optionmenu_translation_output_source_language_callback,
font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY), font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY),
values=self.parent.translator.languages[self.parent.CHOICE_TRANSLATOR], values=self.parent.translator.languages[self.parent.CHOICE_TRANSLATOR],
variable=customtkinter.StringVar(value=self.parent.OUTPUT_SOURCE_LANG), variable=customtkinter.StringVar(value=self.parent.OUTPUT_SOURCE_LANG),
state="disabled",
) )
self.optionmenu_translation_output_source_language.grid(row=2, column=1, columnspan=1, padx=5, pady=5, sticky="nsew") self.optionmenu_translation_output_source_language.grid(row=2, column=1, columnspan=1, padx=5, pady=5, sticky="nsew")
@@ -185,11 +184,10 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel):
## select translation output target language ## select translation output target language
self.optionmenu_translation_output_target_language = customtkinter.CTkOptionMenu( self.optionmenu_translation_output_target_language = customtkinter.CTkOptionMenu(
self.tabview_config.tab("Translation"), self.tabview_config.tab("Translation"),
# command=self.optionmenu_translation_output_target_language_callback, command=self.optionmenu_translation_output_target_language_callback,
font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY), font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY),
values=self.parent.translator.languages[self.parent.CHOICE_TRANSLATOR], values=self.parent.translator.languages[self.parent.CHOICE_TRANSLATOR],
variable=customtkinter.StringVar(value=self.parent.OUTPUT_TARGET_LANG), variable=customtkinter.StringVar(value=self.parent.OUTPUT_TARGET_LANG),
state="disabled",
) )
self.optionmenu_translation_output_target_language.grid(row=2, column=3, columnspan=1, padx=5, pady=5, sticky="nsew") self.optionmenu_translation_output_target_language.grid(row=2, column=3, columnspan=1, padx=5, pady=5, sticky="nsew")
@@ -204,7 +202,7 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel):
self.label_input_mic_device.grid(row=0, column=0, columnspan=1, padx=5, pady=5, sticky="nsw") self.label_input_mic_device.grid(row=0, column=0, columnspan=1, padx=5, pady=5, sticky="nsw")
self.optionmenu_input_mic_device = customtkinter.CTkOptionMenu( self.optionmenu_input_mic_device = customtkinter.CTkOptionMenu(
self.tabview_config.tab("Transcription"), self.tabview_config.tab("Transcription"),
values=list(self.parent.vr.input_device_dict.keys()), values=[device["name"] for device in self.parent.vr.search_input_device()],
command=self.optionmenu_input_mic_device_callback, command=self.optionmenu_input_mic_device_callback,
font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY), font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY),
variable=customtkinter.StringVar(value=self.parent.CHOICE_MIC_DEVICE) variable=customtkinter.StringVar(value=self.parent.CHOICE_MIC_DEVICE)
@@ -245,12 +243,12 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel):
font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY) font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY)
) )
self.checkbox_input_mic_is_dynamic.grid(row=2, column=1, columnspan=3 ,padx=5, pady=5, sticky="nsew") self.checkbox_input_mic_is_dynamic.grid(row=2, column=1, columnspan=3 ,padx=5, pady=5, sticky="nsew")
if self.parent.ENABLE_MIC_IS_DYNAMIC is True: if self.parent.INPUT_MIC_IS_DYNAMIC is True:
self.checkbox_input_mic_is_dynamic.select() self.checkbox_input_mic_is_dynamic.select()
else: else:
self.checkbox_input_mic_is_dynamic.deselect() self.checkbox_input_mic_is_dynamic.deselect()
## slider input mic threshold ## entry input mic threshold
self.label_input_mic_threshold = customtkinter.CTkLabel( self.label_input_mic_threshold = customtkinter.CTkLabel(
self.tabview_config.tab("Transcription"), self.tabview_config.tab("Transcription"),
text="Input Mic Threshold:", text="Input Mic Threshold:",
@@ -258,30 +256,28 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel):
font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY) font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY)
) )
self.label_input_mic_threshold.grid(row=3, column=0, columnspan=1, padx=5, pady=5, sticky="nsw") self.label_input_mic_threshold.grid(row=3, column=0, columnspan=1, padx=5, pady=5, sticky="nsw")
self.slider_input_mic_threshold = customtkinter.CTkSlider( self.entry_input_mic_threshold = customtkinter.CTkEntry(
self.tabview_config.tab("Transcription"), self.tabview_config.tab("Transcription"),
from_=0, textvariable=customtkinter.StringVar(value=self.parent.INPUT_MIC_THRESHOLD),
to=300, font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY)
command=self.slider_input_mic_threshold_callback,
variable=tk.DoubleVar(value=self.parent.MIC_THRESHOLD),
) )
self.slider_input_mic_threshold.grid(row=3, column=1, columnspan=3 ,padx=5, pady=10, sticky="nsew") self.entry_input_mic_threshold.grid(row=3, column=1, columnspan=3 ,padx=5, pady=10, sticky="nsew")
self.entry_input_mic_threshold.bind("<Any-KeyRelease>", self.entry_input_mic_threshold_callback)
## optionmenu input speaker device ## optionmenu input speaker device
self.label_input_speaker_device = customtkinter.CTkLabel( self.label_input_speaker_device = customtkinter.CTkLabel(
self.tabview_config.tab("Transcription"), self.tabview_config.tab("Transcription"),
text="Input Speaker Device:", text="Input Speaker Device:",
fg_color="transparent", fg_color="transparent",
font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY, overstrike=True) font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY)
) )
self.label_input_speaker_device.grid(row=4, column=0, columnspan=1, padx=5, pady=5, sticky="nsw") self.label_input_speaker_device.grid(row=4, column=0, columnspan=1, padx=5, pady=5, sticky="nsw")
self.optionmenu_input_speaker_device = customtkinter.CTkOptionMenu( self.optionmenu_input_speaker_device = customtkinter.CTkOptionMenu(
self.tabview_config.tab("Transcription"), self.tabview_config.tab("Transcription"),
values=list(self.parent.vr.input_device_dict.keys()), values=self.parent.vr.search_output_device(),
# command=self.optionmenu_input_speaker_device_callback, command=self.optionmenu_input_speaker_device_callback,
font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY), font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY),
variable=customtkinter.StringVar(value=self.parent.CHOICE_SPEAKER_DEVICE), variable=customtkinter.StringVar(value=self.parent.CHOICE_SPEAKER_DEVICE),
state="disabled"
) )
self.optionmenu_input_speaker_device.grid(row=4, column=1, columnspan=3 ,padx=5, pady=5, sticky="nsew") self.optionmenu_input_speaker_device.grid(row=4, column=1, columnspan=3 ,padx=5, pady=5, sticky="nsew")
@@ -290,59 +286,65 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel):
self.tabview_config.tab("Transcription"), self.tabview_config.tab("Transcription"),
text="Input Speaker Voice Language:", text="Input Speaker Voice Language:",
fg_color="transparent", fg_color="transparent",
font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY, overstrike=True) font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY)
) )
self.label_input_speaker_voice_language.grid(row=5, column=0, columnspan=1, padx=5, pady=5, sticky="nsw") self.label_input_speaker_voice_language.grid(row=5, column=0, columnspan=1, padx=5, pady=5, sticky="nsw")
self.optionmenu_input_speaker_voice_language = customtkinter.CTkOptionMenu( self.optionmenu_input_speaker_voice_language = customtkinter.CTkOptionMenu(
self.tabview_config.tab("Transcription"), self.tabview_config.tab("Transcription"),
values=list(self.parent.vr.languages), values=list(self.parent.vr.languages),
# command=self.optionmenu_input_speaker_voice_language_callback, command=self.optionmenu_input_speaker_voice_language_callback,
font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY), font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY),
variable=customtkinter.StringVar(value=self.parent.INPUT_SPEAKER_VOICE_LANGUAGE), variable=customtkinter.StringVar(value=self.parent.INPUT_SPEAKER_VOICE_LANGUAGE),
state="disabled"
) )
self.optionmenu_input_speaker_voice_language.grid(row=5, column=1, columnspan=3 ,padx=5, pady=5, sticky="nsew") self.optionmenu_input_speaker_voice_language.grid(row=5, column=1, columnspan=3 ,padx=5, pady=5, sticky="nsew")
## checkbox input speaker in dynamic ## entry input speaker sampling rate
self.label_input_speaker_is_dynamic = customtkinter.CTkLabel( self.label_input_speaker_sampling_rate = customtkinter.CTkLabel(
self.tabview_config.tab("Transcription"), self.tabview_config.tab("Transcription"),
text="Input Speaker IsDynamic:", text="Input Speaker SamplingRate:",
fg_color="transparent", fg_color="transparent",
font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY, overstrike=True) font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY)
) )
self.label_input_speaker_is_dynamic.grid(row=6, column=0, columnspan=1, padx=5, pady=5, sticky="nsw") self.label_input_speaker_sampling_rate.grid(row=6, column=0, columnspan=1, padx=5, pady=5, sticky="nsw")
self.checkbox_input_speaker_is_dynamic = customtkinter.CTkCheckBox( self.entry_input_speaker_sampling_rate = customtkinter.CTkEntry(
self.tabview_config.tab("Transcription"), self.tabview_config.tab("Transcription"),
text="", textvariable=customtkinter.StringVar(value=self.parent.INPUT_SPEAKER_SAMPLING_RATE),
onvalue=True, font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY)
offvalue=False,
# command=self.checkbox_input_speaker_is_dynamic_callback,
font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY),
state="disabled"
) )
self.checkbox_input_speaker_is_dynamic.grid(row=6, column=1, columnspan=3 ,padx=5, pady=5, sticky="nsew") self.entry_input_speaker_sampling_rate.grid(row=6, column=1, columnspan=3 ,padx=5, pady=5, sticky="nsew")
if self.parent.ENABLE_SPEAKER_IS_DYNAMIC is True: self.entry_input_speaker_sampling_rate.bind("<Any-KeyRelease>", self.entry_input_speaker_sampling_rate_callback)
self.checkbox_input_speaker_is_dynamic.select()
else:
self.checkbox_input_speaker_is_dynamic.deselect()
## slider input speaker threshold ## entry input speaker interval
self.label_input_speaker_threshold = customtkinter.CTkLabel( self.label_input_speaker_interval = customtkinter.CTkLabel(
self.tabview_config.tab("Transcription"), self.tabview_config.tab("Transcription"),
text="Input Speaker Threshold:", text="Input Speaker Interval:",
fg_color="transparent", fg_color="transparent",
font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY, overstrike=True) font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY)
) )
self.label_input_speaker_threshold.grid(row=7, column=0, columnspan=1, padx=5, pady=5, sticky="nsw") self.label_input_speaker_interval.grid(row=7, column=0, columnspan=1, padx=5, pady=5, sticky="nsw")
self.slider_input_speaker_threshold = customtkinter.CTkSlider( self.entry_input_speaker_interval = customtkinter.CTkEntry(
self.tabview_config.tab("Transcription"), self.tabview_config.tab("Transcription"),
from_=0, textvariable=customtkinter.StringVar(value=self.parent.INPUT_SPEAKER_INTERVAL),
to=300, font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY)
# command=self.slider_input_speaker_threshold_callback,
variable=tk.DoubleVar(value=self.parent.SPEAKER_THRESHOLD),
state="disabled"
) )
self.slider_input_speaker_threshold.grid(row=7, column=1, columnspan=3 ,padx=5, pady=10, sticky="nsew") self.entry_input_speaker_interval.grid(row=7, column=1, columnspan=3 ,padx=5, pady=5, sticky="nsew")
self.entry_input_speaker_interval.bind("<Any-KeyRelease>", self.entry_input_speaker_interval_callback)
## entry input speaker buffer size
self.label_input_speaker_buffer_size = customtkinter.CTkLabel(
self.tabview_config.tab("Transcription"),
text="Input Speaker BufferSize:",
fg_color="transparent",
font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY)
)
self.label_input_speaker_buffer_size.grid(row=8, column=0, columnspan=1, padx=5, pady=5, sticky="nsw")
self.entry_input_speaker_buffer_size = customtkinter.CTkEntry(
self.tabview_config.tab("Transcription"),
textvariable=customtkinter.StringVar(value=self.parent.INPUT_SPEAKER_BUFFER_SIZE),
font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY)
)
self.entry_input_speaker_buffer_size.grid(row=8, column=1, columnspan=3 ,padx=5, pady=5, sticky="nsew")
self.entry_input_speaker_buffer_size.bind("<Any-KeyRelease>", self.entry_input_speaker_buffer_size_callback)
# tab Parameter # tab Parameter
## entry ip address ## entry ip address
@@ -490,8 +492,12 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel):
self.optionmenu_input_speaker_device.configure(font=customtkinter.CTkFont(family=choice)) self.optionmenu_input_speaker_device.configure(font=customtkinter.CTkFont(family=choice))
self.label_input_speaker_voice_language.configure(font=customtkinter.CTkFont(family=choice)) self.label_input_speaker_voice_language.configure(font=customtkinter.CTkFont(family=choice))
self.optionmenu_input_speaker_voice_language.configure(font=customtkinter.CTkFont(family=choice)) self.optionmenu_input_speaker_voice_language.configure(font=customtkinter.CTkFont(family=choice))
self.label_input_speaker_is_dynamic.configure(font=customtkinter.CTkFont(family=choice)) self.label_input_speaker_sampling_rate.configure(font=customtkinter.CTkFont(family=choice))
self.label_input_speaker_threshold.configure(font=customtkinter.CTkFont(family=choice)) self.entry_input_speaker_sampling_rate.configure(font=customtkinter.CTkFont(family=choice))
self.label_input_speaker_interval.configure(font=customtkinter.CTkFont(family=choice))
self.entry_input_speaker_interval.configure(font=customtkinter.CTkFont(family=choice))
self.label_input_speaker_buffer_size.configure(font=customtkinter.CTkFont(family=choice))
self.entry_input_speaker_buffer_size.configure(font=customtkinter.CTkFont(family=choice))
# tab Parameter # tab Parameter
self.label_ip_address.configure(font=customtkinter.CTkFont(family=choice)) self.label_ip_address.configure(font=customtkinter.CTkFont(family=choice))
@@ -505,9 +511,12 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel):
# main window # main window
self.parent.checkbox_translation.configure(font=customtkinter.CTkFont(family=choice)) self.parent.checkbox_translation.configure(font=customtkinter.CTkFont(family=choice))
self.parent.checkbox_transcription.configure(font=customtkinter.CTkFont(family=choice)) self.parent.checkbox_transcription_send.configure(font=customtkinter.CTkFont(family=choice))
self.parent.checkbox_transcription_receive.configure(font=customtkinter.CTkFont(family=choice))
self.parent.checkbox_foreground.configure(font=customtkinter.CTkFont(family=choice)) self.parent.checkbox_foreground.configure(font=customtkinter.CTkFont(family=choice))
self.parent.textbox_message_log.configure(font=customtkinter.CTkFont(family=choice)) self.parent.textbox_message_send_log.configure(font=customtkinter.CTkFont(family=choice))
self.parent.textbox_message_receive_log.configure(font=customtkinter.CTkFont(family=choice))
self.parent.textbox_message_system_log.configure(font=customtkinter.CTkFont(family=choice))
self.parent.entry_message_box.configure(font=customtkinter.CTkFont(family=choice)) self.parent.entry_message_box.configure(font=customtkinter.CTkFont(family=choice))
self.parent.tabview_logs._segmented_button.configure(font=customtkinter.CTkFont(family=choice)) self.parent.tabview_logs._segmented_button.configure(font=customtkinter.CTkFont(family=choice))
@@ -522,10 +531,7 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel):
def optionmenu_translation_translator_callback(self, choice): def optionmenu_translation_translator_callback(self, choice):
if self.parent.translator.authentication(choice, self.parent.AUTH_KEYS[choice]) is False: if self.parent.translator.authentication(choice, self.parent.AUTH_KEYS[choice]) is False:
self.parent.textbox_message_log.configure(state='normal') utils.print_textbox(self.parent.textbox_message_system_log, f"[error] Auth Key or language setting is incorrect")
self.parent.textbox_message_log.insert("end", f"[ERROR]Auth Keyを設定してないか間違っています\n")
self.parent.textbox_message_log.configure(state='disabled')
self.parent.textbox_message_log.see("end")
else: else:
self.optionmenu_translation_input_source_language.configure( self.optionmenu_translation_input_source_language.configure(
values=self.parent.translator.languages[choice], values=self.parent.translator.languages[choice],
@@ -533,13 +539,23 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel):
self.optionmenu_translation_input_target_language.configure( self.optionmenu_translation_input_target_language.configure(
values=self.parent.translator.languages[choice], values=self.parent.translator.languages[choice],
variable=customtkinter.StringVar(value=self.parent.translator.languages[choice][1])) variable=customtkinter.StringVar(value=self.parent.translator.languages[choice][1]))
self.optionmenu_translation_output_source_language.configure(
values=self.parent.translator.languages[choice],
variable=customtkinter.StringVar(value=self.parent.translator.languages[choice][1]))
self.optionmenu_translation_output_target_language.configure(
values=self.parent.translator.languages[choice],
variable=customtkinter.StringVar(value=self.parent.translator.languages[choice][0]))
self.parent.CHOICE_TRANSLATOR = choice self.parent.CHOICE_TRANSLATOR = choice
self.parent.INPUT_SOURCE_LANG = self.parent.translator.languages[choice][0] self.parent.INPUT_SOURCE_LANG = self.parent.translator.languages[choice][0]
self.parent.INPUT_TARGET_LANG = self.parent.translator.languages[choice][1] self.parent.INPUT_TARGET_LANG = self.parent.translator.languages[choice][1]
self.parent.OUTPUT_SOURCE_LANG = self.parent.translator.languages[choice][1]
self.parent.OUTPUT_TARGET_LANG = self.parent.translator.languages[choice][0]
utils.save_json(self.parent.PATH_CONFIG, "CHOICE_TRANSLATOR", self.parent.CHOICE_TRANSLATOR) utils.save_json(self.parent.PATH_CONFIG, "CHOICE_TRANSLATOR", self.parent.CHOICE_TRANSLATOR)
utils.save_json(self.parent.PATH_CONFIG, "INPUT_SOURCE_LANG", self.parent.INPUT_SOURCE_LANG) utils.save_json(self.parent.PATH_CONFIG, "INPUT_SOURCE_LANG", self.parent.INPUT_SOURCE_LANG)
utils.save_json(self.parent.PATH_CONFIG, "INPUT_TARGET_LANG", self.parent.INPUT_TARGET_LANG) utils.save_json(self.parent.PATH_CONFIG, "INPUT_TARGET_LANG", self.parent.INPUT_TARGET_LANG)
utils.save_json(self.parent.PATH_CONFIG, "OUTPUT_SOURCE_LANG", self.parent.OUTPUT_SOURCE_LANG)
utils.save_json(self.parent.PATH_CONFIG, "OUTPUT_TARGET_LANG", self.parent.OUTPUT_TARGET_LANG)
def optionmenu_translation_input_source_language_callback(self, choice): def optionmenu_translation_input_source_language_callback(self, choice):
self.parent.INPUT_SOURCE_LANG = choice self.parent.INPUT_SOURCE_LANG = choice
@@ -549,6 +565,14 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel):
self.parent.INPUT_TARGET_LANG = choice self.parent.INPUT_TARGET_LANG = choice
utils.save_json(self.parent.PATH_CONFIG, "INPUT_TARGET_LANG", self.parent.INPUT_TARGET_LANG) utils.save_json(self.parent.PATH_CONFIG, "INPUT_TARGET_LANG", self.parent.INPUT_TARGET_LANG)
def optionmenu_translation_output_source_language_callback(self, choice):
    """Record the chosen output source language and save it to the config file.

    Args:
        choice: Value passed in by the option menu.
    """
    app = self.parent
    app.OUTPUT_SOURCE_LANG = choice
    utils.save_json(app.PATH_CONFIG, "OUTPUT_SOURCE_LANG", app.OUTPUT_SOURCE_LANG)
def optionmenu_translation_output_target_language_callback(self, choice):
    """Record the chosen output target language and save it to the config file.

    Args:
        choice: Value passed in by the option menu.
    """
    app = self.parent
    app.OUTPUT_TARGET_LANG = choice
    utils.save_json(app.PATH_CONFIG, "OUTPUT_TARGET_LANG", app.OUTPUT_TARGET_LANG)
def optionmenu_input_mic_device_callback(self, choice): def optionmenu_input_mic_device_callback(self, choice):
self.parent.CHOICE_MIC_DEVICE = choice self.parent.CHOICE_MIC_DEVICE = choice
utils.save_json(self.parent.PATH_CONFIG, "CHOICE_MIC_DEVICE", self.parent.CHOICE_MIC_DEVICE) utils.save_json(self.parent.PATH_CONFIG, "CHOICE_MIC_DEVICE", self.parent.CHOICE_MIC_DEVICE)
@@ -559,14 +583,32 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel):
def checkbox_input_mic_is_dynamic_callback(self): def checkbox_input_mic_is_dynamic_callback(self):
value = self.checkbox_input_mic_is_dynamic.get() value = self.checkbox_input_mic_is_dynamic.get()
self.parent.ENABLE_MIC_IS_DYNAMIC = value self.parent.INPUT_MIC_IS_DYNAMIC = value
utils.save_json(self.parent.PATH_CONFIG, "ENABLE_MIC_IS_DYNAMIC", self.parent.ENABLE_MIC_IS_DYNAMIC) utils.save_json(self.parent.PATH_CONFIG, "INPUT_MIC_IS_DYNAMIC", self.parent.INPUT_MIC_IS_DYNAMIC)
self.parent.vr.init_mic(threshold=self.parent.MIC_THRESHOLD, is_dynamic=self.parent.ENABLE_MIC_IS_DYNAMIC)
def slider_input_mic_threshold_callback(self, value): def entry_input_mic_threshold_callback(self, event):
self.parent.MIC_THRESHOLD = value self.parent.INPUT_MIC_THRESHOLD = int(self.entry_input_mic_threshold.get())
utils.save_json(self.parent.PATH_CONFIG, "MIC_THRESHOLD", self.parent.MIC_THRESHOLD) utils.save_json(self.parent.PATH_CONFIG, "INPUT_MIC_THRESHOLD", self.parent.INPUT_MIC_THRESHOLD)
self.parent.vr.init_mic(threshold=self.parent.MIC_THRESHOLD, is_dynamic=self.parent.ENABLE_MIC_IS_DYNAMIC)
def optionmenu_input_speaker_device_callback(self, choice):
    """Record the chosen speaker device and save it to the config file.

    Args:
        choice: Value passed in by the option menu.
    """
    app = self.parent
    app.CHOICE_SPEAKER_DEVICE = choice
    utils.save_json(app.PATH_CONFIG, "CHOICE_SPEAKER_DEVICE", app.CHOICE_SPEAKER_DEVICE)
def optionmenu_input_speaker_voice_language_callback(self, choice):
    """Record the chosen speaker voice language and save it to the config file.

    Args:
        choice: Value passed in by the option menu.
    """
    app = self.parent
    app.INPUT_SPEAKER_VOICE_LANGUAGE = choice
    utils.save_json(app.PATH_CONFIG, "INPUT_SPEAKER_VOICE_LANGUAGE", app.INPUT_SPEAKER_VOICE_LANGUAGE)
def entry_input_speaker_sampling_rate_callback(self, event):
    """Store and save the speaker sampling rate typed into its entry widget.

    The entry's text is parsed as an integer. Text that is not a valid
    integer (for example an empty field while the value is being retyped)
    is ignored, leaving the previous setting and the config file untouched.

    Args:
        event: Event object supplied by the widget binding; not used.
    """
    try:
        value = int(self.entry_input_speaker_sampling_rate.get())
    except ValueError:
        # Incomplete or non-numeric input: keep the last valid value rather
        # than raising inside the event callback.
        return
    self.parent.INPUT_SPEAKER_SAMPLING_RATE = value
    utils.save_json(self.parent.PATH_CONFIG, "INPUT_SPEAKER_SAMPLING_RATE", self.parent.INPUT_SPEAKER_SAMPLING_RATE)
def entry_input_speaker_interval_callback(self, event):
    """Store and save the speaker interval typed into its entry widget.

    The entry's text is parsed as an integer. Text that is not a valid
    integer (for example an empty field while the value is being retyped)
    is ignored, leaving the previous setting and the config file untouched.

    Args:
        event: Event object supplied by the widget binding; not used.
    """
    try:
        value = int(self.entry_input_speaker_interval.get())
    except ValueError:
        # Incomplete or non-numeric input: keep the last valid value rather
        # than raising inside the event callback.
        return
    self.parent.INPUT_SPEAKER_INTERVAL = value
    utils.save_json(self.parent.PATH_CONFIG, "INPUT_SPEAKER_INTERVAL", self.parent.INPUT_SPEAKER_INTERVAL)
def entry_input_speaker_buffer_size_callback(self, event):
    """Store and save the speaker buffer size typed into its entry widget.

    The entry's text is parsed as an integer. Text that is not a valid
    integer (for example an empty field while the value is being retyped)
    is ignored, leaving the previous setting and the config file untouched.

    Args:
        event: Event object supplied by the widget binding; not used.
    """
    try:
        value = int(self.entry_input_speaker_buffer_size.get())
    except ValueError:
        # Incomplete or non-numeric input: keep the last valid value rather
        # than raising inside the event callback.
        return
    self.parent.INPUT_SPEAKER_BUFFER_SIZE = value
    utils.save_json(self.parent.PATH_CONFIG, "INPUT_SPEAKER_BUFFER_SIZE", self.parent.INPUT_SPEAKER_BUFFER_SIZE)
def update_ip_address(self): def update_ip_address(self):
value = self.entry_ip_address.get() value = self.entry_ip_address.get()
@@ -583,19 +625,11 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel):
def update_authkey(self): def update_authkey(self):
value = self.entry_authkey.get() value = self.entry_authkey.get()
if len(value) > 0: if len(value) > 0:
self.parent.textbox_message_log.configure(state='normal')
self.parent.textbox_message_log.delete("0.0", "end")
self.parent.textbox_message_log.configure(state='disabled')
self.parent.textbox_message_log.see("end")
if self.parent.translator.authentication(self.parent.CHOICE_TRANSLATOR, self.parent.AUTH_KEYS[self.parent.CHOICE_TRANSLATOR]) is True: if self.parent.translator.authentication(self.parent.CHOICE_TRANSLATOR, self.parent.AUTH_KEYS[self.parent.CHOICE_TRANSLATOR]) is True:
self.parent.AUTH_KEYS["DeepL(auth)"] = value self.parent.AUTH_KEYS["DeepL(auth)"] = value
utils.save_json(self.parent.PATH_CONFIG, "AUTH_KEYS", self.parent.AUTH_KEYS) utils.save_json(self.parent.PATH_CONFIG, "AUTH_KEYS", self.parent.AUTH_KEYS)
else: else:
self.parent.textbox_message_log.configure(state='normal') utils.print_textbox(self.parent.textbox_message_system_log, f"[error] Auth Key or language setting is incorrect")
self.parent.textbox_message_log.insert("end", f"[ERROR]Auth Keyを設定してないか間違っています\n")
self.parent.textbox_message_log.configure(state='disabled')
self.parent.textbox_message_log.see("end")
def update_message_format(self): def update_message_format(self):
value = self.entry_message_format.get() value = self.entry_message_format.get()