# Patch summary (unified git diff; the line breaks and indentation of this copy
# have been collapsed, so hunk line counts cannot be checked against the text).
#
# transcription.py
#   - `import pyaudio` is replaced by `import sounddevice as sd`.
#   - `VoiceRecognizer.__init__` no longer assigns `self.input_device_dict`.
#   - `search_input_device` now returns a list of {"name", "index"} dicts for the
#     `sd.query_devices()` entries with `max_input_channels > 0`; the removed
#     version returned a dict keyed `No.<n>:<name>` mapping to the device index.
#   - `search_output_device` is added: same shape, `max_output_channels > 0`.
#   - `search_default_device_index` is added: one {"name", "index"} dict per
#     element of `sd.default.device`, looked up in `sd.query_devices()`.
#   - `set_mic` calls `search_input_device()` itself and takes the index of the
#     first entry whose name equals `device_name`.
# VRCT.py
#   - An unset CHOICE_MIC_DEVICE falls back to
#     `search_default_device_index()[0]["name"]`; an unset CHOICE_SPEAKER_DEVICE
#     falls back to `search_default_device_index()[1]["name"]`.
# window_config.py
#   - The mic menu lists `search_input_device()` names and the speaker menu lists
#     `search_output_device()` names (both previously listed the keys of
#     `input_device_dict`).
#
# NOTE(review): device names lose the `No.<n>:` prefix, so a CHOICE_MIC_DEVICE
#   value in the old format would no longer match in `set_mic` -- confirm
#   whether such values are persisted anywhere.
# NOTE(review): `set_mic` assigns `index` only when the name is found; the
#   indentation is lost here, so verify that `sr.Microphone(device_index=index)`
#   cannot run with `index` unbound.
# NOTE(review): presumably `sd.default.device` holds -1 when no default device
#   exists, which would make `device_list[i]` select the last device -- TODO confirm.
# NOTE(review): assumes the `index` values reported by sounddevice are valid
#   `device_index` values for `sr.Microphone` -- TODO confirm.
diff --git a/VRCT.py b/VRCT.py index 5e74690b..2fbe8467 100644 --- a/VRCT.py +++ b/VRCT.py @@ -259,7 +259,8 @@ class App(customtkinter.CTk): ## set transcription instance self.vr = transcription.VoiceRecognizer() - self.CHOICE_MIC_DEVICE = self.CHOICE_MIC_DEVICE if self.CHOICE_MIC_DEVICE is not None else list(self.vr.input_device_dict.keys())[0] + self.CHOICE_MIC_DEVICE = self.CHOICE_MIC_DEVICE if self.CHOICE_MIC_DEVICE is not None else self.vr.search_default_device_index()[0]["name"] + self.CHOICE_SPEAKER_DEVICE = self.CHOICE_SPEAKER_DEVICE if self.CHOICE_SPEAKER_DEVICE is not None else self.vr.search_default_device_index()[1]["name"] ## set checkbox enable translation if self.ENABLE_TRANSLATION: diff --git a/transcription.py b/transcription.py index 7b0eee17..ceddfb5c 100644 --- a/transcription.py +++ b/transcription.py @@ -1,10 +1,9 @@ -import pyaudio +import sounddevice as sd import speech_recognition as sr # VoiceRecognizer class VoiceRecognizer(): def __init__(self): - self.input_device_dict = self.search_input_device() self.r = sr.Recognizer() self.mic = None self.languages = [ @@ -19,25 +18,37 @@ class VoiceRecognizer(): ] def search_input_device(self): - pa = pyaudio.PyAudio() - input_device_dict = {} + device_list = sd.query_devices() + input_device_list = [] - mic_cnt = 1 - for i in range(pa.get_device_count()): - device = pa.get_device_info_by_index(i) - try: - device["name"] = device["name"].encode('shift_jis').decode('utf-8') - except: - device["name"] = device["name"].encode('utf-8').decode('utf-8') - if device["maxInputChannels"] > 0: - input_device_dict[f'No.{mic_cnt}:{device["name"]}'] = device["index"] - mic_cnt += 1 - pa.terminate() - return input_device_dict + for device in device_list: + if device["max_input_channels"] > 0: + input_device_list.append({"name": device["name"], "index": device["index"]}) + + return input_device_list + + def search_output_device(self): + device_list = sd.query_devices() + output_device_list = [] + + for device in 
device_list: + if device["max_output_channels"] > 0: + output_device_list.append({"name": device["name"], "index": device["index"]}) + + return output_device_list + + def search_default_device_index(self): + device_list = sd.query_devices() + default_device_list = [] + for i in sd.default.device: + default_device_list.append({"name": device_list[i]["name"], "index": device_list[i]["index"]}) + return default_device_list def set_mic(self, device_name, threshold=50, is_dynamic=False): - if device_name in [v for v in self.input_device_dict.keys()]: - index = self.input_device_dict[device_name] + input_device_list = self.search_input_device() + if device_name in [input_device["name"] for input_device in input_device_list]: + index = [device["index"] for device in input_device_list if device["name"] == device_name][0] + + self.mic = sr.Microphone(device_index=index) self.r.energy_threshold = threshold if is_dynamic: diff --git a/window_config.py b/window_config.py index e95b01b4..a5ef6daf 100644 --- a/window_config.py +++ b/window_config.py @@ -204,7 +204,7 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel): self.label_input_mic_device.grid(row=0, column=0, columnspan=1, padx=5, pady=5, sticky="nsw") self.optionmenu_input_mic_device = customtkinter.CTkOptionMenu( self.tabview_config.tab("Transcription"), - values=list(self.parent.vr.input_device_dict.keys()), + values=[device["name"] for device in self.parent.vr.search_input_device()], command=self.optionmenu_input_mic_device_callback, font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY), variable=customtkinter.StringVar(value=self.parent.CHOICE_MIC_DEVICE) @@ -277,7 +277,7 @@ class ToplevelWindowConfig(customtkinter.CTkToplevel): self.label_input_speaker_device.grid(row=4, column=0, columnspan=1, padx=5, pady=5, sticky="nsw") self.optionmenu_input_speaker_device = customtkinter.CTkOptionMenu( self.tabview_config.tab("Transcription"), - values=list(self.parent.vr.input_device_dict.keys()), + 
values=[device["name"] for device in self.parent.vr.search_output_device()], # command=self.optionmenu_input_speaker_device_callback, font=customtkinter.CTkFont(family=self.parent.FONT_FAMILY), variable=customtkinter.StringVar(value=self.parent.CHOICE_SPEAKER_DEVICE),