update send transcript

This commit is contained in:
misyaguziya
2023-06-30 17:46:35 +09:00
parent aca449349a
commit 98cf5a81f8
4 changed files with 56 additions and 45 deletions

View File

@@ -11,6 +11,7 @@ class BaseRecorder:
self.recorder = sr.Recognizer() self.recorder = sr.Recognizer()
self.recorder.energy_threshold = ENERGY_THRESHOLD self.recorder.energy_threshold = ENERGY_THRESHOLD
self.recorder.dynamic_energy_threshold = DYNAMIC_ENERGY_THRESHOLD self.recorder.dynamic_energy_threshold = DYNAMIC_ENERGY_THRESHOLD
self.stop = None
if source is None: if source is None:
raise ValueError("audio source can't be None") raise ValueError("audio source can't be None")
@@ -25,7 +26,7 @@ class BaseRecorder:
def record_callback(_, audio:sr.AudioData) -> None: def record_callback(_, audio:sr.AudioData) -> None:
audio_queue.put((audio.get_raw_data(), datetime.now())) audio_queue.put((audio.get_raw_data(), datetime.now()))
self.recorder.listen_in_background(self.source, record_callback, phrase_time_limit=RECORD_TIMEOUT) self.stop = self.recorder.listen_in_background(self.source, record_callback, phrase_time_limit=RECORD_TIMEOUT)
class SelectedMicRecorder(BaseRecorder): class SelectedMicRecorder(BaseRecorder):
def __init__(self, device): def __init__(self, device):

View File

@@ -1,6 +1,4 @@
import io import io
import os
import tempfile
import threading import threading
import wave import wave
import custom_speech_recognition as sr import custom_speech_recognition as sr
@@ -28,7 +26,7 @@ class AudioTranscriber:
} }
def transcribe_audio_queue(self, audio_queue): def transcribe_audio_queue(self, audio_queue):
while True: # while True:
audio, time_spoken = audio_queue.get() audio, time_spoken = audio_queue.get()
self.update_last_sample_and_phrase_status(audio, time_spoken) self.update_last_sample_and_phrase_status(audio, time_spoken)
@@ -87,7 +85,6 @@ class AudioTranscriber:
transcript[0] = text transcript[0] = text
def get_transcript(self): def get_transcript(self):
print(self.transcript_data)
if len(self.transcript_data) > 0: if len(self.transcript_data) > 0:
text = self.transcript_data.pop(-1) text = self.transcript_data.pop(-1)
else: else:

61
VRCT.py
View File

@@ -1,5 +1,7 @@
import os import os
import json import json
import queue
import time
import tkinter as tk import tkinter as tk
import customtkinter import customtkinter
from PIL import Image from PIL import Image
@@ -10,6 +12,10 @@ import transcription
import osc_tools import osc_tools
import window_config import window_config
import window_information import window_information
import languages
import audio_utils
import AudioRecorder
import AudioTranscriber
class App(customtkinter.CTk): class App(customtkinter.CTk):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
@@ -33,17 +39,17 @@ class App(customtkinter.CTk):
self.FONT_FAMILY = "Yu Gothic UI" self.FONT_FAMILY = "Yu Gothic UI"
## Translation ## Translation
self.CHOICE_TRANSLATOR = "DeepL(web)" self.CHOICE_TRANSLATOR = "DeepL(web)"
self.INPUT_SOURCE_LANG = "Japanese" self.INPUT_SOURCE_LANG = list(languages.deepl_translate_lang.keys())[0]
self.INPUT_TARGET_LANG = "English" self.INPUT_TARGET_LANG = list(languages.deepl_translate_lang.keys())[1]
self.OUTPUT_SOURCE_LANG = "English" self.OUTPUT_SOURCE_LANG = list(languages.deepl_translate_lang.keys())[1]
self.OUTPUT_TARGET_LANG = "Japanese" self.OUTPUT_TARGET_LANG = list(languages.deepl_translate_lang.keys())[0]
## Transcription ## Transcription
self.CHOICE_MIC_DEVICE = self.vr.search_default_device()[0] self.CHOICE_MIC_DEVICE = self.vr.search_default_device()[0]
self.INPUT_MIC_VOICE_LANGUAGE = "Japanese Japan" self.INPUT_MIC_VOICE_LANGUAGE = list(languages.recognize_lang.keys())[0]
self.INPUT_MIC_IS_DYNAMIC = False self.INPUT_MIC_IS_DYNAMIC = False
self.INPUT_MIC_THRESHOLD = 300 self.INPUT_MIC_THRESHOLD = 300
self.CHOICE_SPEAKER_DEVICE = self.vr.search_default_device()[1] self.CHOICE_SPEAKER_DEVICE = self.vr.search_default_device()[1]
self.INPUT_SPEAKER_VOICE_LANGUAGE = "English United States" self.INPUT_SPEAKER_VOICE_LANGUAGE = list(languages.recognize_lang.keys())[1]
self.INPUT_SPEAKER_INTERVAL = 4 self.INPUT_SPEAKER_INTERVAL = 4
## Parameter ## Parameter
@@ -395,25 +401,31 @@ class App(customtkinter.CTk):
def checkbox_transcription_send_callback(self): def checkbox_transcription_send_callback(self):
self.ENABLE_TRANSCRIPTION_SEND = self.checkbox_transcription_send.get() self.ENABLE_TRANSCRIPTION_SEND = self.checkbox_transcription_send.get()
if self.ENABLE_TRANSCRIPTION_SEND is True: if self.ENABLE_TRANSCRIPTION_SEND is True:
self.mic_audio_queue = queue.Queue()
mic_device = audio_utils.get_default_input_device()
self.mic_audio_recorder = AudioRecorder.SelectedMicRecorder(mic_device)
self.mic_audio_recorder.record_into_queue(self.mic_audio_queue)
self.mic_transcriber = AudioTranscriber.AudioTranscriber(
speaker=False,
source=self.mic_audio_recorder.source,
language=languages.recognize_lang[self.INPUT_MIC_VOICE_LANGUAGE]
)
self.mic_transcribe = utils.thread_fnc(self.mic_transcriber.transcribe_audio_queue, args=(self.mic_audio_queue,))
self.mic_transcribe.daemon = True
self.mic_transcribe.start()
self.print_transcript = utils.thread_fnc(self.mic_transcript_to_chatbox)
self.print_transcript.start()
utils.print_textbox(self.textbox_message_log, "Start voice2chatbox", "INFO") utils.print_textbox(self.textbox_message_log, "Start voice2chatbox", "INFO")
utils.print_textbox(self.textbox_message_system_log, "Start voice2chatbox", "INFO") utils.print_textbox(self.textbox_message_system_log, "Start voice2chatbox", "INFO")
# start threading
self.vr.set_mic(
device_name=self.CHOICE_MIC_DEVICE,
threshold=int(self.INPUT_MIC_THRESHOLD),
is_dynamic=self.INPUT_MIC_IS_DYNAMIC,
language=self.INPUT_MIC_VOICE_LANGUAGE,
)
self.vr.init_mic()
self.th_vr_listen_mic = utils.thread_fnc(self.vr_listen_mic)
self.th_vr_recognize_mic = utils.thread_fnc(self.vr_recognize_mic)
self.th_vr_listen_mic.start()
self.th_vr_recognize_mic.start()
else: else:
if isinstance(self.th_vr_listen_mic, utils.thread_fnc): if isinstance(self.print_transcript, utils.thread_fnc):
self.th_vr_listen_mic.stop() self.print_transcript.stop()
if isinstance(self.th_vr_recognize_mic, utils.thread_fnc): if isinstance(self.mic_transcribe, utils.thread_fnc):
self.th_vr_recognize_mic.stop() self.mic_transcribe.stop()
if self.mic_audio_recorder.stop != None:
self.mic_audio_recorder.stop()
self.mic_audio_recorder.stop = None
utils.print_textbox(self.textbox_message_log, "Stop voice2chatbox", "INFO") utils.print_textbox(self.textbox_message_log, "Stop voice2chatbox", "INFO")
utils.print_textbox(self.textbox_message_system_log, "Stop voice2chatbox", "INFO") utils.print_textbox(self.textbox_message_system_log, "Stop voice2chatbox", "INFO")
@@ -448,8 +460,8 @@ class App(customtkinter.CTk):
if self.checkbox_transcription_send.get() is True: if self.checkbox_transcription_send.get() is True:
self.vr.listen_mic() self.vr.listen_mic()
def vr_recognize_mic(self): def mic_transcript_to_chatbox(self):
message = self.vr.recognize_mic() message = self.mic_transcriber.get_transcript()
if len(message) > 0: if len(message) > 0:
# translate # translate
if self.checkbox_translation.get() is False: if self.checkbox_translation.get() is False:
@@ -473,6 +485,7 @@ class App(customtkinter.CTk):
# update textbox message log # update textbox message log
utils.print_textbox(self.textbox_message_log, f"{voice_message}", "SEND") utils.print_textbox(self.textbox_message_log, f"{voice_message}", "SEND")
utils.print_textbox(self.textbox_message_send_log, f"{voice_message}", "SEND") utils.print_textbox(self.textbox_message_send_log, f"{voice_message}", "SEND")
time.sleep(1)
def vr_listen_spk(self): def vr_listen_spk(self):
if self.checkbox_transcription_receive.get() is True: if self.checkbox_transcription_receive.get() is True:

View File

@@ -38,4 +38,4 @@ class thread_fnc(threading.Thread):
while True: while True:
if self.stopped(): if self.stopped():
return return
self.fnc() self.fnc(*self._args, **self._kwargs)