update AudioTranscriber

This commit is contained in:
misyaguziya
2023-06-29 18:51:37 +09:00
parent 868c84a9eb
commit c89da3e7ae
2 changed files with 30 additions and 14 deletions

View File

@@ -1,14 +1,18 @@
import os
from io import BytesIO
import tempfile
import threading import threading
import wave
import custom_speech_recognition as sr import custom_speech_recognition as sr
from datetime import timedelta from datetime import timedelta
from heapq import merge import pyaudiowpatch as pyaudio
PHRASE_TIMEOUT = 3.05 PHRASE_TIMEOUT = 3.05
MAX_PHRASES = 10 MAX_PHRASES = 5
class AudioTranscriber: class AudioTranscriber:
def __init__(self, source, language): def __init__(self, speaker, source, language):
self.speaker = speaker
self.language = language self.language = language
self.transcript_data = [] self.transcript_data = []
self.transcript_changed_event = threading.Event() self.transcript_changed_event = threading.Event()
@@ -20,6 +24,7 @@ class AudioTranscriber:
"last_sample": bytes(), "last_sample": bytes(),
"last_spoken": None, "last_spoken": None,
"new_phrase": True, "new_phrase": True,
"process_data_func": self.process_speaker_data if speaker else self.process_speaker_data
} }
def transcribe_audio_queue(self, audio_queue): def transcribe_audio_queue(self, audio_queue):
@@ -29,12 +34,14 @@ class AudioTranscriber:
text = '' text = ''
try: try:
audio_data = self.process_data() fd, path = tempfile.mkstemp(suffix=".wav")
os.close(fd)
audio_data = self.audio_sources["process_data_func"](path)
text = self.audio_recognizer.recognize_google(audio_data, language=self.language) text = self.audio_recognizer.recognize_google(audio_data, language=self.language)
except Exception as e: except Exception as e:
pass pass
finally: finally:
pass os.unlink(path)
if text != '': if text != '':
self.update_transcript(text) self.update_transcript(text)
@@ -50,11 +57,21 @@ class AudioTranscriber:
source_info["last_sample"] += data source_info["last_sample"] += data
source_info["last_spoken"] = time_spoken source_info["last_spoken"] = time_spoken
def process_mic_data(self, path=None):
    """Wrap the buffered microphone samples in an ``sr.AudioData`` object.

    Args:
        path: Ignored. Accepted only so this method can be called the same
            way as ``process_speaker_data``, i.e. through
            ``self.audio_sources["process_data_func"](path)``.

    Returns:
        An ``sr.AudioData`` built from the ``last_sample``, ``sample_rate``
        and ``sample_width`` entries of ``self.audio_sources``.
    """
    source_info = self.audio_sources
    # The raw PCM buffer is deliberately not printed: dumping it on every
    # call floods stdout with binary data.
    return sr.AudioData(
        source_info["last_sample"],
        source_info["sample_rate"],
        source_info["sample_width"],
    )
def process_speaker_data(self, path):
    """Write the buffered speaker samples to a WAV file and read them back.

    The samples in ``self.audio_sources["last_sample"]`` are written to
    ``path`` as 16-bit PCM, using the ``channels`` and ``sample_rate``
    entries of ``self.audio_sources``. The file is then loaded through
    ``sr.AudioFile`` and recorded with ``self.audio_recognizer``.

    Args:
        path: Filesystem path of the WAV file to (over)write. The caller
            owns the file and is responsible for deleting it.

    Returns:
        The audio object returned by ``self.audio_recognizer.record``.
    """
    source_info = self.audio_sources
    # Use the module-level helper instead of constructing a PyAudio()
    # instance per call; the instance was never terminated and leaked a
    # PortAudio handle on every transcription.
    sample_width = pyaudio.get_sample_size(pyaudio.paInt16)

    with wave.open(path, 'wb') as wf:
        wf.setnchannels(source_info["channels"])
        wf.setsampwidth(sample_width)
        wf.setframerate(source_info["sample_rate"])
        wf.writeframes(source_info["last_sample"])

    with sr.AudioFile(path) as source:
        return self.audio_recognizer.record(source)
def update_transcript(self, text): def update_transcript(self, text):
source_info = self.audio_sources source_info = self.audio_sources
transcript = self.transcript_data transcript = self.transcript_data

View File

@@ -6,12 +6,11 @@ import AudioRecorder
import audio_utils import audio_utils
mic_audio_queue = queue.Queue() mic_audio_queue = queue.Queue()
mic_device = audio_utils.get_default_input_device() mic_device = audio_utils.get_default_input_device()
mic_audio_recorder = AudioRecorder.SelectedMicRecorder(mic_device) mic_audio_recorder = AudioRecorder.SelectedMicRecorder(mic_device)
mic_audio_recorder.record_into_queue(mic_audio_queue) mic_audio_recorder.record_into_queue(mic_audio_queue)
mic_transcriber = AudioTranscriber.AudioTranscriber(source=mic_audio_recorder.source, language="ja-JP") mic_transcriber = AudioTranscriber.AudioTranscriber(speaker=False, source=mic_audio_recorder.source, language="ja-JP")
mic_transcribe = threading.Thread(target=mic_transcriber.transcribe_audio_queue, args=(mic_audio_queue,)) mic_transcribe = threading.Thread(target=mic_transcriber.transcribe_audio_queue, args=(mic_audio_queue,))
mic_transcribe.daemon = True mic_transcribe.daemon = True
mic_transcribe.start() mic_transcribe.start()
@@ -23,7 +22,7 @@ spk_device = audio_utils.get_default_output_device()
spk_audio_recorder = AudioRecorder.SelectedSpeakerRecorder(spk_device) spk_audio_recorder = AudioRecorder.SelectedSpeakerRecorder(spk_device)
spk_audio_recorder.record_into_queue(spk_audio_queue) spk_audio_recorder.record_into_queue(spk_audio_queue)
spk_transcriber = AudioTranscriber.AudioTranscriber(source=mic_audio_recorder.source, language="ja-JP") spk_transcriber = AudioTranscriber.AudioTranscriber(speaker=True, source=spk_audio_recorder.source, language="ja-JP")
spk_transcribe = threading.Thread(target=spk_transcriber.transcribe_audio_queue, args=(spk_audio_queue,)) spk_transcribe = threading.Thread(target=spk_transcriber.transcribe_audio_queue, args=(spk_audio_queue,))
spk_transcribe.daemon = True spk_transcribe.daemon = True
spk_transcribe.start() spk_transcribe.start()
@@ -32,7 +31,7 @@ while True:
text = mic_transcriber.get_transcript() text = mic_transcriber.get_transcript()
if len(text) > 0: if len(text) > 0:
print("mic:", text) print("mic:", text)
# text = spk_transcriber.get_transcript() text = spk_transcriber.get_transcript()
# if len(text) > 0: if len(text) > 0:
# print("spk:", text) print("spk:", text)
time.sleep(0.1) time.sleep(0.1)