update AudioTranscriber
This commit is contained in:
@@ -1,14 +1,18 @@
|
|||||||
|
import os
|
||||||
|
from io import BytesIO
|
||||||
|
import tempfile
|
||||||
import threading
|
import threading
|
||||||
|
import wave
|
||||||
import custom_speech_recognition as sr
|
import custom_speech_recognition as sr
|
||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
from heapq import merge
|
import pyaudiowpatch as pyaudio
|
||||||
|
|
||||||
PHRASE_TIMEOUT = 3.05
|
PHRASE_TIMEOUT = 3.05
|
||||||
MAX_PHRASES = 10
|
MAX_PHRASES = 5
|
||||||
|
|
||||||
class AudioTranscriber:
|
class AudioTranscriber:
|
||||||
def __init__(self, source, language):
|
def __init__(self, speaker, source, language):
|
||||||
|
self.speaker = speaker
|
||||||
self.language = language
|
self.language = language
|
||||||
self.transcript_data = []
|
self.transcript_data = []
|
||||||
self.transcript_changed_event = threading.Event()
|
self.transcript_changed_event = threading.Event()
|
||||||
@@ -20,6 +24,7 @@ class AudioTranscriber:
|
|||||||
"last_sample": bytes(),
|
"last_sample": bytes(),
|
||||||
"last_spoken": None,
|
"last_spoken": None,
|
||||||
"new_phrase": True,
|
"new_phrase": True,
|
||||||
|
"process_data_func": self.process_speaker_data if speaker else self.process_speaker_data
|
||||||
}
|
}
|
||||||
|
|
||||||
def transcribe_audio_queue(self, audio_queue):
|
def transcribe_audio_queue(self, audio_queue):
|
||||||
@@ -29,12 +34,14 @@ class AudioTranscriber:
|
|||||||
|
|
||||||
text = ''
|
text = ''
|
||||||
try:
|
try:
|
||||||
audio_data = self.process_data()
|
fd, path = tempfile.mkstemp(suffix=".wav")
|
||||||
|
os.close(fd)
|
||||||
|
audio_data = self.audio_sources["process_data_func"](path)
|
||||||
text = self.audio_recognizer.recognize_google(audio_data, language=self.language)
|
text = self.audio_recognizer.recognize_google(audio_data, language=self.language)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
pass
|
pass
|
||||||
finally:
|
finally:
|
||||||
pass
|
os.unlink(path)
|
||||||
|
|
||||||
if text != '':
|
if text != '':
|
||||||
self.update_transcript(text)
|
self.update_transcript(text)
|
||||||
@@ -50,11 +57,21 @@ class AudioTranscriber:
|
|||||||
source_info["last_sample"] += data
|
source_info["last_sample"] += data
|
||||||
source_info["last_spoken"] = time_spoken
|
source_info["last_spoken"] = time_spoken
|
||||||
|
|
||||||
def process_mic_data(self, path=None):
    """Wrap the accumulated microphone sample in an ``sr.AudioData`` object.

    Args:
        path: Ignored. It is optional and exists only so this method can be
            called the same way as ``process_speaker_data`` -- the
            transcription loop invokes the selected processing function as
            ``self.audio_sources["process_data_func"](path)``.

    Returns:
        An ``sr.AudioData`` built from the ``"last_sample"``,
        ``"sample_rate"`` and ``"sample_width"`` entries of
        ``self.audio_sources``.
    """
    source_info = self.audio_sources
    return sr.AudioData(
        source_info["last_sample"],
        source_info["sample_rate"],
        source_info["sample_width"],
    )
|
||||||
|
|
||||||
|
def process_speaker_data(self, path):
    """Write the accumulated sample to a WAV file and read it back as audio.

    The bytes in ``self.audio_sources["last_sample"]`` are written to
    ``path`` as a WAV file using the ``"channels"`` and ``"sample_rate"``
    entries of ``self.audio_sources`` and the byte width of
    ``pyaudio.paInt16``. The file is then re-opened through
    ``sr.AudioFile`` and recorded with ``self.audio_recognizer``.

    Args:
        path: Filesystem path of the WAV file to write. The file is opened
            in ``'wb'`` mode, so existing content is overwritten. This
            method does not delete the file.

    Returns:
        The object returned by ``self.audio_recognizer.record(source)``.
    """
    source_info = self.audio_sources

    # Use the module-level helper instead of creating a PyAudio() instance:
    # the instance was never terminated, leaking a PortAudio handle per call.
    sample_width = pyaudio.get_sample_size(pyaudio.paInt16)

    with wave.open(path, 'wb') as wf:
        wf.setnchannels(source_info["channels"])
        wf.setsampwidth(sample_width)
        wf.setframerate(source_info["sample_rate"])
        wf.writeframes(source_info["last_sample"])

    with sr.AudioFile(path) as source:
        audio = self.audio_recognizer.record(source)
    return audio
|
||||||
|
|
||||||
def update_transcript(self, text):
|
def update_transcript(self, text):
|
||||||
source_info = self.audio_sources
|
source_info = self.audio_sources
|
||||||
transcript = self.transcript_data
|
transcript = self.transcript_data
|
||||||
|
|||||||
11
test_main.py
11
test_main.py
@@ -6,12 +6,11 @@ import AudioRecorder
|
|||||||
import audio_utils
|
import audio_utils
|
||||||
|
|
||||||
mic_audio_queue = queue.Queue()
|
mic_audio_queue = queue.Queue()
|
||||||
|
|
||||||
mic_device = audio_utils.get_default_input_device()
|
mic_device = audio_utils.get_default_input_device()
|
||||||
mic_audio_recorder = AudioRecorder.SelectedMicRecorder(mic_device)
|
mic_audio_recorder = AudioRecorder.SelectedMicRecorder(mic_device)
|
||||||
mic_audio_recorder.record_into_queue(mic_audio_queue)
|
mic_audio_recorder.record_into_queue(mic_audio_queue)
|
||||||
|
|
||||||
mic_transcriber = AudioTranscriber.AudioTranscriber(source=mic_audio_recorder.source, language="ja-JP")
|
mic_transcriber = AudioTranscriber.AudioTranscriber(speaker=False, source=mic_audio_recorder.source, language="ja-JP")
|
||||||
mic_transcribe = threading.Thread(target=mic_transcriber.transcribe_audio_queue, args=(mic_audio_queue,))
|
mic_transcribe = threading.Thread(target=mic_transcriber.transcribe_audio_queue, args=(mic_audio_queue,))
|
||||||
mic_transcribe.daemon = True
|
mic_transcribe.daemon = True
|
||||||
mic_transcribe.start()
|
mic_transcribe.start()
|
||||||
@@ -23,7 +22,7 @@ spk_device = audio_utils.get_default_output_device()
|
|||||||
spk_audio_recorder = AudioRecorder.SelectedSpeakerRecorder(spk_device)
|
spk_audio_recorder = AudioRecorder.SelectedSpeakerRecorder(spk_device)
|
||||||
spk_audio_recorder.record_into_queue(spk_audio_queue)
|
spk_audio_recorder.record_into_queue(spk_audio_queue)
|
||||||
|
|
||||||
spk_transcriber = AudioTranscriber.AudioTranscriber(source=mic_audio_recorder.source, language="ja-JP")
|
spk_transcriber = AudioTranscriber.AudioTranscriber(speaker=True, source=spk_audio_recorder.source, language="ja-JP")
|
||||||
spk_transcribe = threading.Thread(target=spk_transcriber.transcribe_audio_queue, args=(spk_audio_queue,))
|
spk_transcribe = threading.Thread(target=spk_transcriber.transcribe_audio_queue, args=(spk_audio_queue,))
|
||||||
spk_transcribe.daemon = True
|
spk_transcribe.daemon = True
|
||||||
spk_transcribe.start()
|
spk_transcribe.start()
|
||||||
@@ -32,7 +31,7 @@ while True:
|
|||||||
text = mic_transcriber.get_transcript()
|
text = mic_transcriber.get_transcript()
|
||||||
if len(text) > 0:
|
if len(text) > 0:
|
||||||
print("mic:", text)
|
print("mic:", text)
|
||||||
# text = spk_transcriber.get_transcript()
|
text = spk_transcriber.get_transcript()
|
||||||
# if len(text) > 0:
|
if len(text) > 0:
|
||||||
# print("spk:", text)
|
print("spk:", text)
|
||||||
time.sleep(0.1)
|
time.sleep(0.1)
|
||||||
Reference in New Issue
Block a user