Merge branch 'audio_indicator' into develop

This commit is contained in:
Sakamoto Shiina
2024-02-09 22:50:52 +09:00
3 changed files with 92 additions and 9 deletions

View File

@@ -17,7 +17,7 @@ from flashtext import KeywordProcessor
from models.translation.translation_translator import Translator
from models.transcription.transcription_utils import getInputDevices, getDefaultOutputDevice
from models.osc.osc_tools import sendTyping, sendMessage, sendTestAction, receiveOscParameters
from models.transcription.transcription_recorder import SelectedMicRecorder, SelectedSpeakerRecorder
from models.transcription.transcription_recorder import SelectedMicEnergyAndAudioRecorder, SelectedSpeakerEnergyAndAudioRecorder
from models.transcription.transcription_recorder import SelectedMicEnergyRecorder, SelectedSpeakeEnergyRecorder
from models.transcription.transcription_transcriber import AudioTranscriber
from models.xsoverlay.notification import xsoverlayForVRCT
@@ -321,19 +321,20 @@ class Model:
return
mic_audio_queue = Queue()
mic_energy_queue = Queue()
device = [device for device in getInputDevices()[config.CHOICE_MIC_HOST] if device["name"] == config.CHOICE_MIC_DEVICE][0]
record_timeout = config.INPUT_MIC_RECORD_TIMEOUT
phase_timeout = config.INPUT_MIC_PHRASE_TIMEOUT
if record_timeout > phase_timeout:
record_timeout = phase_timeout
self.mic_audio_recorder = SelectedMicRecorder(
self.mic_audio_recorder = SelectedMicEnergyAndAudioRecorder(
device=device,
energy_threshold=config.INPUT_MIC_ENERGY_THRESHOLD,
dynamic_energy_threshold=config.INPUT_MIC_DYNAMIC_ENERGY_THRESHOLD,
record_timeout=record_timeout,
)
self.mic_audio_recorder.recordIntoQueue(mic_audio_queue)
self.mic_audio_recorder.recordIntoQueue(mic_audio_queue, mic_energy_queue)
mic_transcriber = AudioTranscriber(
speaker=False,
source=self.mic_audio_recorder.source,
@@ -350,17 +351,34 @@ class Model:
except Exception:
pass
def sendMicEnergy():
    """Forward at most one queued microphone energy reading to ``fnc``.

    Reads from the enclosing scope's ``mic_energy_queue``; exceptions raised
    by ``fnc`` are swallowed. A 10 ms sleep follows on every call.
    """
    if not mic_energy_queue.empty():
        level = mic_energy_queue.get()
        try:
            fnc(level)
        except Exception:
            # Best-effort delivery: a failing callback must not end the caller.
            pass
    sleep(0.01)
self.mic_print_transcript = threadFnc(sendMicTranscript)
self.mic_print_transcript.daemon = True
self.mic_print_transcript.start()
self.mic_get_energy = threadFnc(sendMicEnergy)
self.mic_get_energy.daemon = True
self.mic_get_energy.start()
# Stop the microphone pipeline: for each of the transcript-printing thread,
# the audio recorder and the energy-forwarding thread, call stop() on it and
# reset the attribute to None, but only when it currently holds the expected
# type.
def stopMicTranscript(self):
# Transcript printer thread.
if isinstance(self.mic_print_transcript, threadFnc):
self.mic_print_transcript.stop()
self.mic_print_transcript = None
# NOTE(review): the next two lines look like the removed and added sides of
# the same diff line (SelectedMicRecorder -> SelectedMicEnergyAndAudioRecorder);
# presumably only the second one exists in the merged file - confirm.
if isinstance(self.mic_audio_recorder, SelectedMicRecorder):
if isinstance(self.mic_audio_recorder, SelectedMicEnergyAndAudioRecorder):
self.mic_audio_recorder.stop()
self.mic_audio_recorder = None
# Energy-forwarding thread.
if isinstance(self.mic_get_energy, threadFnc):
self.mic_get_energy.stop()
self.mic_get_energy = None
def startCheckMicEnergy(self, fnc, end_fnc, error_fnc=None):
if config.CHOICE_MIC_HOST == "NoHost" or config.CHOICE_MIC_DEVICE == "NoDevice":
@@ -405,18 +423,19 @@ class Model:
return
speaker_audio_queue = Queue()
speaker_energy_queue = Queue()
record_timeout = config.INPUT_SPEAKER_RECORD_TIMEOUT
phase_timeout = config.INPUT_SPEAKER_PHRASE_TIMEOUT
if record_timeout > phase_timeout:
record_timeout = phase_timeout
self.speaker_audio_recorder = SelectedSpeakerRecorder(
self.speaker_audio_recorder = SelectedSpeakerEnergyAndAudioRecorder(
device=speaker_device,
energy_threshold=config.INPUT_SPEAKER_ENERGY_THRESHOLD,
dynamic_energy_threshold=config.INPUT_SPEAKER_DYNAMIC_ENERGY_THRESHOLD,
record_timeout=record_timeout,
)
self.speaker_audio_recorder.recordIntoQueue(speaker_audio_queue)
self.speaker_audio_recorder.recordIntoQueue(speaker_audio_queue, speaker_energy_queue)
speaker_transcriber = AudioTranscriber(
speaker=True,
source=self.speaker_audio_recorder.source,
@@ -433,17 +452,34 @@ class Model:
except Exception:
pass
def sendSpeakerEnergy():
    """Forward at most one queued speaker energy reading to ``fnc``.

    Reads from the enclosing scope's ``speaker_energy_queue``; exceptions
    raised by ``fnc`` are swallowed. A 10 ms sleep follows on every call.
    """
    if not speaker_energy_queue.empty():
        level = speaker_energy_queue.get()
        try:
            fnc(level)
        except Exception:
            # Best-effort delivery: a failing callback must not end the caller.
            pass
    sleep(0.01)
self.speaker_print_transcript = threadFnc(sendSpeakerTranscript)
self.speaker_print_transcript.daemon = True
self.speaker_print_transcript.start()
self.speaker_get_energy = threadFnc(sendSpeakerEnergy)
self.speaker_get_energy.daemon = True
self.speaker_get_energy.start()
# Stop the speaker pipeline: for each of the transcript-printing thread, the
# audio recorder and the energy-forwarding thread, call stop() on it and
# reset the attribute to None, but only when it currently holds the expected
# type.
def stopSpeakerTranscript(self):
# Transcript printer thread.
if isinstance(self.speaker_print_transcript, threadFnc):
self.speaker_print_transcript.stop()
self.speaker_print_transcript = None
# NOTE(review): the next two lines look like the removed and added sides of
# the same diff line (SelectedSpeakerRecorder ->
# SelectedSpeakerEnergyAndAudioRecorder); presumably only the second one
# exists in the merged file - confirm.
if isinstance(self.speaker_audio_recorder, SelectedSpeakerRecorder):
if isinstance(self.speaker_audio_recorder, SelectedSpeakerEnergyAndAudioRecorder):
self.speaker_audio_recorder.stop()
self.speaker_audio_recorder = None
# Energy-forwarding thread.
if isinstance(self.speaker_get_energy, threadFnc):
self.speaker_get_energy.stop()
self.speaker_get_energy = None
def startCheckSpeakerEnergy(self, fnc, end_fnc, error_fnc=None):
speaker_device = getDefaultOutputDevice()

View File

@@ -87,4 +87,51 @@ class SelectedSpeakeEnergyRecorder(BaseEnergyRecorder):
channels=device["maxInputChannels"]
)
super().__init__(source=source)
# self.adjustForNoise()
class BaseEnergyAndAudioRecorder:
    """Recorder that feeds captured audio and energy readings into two queues.

    Wraps a ``Recognizer`` configured with the supplied thresholds. Background
    listening is started by ``recordIntoQueue``; whatever that call returns is
    stored in ``self.stop``.
    """

    def __init__(self, source, energy_threshold, dynamic_energy_threshold, record_timeout):
        """Configure the recognizer and remember the audio source.

        Args:
            source: audio source to listen on; must not be None.
            energy_threshold: assigned to the recognizer's ``energy_threshold``.
            dynamic_energy_threshold: assigned to the recognizer's
                ``dynamic_energy_threshold``.
            record_timeout: passed as ``phrase_time_limit`` when listening
                is started.

        Raises:
            ValueError: if ``source`` is None.
        """
        recognizer = Recognizer()
        recognizer.energy_threshold = energy_threshold
        recognizer.dynamic_energy_threshold = dynamic_energy_threshold
        self.recorder = recognizer
        self.record_timeout = record_timeout
        # Stays None until recordIntoQueue() has been called.
        self.stop = None

        if source is None:
            raise ValueError("audio source can't be None")
        self.source = source

    def adjustForNoise(self):
        """Run the recognizer's ambient-noise adjustment on the source."""
        with self.source:
            self.recorder.adjust_for_ambient_noise(self.source)

    def recordIntoQueue(self, audio_queue, energy_queue):
        """Start background listening and route results into the queues.

        Args:
            audio_queue: receives ``(raw_audio_bytes, datetime.now())`` tuples.
            energy_queue: receives each energy value reported by the recognizer.
        """
        def on_audio(_, audio):
            audio_queue.put((audio.get_raw_data(), datetime.now()))

        def on_energy(energy):
            energy_queue.put(energy)

        self.stop = self.recorder.listen_energy_and_audio_in_background(
            self.source,
            on_audio,
            phrase_time_limit=self.record_timeout,
            callback_energy=on_energy,
        )
class SelectedMicEnergyAndAudioRecorder(BaseEnergyAndAudioRecorder):
    """Energy-and-audio recorder bound to a chosen microphone device."""

    def __init__(self, device, energy_threshold, dynamic_energy_threshold, record_timeout):
        """Build a ``Microphone`` source from ``device`` and initialise the base.

        Args:
            device: mapping providing the ``index`` and ``defaultSampleRate``
                entries read below.
            energy_threshold: forwarded to the base class.
            dynamic_energy_threshold: forwarded to the base class.
            record_timeout: forwarded to the base class.
        """
        mic_source = Microphone(
            device_index=device['index'],
            sample_rate=int(device["defaultSampleRate"]),
        )
        super().__init__(
            source=mic_source,
            energy_threshold=energy_threshold,
            dynamic_energy_threshold=dynamic_energy_threshold,
            record_timeout=record_timeout,
        )
        # Ambient-noise adjustment is not run at construction time.
        # self.adjustForNoise()
class SelectedSpeakerEnergyAndAudioRecorder(BaseEnergyAndAudioRecorder):
    """Energy-and-audio recorder bound to a chosen speaker device."""

    def __init__(self, device, energy_threshold, dynamic_energy_threshold, record_timeout):
        """Build a speaker-mode ``Microphone`` source and initialise the base.

        Args:
            device: mapping providing the ``index``, ``defaultSampleRate`` and
                ``maxInputChannels`` entries read below.
            energy_threshold: forwarded to the base class.
            dynamic_energy_threshold: forwarded to the base class.
            record_timeout: forwarded to the base class.
        """
        speaker_source = Microphone(
            speaker=True,
            device_index=device["index"],
            sample_rate=int(device["defaultSampleRate"]),
            chunk_size=get_sample_size(paInt16),
            channels=device["maxInputChannels"],
        )
        super().__init__(
            source=speaker_source,
            energy_threshold=energy_threshold,
            dynamic_energy_threshold=dynamic_energy_threshold,
            record_timeout=record_timeout,
        )
        # Ambient-noise adjustment is not run at construction time.
        # self.adjustForNoise()

View File

@@ -12,6 +12,6 @@ transformers[torch]==4.37.2
sentencepiece==0.1.99
ctranslate2==3.24.0
faster-whisper==0.10.0
translators @ git+https://github.com/misyaguziya/translators@master
SpeechRecognition @ git+https://github.com/misyaguziya/custom_speech_recognition@master
translators @ git+https://github.com/misyaguziya/translators@5.8.9
SpeechRecognition @ git+https://github.com/misyaguziya/custom_speech_recognition@3.10.2
tinyoscquery @ git+https://github.com/cyberkitsune/tinyoscquery@0.1.2