diff --git a/model.py b/model.py index 5b17e167..06a9cb7d 100644 --- a/model.py +++ b/model.py @@ -17,7 +17,7 @@ from flashtext import KeywordProcessor from models.translation.translation_translator import Translator from models.transcription.transcription_utils import getInputDevices, getDefaultOutputDevice from models.osc.osc_tools import sendTyping, sendMessage, sendTestAction, receiveOscParameters -from models.transcription.transcription_recorder import SelectedMicRecorder, SelectedSpeakerRecorder +from models.transcription.transcription_recorder import SelectedMicEnergyAndAudioRecorder, SelectedSpeakerEnergyAndAudioRecorder from models.transcription.transcription_recorder import SelectedMicEnergyRecorder, SelectedSpeakeEnergyRecorder from models.transcription.transcription_transcriber import AudioTranscriber from models.xsoverlay.notification import xsoverlayForVRCT @@ -321,19 +321,20 @@ class Model: return mic_audio_queue = Queue() + mic_energy_queue = Queue() device = [device for device in getInputDevices()[config.CHOICE_MIC_HOST] if device["name"] == config.CHOICE_MIC_DEVICE][0] record_timeout = config.INPUT_MIC_RECORD_TIMEOUT phase_timeout = config.INPUT_MIC_PHRASE_TIMEOUT if record_timeout > phase_timeout: record_timeout = phase_timeout - self.mic_audio_recorder = SelectedMicRecorder( + self.mic_audio_recorder = SelectedMicEnergyAndAudioRecorder( device=device, energy_threshold=config.INPUT_MIC_ENERGY_THRESHOLD, dynamic_energy_threshold=config.INPUT_MIC_DYNAMIC_ENERGY_THRESHOLD, record_timeout=record_timeout, ) - self.mic_audio_recorder.recordIntoQueue(mic_audio_queue) + self.mic_audio_recorder.recordIntoQueue(mic_audio_queue, mic_energy_queue) mic_transcriber = AudioTranscriber( speaker=False, source=self.mic_audio_recorder.source, @@ -350,17 +351,34 @@ class Model: except Exception: pass + def sendMicEnergy(): + if mic_energy_queue.empty() is False: + energy = mic_energy_queue.get() + # print("mic energy:", energy) + try: + fnc(energy) + except Exception: + pass + sleep(0.01) + self.mic_print_transcript = threadFnc(sendMicTranscript) self.mic_print_transcript.daemon = True self.mic_print_transcript.start() + self.mic_get_energy = threadFnc(sendMicEnergy) + self.mic_get_energy.daemon = True + self.mic_get_energy.start() + def stopMicTranscript(self): if isinstance(self.mic_print_transcript, threadFnc): self.mic_print_transcript.stop() self.mic_print_transcript = None - if isinstance(self.mic_audio_recorder, SelectedMicRecorder): + if isinstance(self.mic_audio_recorder, SelectedMicEnergyAndAudioRecorder): self.mic_audio_recorder.stop() self.mic_audio_recorder = None + if isinstance(self.mic_get_energy, threadFnc): + self.mic_get_energy.stop() + self.mic_get_energy = None def startCheckMicEnergy(self, fnc, end_fnc, error_fnc=None): if config.CHOICE_MIC_HOST == "NoHost" or config.CHOICE_MIC_DEVICE == "NoDevice": @@ -405,18 +423,19 @@ class Model: return speaker_audio_queue = Queue() + speaker_energy_queue = Queue() record_timeout = config.INPUT_SPEAKER_RECORD_TIMEOUT phase_timeout = config.INPUT_SPEAKER_PHRASE_TIMEOUT if record_timeout > phase_timeout: record_timeout = phase_timeout - self.speaker_audio_recorder = SelectedSpeakerRecorder( + self.speaker_audio_recorder = SelectedSpeakerEnergyAndAudioRecorder( device=speaker_device, energy_threshold=config.INPUT_SPEAKER_ENERGY_THRESHOLD, dynamic_energy_threshold=config.INPUT_SPEAKER_DYNAMIC_ENERGY_THRESHOLD, record_timeout=record_timeout, ) - self.speaker_audio_recorder.recordIntoQueue(speaker_audio_queue) + self.speaker_audio_recorder.recordIntoQueue(speaker_audio_queue, speaker_energy_queue) speaker_transcriber = AudioTranscriber( speaker=True, source=self.speaker_audio_recorder.source, @@ -433,17 +452,34 @@ class Model: except Exception: pass + def sendSpeakerEnergy(): + if speaker_energy_queue.empty() is False: + energy = speaker_energy_queue.get() + # print("speaker energy:", energy) + try: + fnc(energy) + except Exception: + pass + sleep(0.01) + self.speaker_print_transcript = threadFnc(sendSpeakerTranscript) self.speaker_print_transcript.daemon = True self.speaker_print_transcript.start() + self.speaker_get_energy = threadFnc(sendSpeakerEnergy) + self.speaker_get_energy.daemon = True + self.speaker_get_energy.start() + def stopSpeakerTranscript(self): if isinstance(self.speaker_print_transcript, threadFnc): self.speaker_print_transcript.stop() self.speaker_print_transcript = None - if isinstance(self.speaker_audio_recorder, SelectedSpeakerRecorder): + if isinstance(self.speaker_audio_recorder, SelectedSpeakerEnergyAndAudioRecorder): self.speaker_audio_recorder.stop() self.speaker_audio_recorder = None + if isinstance(self.speaker_get_energy, threadFnc): + self.speaker_get_energy.stop() + self.speaker_get_energy = None def startCheckSpeakerEnergy(self, fnc, end_fnc, error_fnc=None): speaker_device = getDefaultOutputDevice() diff --git a/models/transcription/transcription_recorder.py b/models/transcription/transcription_recorder.py index 9abe5eb4..281c48b6 100644 --- a/models/transcription/transcription_recorder.py +++ b/models/transcription/transcription_recorder.py @@ -87,4 +87,51 @@ class SelectedSpeakeEnergyRecorder(BaseEnergyRecorder): channels=device["maxInputChannels"] ) super().__init__(source=source) + # self.adjustForNoise() + +class BaseEnergyAndAudioRecorder: + def __init__(self, source, energy_threshold, dynamic_energy_threshold, record_timeout): + self.recorder = Recognizer() + self.recorder.energy_threshold = energy_threshold + self.recorder.dynamic_energy_threshold = dynamic_energy_threshold + self.record_timeout = record_timeout + self.stop = None + + if source is None: + raise ValueError("audio source can't be None") + + self.source = source + + def adjustForNoise(self): + with self.source: + self.recorder.adjust_for_ambient_noise(self.source) + + def recordIntoQueue(self, audio_queue, energy_queue): + def audioRecordCallback(_, audio): + audio_queue.put((audio.get_raw_data(), datetime.now())) + + def energyRecordCallback(energy): + energy_queue.put(energy) + + self.stop = self.recorder.listen_energy_and_audio_in_background(self.source, audioRecordCallback, phrase_time_limit=self.record_timeout, callback_energy=energyRecordCallback) + +class SelectedMicEnergyAndAudioRecorder(BaseEnergyAndAudioRecorder): + def __init__(self, device, energy_threshold, dynamic_energy_threshold, record_timeout): + source=Microphone( + device_index=device['index'], + sample_rate=int(device["defaultSampleRate"]), + ) + super().__init__(source=source, energy_threshold=energy_threshold, dynamic_energy_threshold=dynamic_energy_threshold, record_timeout=record_timeout) + # self.adjustForNoise() + +class SelectedSpeakerEnergyAndAudioRecorder(BaseEnergyAndAudioRecorder): + def __init__(self, device, energy_threshold, dynamic_energy_threshold, record_timeout): + + source = Microphone(speaker=True, + device_index= device["index"], + sample_rate=int(device["defaultSampleRate"]), + chunk_size=get_sample_size(paInt16), + channels=device["maxInputChannels"] + ) + super().__init__(source=source, energy_threshold=energy_threshold, dynamic_energy_threshold=dynamic_energy_threshold, record_timeout=record_timeout) # self.adjustForNoise() \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 3d0b771f..350a73fb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,6 +12,6 @@ transformers[torch]==4.37.2 sentencepiece==0.1.99 ctranslate2==3.24.0 faster-whisper==0.10.0 -translators @ git+https://github.com/misyaguziya/translators@master -SpeechRecognition @ git+https://github.com/misyaguziya/custom_speech_recognition@master +translators @ git+https://github.com/misyaguziya/translators@5.8.9 +SpeechRecognition @ git+https://github.com/misyaguziya/custom_speech_recognition@3.10.2 tinyoscquery @ git+https://github.com/cyberkitsune/tinyoscquery@0.1.2 \ No newline at end of file