From 2857d8f543f28e9cde764ac40334a438046d9e19 Mon Sep 17 00:00:00 2001 From: misyaguziya Date: Fri, 9 Feb 2024 02:03:44 +0900 Subject: [PATCH 1/3] [WIP/TEST] Model : Add energy indicator --- model.py | 22 ++++++++++-- .../transcription/transcription_recorder.py | 35 +++++++++++++++++++ 2 files changed, 54 insertions(+), 3 deletions(-) diff --git a/model.py b/model.py index 5b17e167..0b3b4887 100644 --- a/model.py +++ b/model.py @@ -18,6 +18,7 @@ from models.translation.translation_translator import Translator from models.transcription.transcription_utils import getInputDevices, getDefaultOutputDevice from models.osc.osc_tools import sendTyping, sendMessage, sendTestAction, receiveOscParameters from models.transcription.transcription_recorder import SelectedMicRecorder, SelectedSpeakerRecorder +from models.transcription.transcription_recorder import SelectedMicEnergyAndAudioRecorder from models.transcription.transcription_recorder import SelectedMicEnergyRecorder, SelectedSpeakeEnergyRecorder from models.transcription.transcription_transcriber import AudioTranscriber from models.xsoverlay.notification import xsoverlayForVRCT @@ -321,19 +322,20 @@ class Model: return mic_audio_queue = Queue() + mic_energy_queue = Queue() device = [device for device in getInputDevices()[config.CHOICE_MIC_HOST] if device["name"] == config.CHOICE_MIC_DEVICE][0] record_timeout = config.INPUT_MIC_RECORD_TIMEOUT phase_timeout = config.INPUT_MIC_PHRASE_TIMEOUT if record_timeout > phase_timeout: record_timeout = phase_timeout - self.mic_audio_recorder = SelectedMicRecorder( + self.mic_audio_recorder = SelectedMicEnergyAndAudioRecorder( device=device, energy_threshold=config.INPUT_MIC_ENERGY_THRESHOLD, dynamic_energy_threshold=config.INPUT_MIC_DYNAMIC_ENERGY_THRESHOLD, record_timeout=record_timeout, ) - self.mic_audio_recorder.recordIntoQueue(mic_audio_queue) + self.mic_audio_recorder.recordIntoQueue(mic_audio_queue, mic_energy_queue) mic_transcriber = AudioTranscriber( speaker=False, source=self.mic_audio_recorder.source, @@ -350,15 +352,29 @@ class Model: except Exception: pass + def sendMicEnergy(): + if mic_energy_queue.empty() is False: + energy = mic_energy_queue.get() + print("mic energy:", energy) + try: + fnc(energy) + except Exception: + pass + sleep(0.01) + self.mic_print_transcript = threadFnc(sendMicTranscript) self.mic_print_transcript.daemon = True self.mic_print_transcript.start() + self.mic_get_energy = threadFnc(sendMicEnergy) + self.mic_get_energy.daemon = True + self.mic_get_energy.start() + def stopMicTranscript(self): if isinstance(self.mic_print_transcript, threadFnc): self.mic_print_transcript.stop() self.mic_print_transcript = None - if isinstance(self.mic_audio_recorder, SelectedMicRecorder): + if isinstance(self.mic_audio_recorder, SelectedMicEnergyAndAudioRecorder): self.mic_audio_recorder.stop() self.mic_audio_recorder = None diff --git a/models/transcription/transcription_recorder.py b/models/transcription/transcription_recorder.py index 9abe5eb4..987e948c 100644 --- a/models/transcription/transcription_recorder.py +++ b/models/transcription/transcription_recorder.py @@ -87,4 +87,39 @@ class SelectedSpeakeEnergyRecorder(BaseEnergyRecorder): channels=device["maxInputChannels"] ) super().__init__(source=source) + # self.adjustForNoise() + +class BaseEnergyAndAudioRecorder: + def __init__(self, source, energy_threshold, dynamic_energy_threshold, record_timeout): + self.recorder = Recognizer() + self.recorder.energy_threshold = energy_threshold + self.recorder.dynamic_energy_threshold = dynamic_energy_threshold + self.record_timeout = record_timeout + self.stop = None + + if source is None: + raise ValueError("audio source can't be None") + + self.source = source + + def adjustForNoise(self): + with self.source: + self.recorder.adjust_for_ambient_noise(self.source) + + def recordIntoQueue(self, audio_queue, energy_queue): + def audioRecordCallback(_, audio): + audio_queue.put((audio.get_raw_data(), datetime.now())) + + def energyRecordCallback(energy): + energy_queue.put(energy) + + self.stop = self.recorder.listen_energy_and_audio_in_background(self.source, audioRecordCallback, phrase_time_limit=self.record_timeout, callback_energy=energyRecordCallback) + +class SelectedMicEnergyAndAudioRecorder(BaseEnergyAndAudioRecorder): + def __init__(self, device, energy_threshold, dynamic_energy_threshold, record_timeout): + source=Microphone( + device_index=device['index'], + sample_rate=int(device["defaultSampleRate"]), + ) + super().__init__(source=source, energy_threshold=energy_threshold, dynamic_energy_threshold=dynamic_energy_threshold, record_timeout=record_timeout) # self.adjustForNoise() \ No newline at end of file From e08a3ab42d2d87d819ab5ca15c2f137a2c0040a8 Mon Sep 17 00:00:00 2001 From: misyaguziya Date: Fri, 9 Feb 2024 02:29:49 +0900 Subject: [PATCH 2/3] [WIP/TEST] Model : Add energy indicator for speaker --- model.py | 32 +++++++++++++++---- .../transcription/transcription_recorder.py | 12 +++++++ 2 files changed, 38 insertions(+), 6 deletions(-) diff --git a/model.py b/model.py index 0b3b4887..06a9cb7d 100644 --- a/model.py +++ b/model.py @@ -17,8 +17,7 @@ from flashtext import KeywordProcessor from models.translation.translation_translator import Translator from models.transcription.transcription_utils import getInputDevices, getDefaultOutputDevice from models.osc.osc_tools import sendTyping, sendMessage, sendTestAction, receiveOscParameters -from models.transcription.transcription_recorder import SelectedMicRecorder, SelectedSpeakerRecorder -from models.transcription.transcription_recorder import SelectedMicEnergyAndAudioRecorder +from models.transcription.transcription_recorder import SelectedMicEnergyAndAudioRecorder, SelectedSpeakerEnergyAndAudioRecorder from models.transcription.transcription_recorder import SelectedMicEnergyRecorder, SelectedSpeakeEnergyRecorder from models.transcription.transcription_transcriber import AudioTranscriber from models.xsoverlay.notification import xsoverlayForVRCT @@ -355,7 +354,7 @@ class Model: def sendMicEnergy(): if mic_energy_queue.empty() is False: energy = mic_energy_queue.get() - print("mic energy:", energy) + # print("mic energy:", energy) try: fnc(energy) except Exception: @@ -377,6 +376,9 @@ class Model: if isinstance(self.mic_audio_recorder, SelectedMicEnergyAndAudioRecorder): self.mic_audio_recorder.stop() self.mic_audio_recorder = None + if isinstance(self.mic_get_energy, threadFnc): + self.mic_get_energy.stop() + self.mic_get_energy = None def startCheckMicEnergy(self, fnc, end_fnc, error_fnc=None): if config.CHOICE_MIC_HOST == "NoHost" or config.CHOICE_MIC_DEVICE == "NoDevice": @@ -421,18 +423,19 @@ class Model: return speaker_audio_queue = Queue() + speaker_energy_queue = Queue() record_timeout = config.INPUT_SPEAKER_RECORD_TIMEOUT phase_timeout = config.INPUT_SPEAKER_PHRASE_TIMEOUT if record_timeout > phase_timeout: record_timeout = phase_timeout - self.speaker_audio_recorder = SelectedSpeakerRecorder( + self.speaker_audio_recorder = SelectedSpeakerEnergyAndAudioRecorder( device=speaker_device, energy_threshold=config.INPUT_SPEAKER_ENERGY_THRESHOLD, dynamic_energy_threshold=config.INPUT_SPEAKER_DYNAMIC_ENERGY_THRESHOLD, record_timeout=record_timeout, ) - self.speaker_audio_recorder.recordIntoQueue(speaker_audio_queue) + self.speaker_audio_recorder.recordIntoQueue(speaker_audio_queue, speaker_energy_queue) speaker_transcriber = AudioTranscriber( speaker=True, source=self.speaker_audio_recorder.source, @@ -449,17 +452,34 @@ class Model: except Exception: pass + def sendSpeakerEnergy(): + if speaker_energy_queue.empty() is False: + energy = speaker_energy_queue.get() + # print("speaker energy:", energy) + try: + fnc(energy) + except Exception: + pass + sleep(0.01) + self.speaker_print_transcript = threadFnc(sendSpeakerTranscript) self.speaker_print_transcript.daemon = True self.speaker_print_transcript.start() + self.speaker_get_energy = threadFnc(sendSpeakerEnergy) + self.speaker_get_energy.daemon = True + self.speaker_get_energy.start() + def stopSpeakerTranscript(self): if isinstance(self.speaker_print_transcript, threadFnc): self.speaker_print_transcript.stop() self.speaker_print_transcript = None - if isinstance(self.speaker_audio_recorder, SelectedSpeakerRecorder): + if isinstance(self.speaker_audio_recorder, SelectedSpeakerEnergyAndAudioRecorder): self.speaker_audio_recorder.stop() self.speaker_audio_recorder = None + if isinstance(self.speaker_get_energy, threadFnc): + self.speaker_get_energy.stop() + self.speaker_get_energy = None def startCheckSpeakerEnergy(self, fnc, end_fnc, error_fnc=None): speaker_device = getDefaultOutputDevice() diff --git a/models/transcription/transcription_recorder.py b/models/transcription/transcription_recorder.py index 987e948c..281c48b6 100644 --- a/models/transcription/transcription_recorder.py +++ b/models/transcription/transcription_recorder.py @@ -122,4 +122,16 @@ class SelectedMicEnergyAndAudioRecorder(BaseEnergyAndAudioRecorder): sample_rate=int(device["defaultSampleRate"]), ) super().__init__(source=source, energy_threshold=energy_threshold, dynamic_energy_threshold=dynamic_energy_threshold, record_timeout=record_timeout) + # self.adjustForNoise() + +class SelectedSpeakerEnergyAndAudioRecorder(BaseEnergyAndAudioRecorder): + def __init__(self, device, energy_threshold, dynamic_energy_threshold, record_timeout): + + source = Microphone(speaker=True, + device_index= device["index"], + sample_rate=int(device["defaultSampleRate"]), + chunk_size=get_sample_size(paInt16), + channels=device["maxInputChannels"] + ) + super().__init__(source=source, energy_threshold=energy_threshold, dynamic_energy_threshold=dynamic_energy_threshold, record_timeout=record_timeout) # self.adjustForNoise() \ No newline at end of file From d5554487f472dc0d6bc142ca9fa6250bbdfa2c74 Mon Sep 17 00:00:00 2001 From: misyaguziya Date: Fri, 9 Feb 2024 22:33:11 +0900 Subject: [PATCH 3/3] [Update] requirements.txt : install version fix --- requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 3d0b771f..350a73fb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,6 +12,6 @@ transformers[torch]==4.37.2 sentencepiece==0.1.99 ctranslate2==3.24.0 faster-whisper==0.10.0 -translators @ git+https://github.com/misyaguziya/translators@master -SpeechRecognition @ git+https://github.com/misyaguziya/custom_speech_recognition@master +translators @ git+https://github.com/misyaguziya/translators@5.8.9 +SpeechRecognition @ git+https://github.com/misyaguziya/custom_speech_recognition@3.10.2 tinyoscquery @ git+https://github.com/cyberkitsune/tinyoscquery@0.1.2 \ No newline at end of file