From b545abce44343fb8a1f0ecce4eea2bda01cd56a4 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Sat, 21 Dec 2024 17:34:10 +0900 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B[bugfix]=20Model=20:=20result?= =?UTF-8?q?=E3=82=92=E5=8F=96=E5=BE=97=E3=81=99=E3=82=8B=E9=9A=9B=E3=81=AB?= =?UTF-8?q?=E3=82=A8=E3=83=A9=E3=83=BC=E3=81=8C=E5=87=BA=E3=82=8B=E5=95=8F?= =?UTF-8?q?=E9=A1=8C=E3=82=92=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../models/transcription/transcription_transcriber.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src-python/models/transcription/transcription_transcriber.py b/src-python/models/transcription/transcription_transcriber.py index 35a80e56..5407253a 100644 --- a/src-python/models/transcription/transcription_transcriber.py +++ b/src-python/models/transcription/transcription_transcriber.py @@ -51,12 +51,11 @@ class AudioTranscriber: audio, time_spoken = audio_queue.get() self.updateLastSampleAndPhraseStatus(audio, time_spoken) - result = {"confidence": 0, "text": "", "language": None} + confidences = [{"confidence": 0, "text": "", "language": None}] try: audio_data = self.audio_sources["process_data_func"]() match self.transcription_engine: case "Google": - confidences = [] for language, country in zip(languages, countries): try: text, confidence = self.audio_recognizer.recognize_google( @@ -67,12 +66,7 @@ class AudioTranscriber: confidences.append({"confidence": confidence, "text": text, "language": language}) except Exception: pass - - result = max(confidences, key=lambda x: x["confidence"]) - case "Whisper": - confidences = [] - audio_data = np.frombuffer(audio_data.get_raw_data(convert_rate=16000, convert_width=2), np.int16).flatten().astype(np.float32) / 32768.0 if isinstance(audio_data, torch.Tensor): audio_data = audio_data.detach().numpy() @@ -99,7 +93,6 @@ class AudioTranscriber: confidences.append({"confidence": info.language_probability, "text": text, "language": language}) if (len(languages) == 1) or (transcription_lang[language][country][self.transcription_engine] == info.language): break - result = max(confidences, key=lambda x: x["confidence"]) except UnknownValueError: pass @@ -108,6 +101,7 @@ class AudioTranscriber: finally: pass + result = max(confidences, key=lambda x: x["confidence"]) if result["text"] != "": self.updateTranscript(result) return True