From b545abce44343fb8a1f0ecce4eea2bda01cd56a4 Mon Sep 17 00:00:00 2001
From: misyaguziya <53165965+misyaguziya@users.noreply.github.com>
Date: Sat, 21 Dec 2024 17:34:10 +0900
Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B[bugfix]=20Model=20:=20result?=
 =?UTF-8?q?=E3=82=92=E5=8F=96=E5=BE=97=E3=81=99=E3=82=8B=E9=9A=9B=E3=81=AB?=
 =?UTF-8?q?=E3=82=A8=E3=83=A9=E3=83=BC=E3=81=8C=E5=87=BA=E3=82=8B=E5=95=8F?=
 =?UTF-8?q?=E9=A1=8C=E3=82=92=E4=BF=AE=E6=AD=A3?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../models/transcription/transcription_transcriber.py  | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/src-python/models/transcription/transcription_transcriber.py b/src-python/models/transcription/transcription_transcriber.py
index 35a80e56..5407253a 100644
--- a/src-python/models/transcription/transcription_transcriber.py
+++ b/src-python/models/transcription/transcription_transcriber.py
@@ -51,12 +51,11 @@ class AudioTranscriber:
         audio, time_spoken = audio_queue.get()
         self.updateLastSampleAndPhraseStatus(audio, time_spoken)
 
-        result = {"confidence": 0, "text": "", "language": None}
+        confidences = [{"confidence": 0, "text": "", "language": None}]
         try:
             audio_data = self.audio_sources["process_data_func"]()
             match self.transcription_engine:
                 case "Google":
-                    confidences = []
                     for language, country in zip(languages, countries):
                         try:
                             text, confidence = self.audio_recognizer.recognize_google(
@@ -67,12 +66,7 @@ class AudioTranscriber:
                             confidences.append({"confidence": confidence, "text": text, "language": language})
                         except Exception:
                             pass
-
-                    result = max(confidences, key=lambda x: x["confidence"])
-
                 case "Whisper":
-                    confidences = []
-
                     audio_data = np.frombuffer(audio_data.get_raw_data(convert_rate=16000, convert_width=2), np.int16).flatten().astype(np.float32) / 32768.0
                     if isinstance(audio_data, torch.Tensor):
                         audio_data = audio_data.detach().numpy()
@@ -99,7 +93,6 @@ class AudioTranscriber:
                         confidences.append({"confidence": info.language_probability, "text": text, "language": language})
                         if (len(languages) == 1) or (transcription_lang[language][country][self.transcription_engine] == info.language):
                             break
-                    result = max(confidences, key=lambda x: x["confidence"])
 
         except UnknownValueError:
             pass
@@ -108,6 +101,7 @@ class AudioTranscriber:
         finally:
             pass
 
+        result = max(confidences, key=lambda x: x["confidence"])
         if result["text"] != "":
             self.updateTranscript(result)
         return True