From 01f73bc1f8496596a872f03c9618b07cb4ff718e Mon Sep 17 00:00:00 2001
From: misyaguziya <misyaguziya@gmail.com>
Date: Tue, 18 Jun 2024 23:46:04 +0900
Subject: [PATCH] =?UTF-8?q?=F0=9F=91=8D=EF=B8=8F[Update]=20Model=20:=20whi?=
 =?UTF-8?q?sper=E3=81=AE=E8=A8=AD=E5=AE=9A=E9=A0=85=E7=9B=AE=E3=82=92?=
 =?UTF-8?q?=E8=BF=BD=E5=8A=A0(avg=5Flogprob,=20no=5Fspeech=5Fprob)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 config.py                                     | 48 +++++++++++++++++++
 model.py                                      | 16 ++++++-
 .../transcription_transcriber.py              |  4 +-
 3 files changed, 64 insertions(+), 4 deletions(-)

diff --git a/config.py b/config.py
index 35ab252c..a520de05 100644
--- a/config.py
+++ b/config.py
@@ -546,6 +546,28 @@ class Config:
             self._INPUT_MIC_WORD_FILTER = sorted(set(value), key=value.index)
             saveJson(self.PATH_CONFIG, inspect.currentframe().f_code.co_name, value)
 
+    @property
+    @json_serializable('INPUT_MIC_AVG_LOGPROB')
+    def INPUT_MIC_AVG_LOGPROB(self):  # avg_logprob threshold used for mic transcription
+        return self._INPUT_MIC_AVG_LOGPROB
+
+    @INPUT_MIC_AVG_LOGPROB.setter
+    def INPUT_MIC_AVG_LOGPROB(self, value):  # non-numeric input is ignored: nothing stored, nothing saved
+        if isinstance(value, (int, float)):
+            self._INPUT_MIC_AVG_LOGPROB = value
+            saveJson(self.PATH_CONFIG, inspect.currentframe().f_code.co_name, value)
+
+    @property
+    @json_serializable('INPUT_MIC_NO_SPEECH_PROB')
+    def INPUT_MIC_NO_SPEECH_PROB(self):  # no_speech_prob threshold used for mic transcription
+        return self._INPUT_MIC_NO_SPEECH_PROB
+
+    @INPUT_MIC_NO_SPEECH_PROB.setter
+    def INPUT_MIC_NO_SPEECH_PROB(self, value):  # non-numeric input is ignored: nothing stored, nothing saved
+        if isinstance(value, (int, float)):
+            self._INPUT_MIC_NO_SPEECH_PROB = value
+            saveJson(self.PATH_CONFIG, inspect.currentframe().f_code.co_name, value)
+
     @property
     @json_serializable('CHOICE_SPEAKER_DEVICE')
     def CHOICE_SPEAKER_DEVICE(self):
@@ -612,6 +634,28 @@ class Config:
             self._INPUT_SPEAKER_MAX_PHRASES = value
             saveJson(self.PATH_CONFIG, inspect.currentframe().f_code.co_name, value)
 
+    @property
+    @json_serializable('INPUT_SPEAKER_AVG_LOGPROB')
+    def INPUT_SPEAKER_AVG_LOGPROB(self):  # avg_logprob threshold used for speaker transcription
+        return self._INPUT_SPEAKER_AVG_LOGPROB
+
+    @INPUT_SPEAKER_AVG_LOGPROB.setter
+    def INPUT_SPEAKER_AVG_LOGPROB(self, value):  # non-numeric input is ignored: nothing stored, nothing saved
+        if isinstance(value, (int, float)):
+            self._INPUT_SPEAKER_AVG_LOGPROB = value
+            saveJson(self.PATH_CONFIG, inspect.currentframe().f_code.co_name, value)
+
+    @property
+    @json_serializable('INPUT_SPEAKER_NO_SPEECH_PROB')
+    def INPUT_SPEAKER_NO_SPEECH_PROB(self):  # no_speech_prob threshold used for speaker transcription
+        return self._INPUT_SPEAKER_NO_SPEECH_PROB
+
+    @INPUT_SPEAKER_NO_SPEECH_PROB.setter
+    def INPUT_SPEAKER_NO_SPEECH_PROB(self, value):  # non-numeric input is ignored: nothing stored, nothing saved
+        if isinstance(value, (int, float)):
+            self._INPUT_SPEAKER_NO_SPEECH_PROB = value
+            saveJson(self.PATH_CONFIG, inspect.currentframe().f_code.co_name, value)
+
     @property
     @json_serializable('OSC_IP_ADDRESS')
     def OSC_IP_ADDRESS(self):
@@ -1043,12 +1087,16 @@ class Config:
         self._INPUT_MIC_PHRASE_TIMEOUT = 3
         self._INPUT_MIC_MAX_PHRASES = 10
         self._INPUT_MIC_WORD_FILTER = []
+        self._INPUT_MIC_AVG_LOGPROB=-0.8
+        self._INPUT_MIC_NO_SPEECH_PROB=0.6
         self._CHOICE_SPEAKER_DEVICE = getDefaultOutputDevice()["device"]["name"]
         self._INPUT_SPEAKER_ENERGY_THRESHOLD = 300
         self._INPUT_SPEAKER_DYNAMIC_ENERGY_THRESHOLD = False
         self._INPUT_SPEAKER_RECORD_TIMEOUT = 3
         self._INPUT_SPEAKER_PHRASE_TIMEOUT = 3
         self._INPUT_SPEAKER_MAX_PHRASES = 10
+        self._INPUT_SPEAKER_AVG_LOGPROB=-0.8
+        self._INPUT_SPEAKER_NO_SPEECH_PROB=0.6
         self._OSC_IP_ADDRESS = "127.0.0.1"
         self._OSC_PORT = 9000
         self._AUTH_KEYS = {
diff --git a/model.py b/model.py
index 2017a787..dddb056f 100644
--- a/model.py
+++ b/model.py
@@ -427,7 +427,13 @@ class Model:
         )
         def sendMicTranscript():
             try:
-                res = self.mic_transcriber.transcribeAudioQueue(self.mic_audio_queue, config.SOURCE_LANGUAGE, config.SOURCE_COUNTRY)
+                res = self.mic_transcriber.transcribeAudioQueue(
+                    self.mic_audio_queue,
+                    config.SOURCE_LANGUAGE,
+                    config.SOURCE_COUNTRY,
+                    config.INPUT_MIC_AVG_LOGPROB,
+                    config.INPUT_MIC_NO_SPEECH_PROB
+                )
                 if res:
                     message = self.mic_transcriber.getTranscript()
                     fnc(message)
@@ -581,7 +587,13 @@ class Model:
         )
         def sendSpeakerTranscript():
             try:
-                res = self.speaker_transcriber.transcribeAudioQueue(speaker_audio_queue, config.TARGET_LANGUAGE, config.TARGET_COUNTRY)
+                res = self.speaker_transcriber.transcribeAudioQueue(
+                    speaker_audio_queue,
+                    config.TARGET_LANGUAGE,
+                    config.TARGET_COUNTRY,
+                    config.INPUT_SPEAKER_AVG_LOGPROB,
+                    config.INPUT_SPEAKER_NO_SPEECH_PROB
+                )
                 if res:
                     message = self.speaker_transcriber.getTranscript()
                     fnc(message)
diff --git a/models/transcription/transcription_transcriber.py b/models/transcription/transcription_transcriber.py
index 56d9d979..82f1e981 100644
--- a/models/transcription/transcription_transcriber.py
+++ b/models/transcription/transcription_transcriber.py
@@ -38,7 +38,7 @@ class AudioTranscriber:
             self.whisper_model = getWhisperModel(root, whisper_weight_type)
             self.transcription_engine = "Whisper"
 
-    def transcribeAudioQueue(self, audio_queue, language, country):
+    def transcribeAudioQueue(self, audio_queue, language, country, avg_logprob=-0.8, no_speech_prob=0.6):
         if audio_queue.empty():
             time.sleep(0.01)
             return False
@@ -68,7 +68,7 @@ class AudioTranscriber:
                         vad_filter=False,
                         )
                     for s in segments:
-                        if s.avg_logprob < -0.8 or s.no_speech_prob > 0.6:
+                        if s.avg_logprob < avg_logprob or s.no_speech_prob > no_speech_prob:
                             continue
                         text += s.text