[WIP/TEST] Model : faster-whisperを追加

2024-01-30 02:15:05 +09:00
parent 5e79cb5539
commit ba12e39bbc
2 changed files with 30 additions and 1 deletions
--- a/models/transcription/transcription_transcriber.py
+++ b/models/transcription/transcription_transcriber.py
@@ -6,6 +6,10 @@ from datetime import timedelta
 from pyaudiowpatch import get_sample_size, paInt16
 from .transcription_languages import transcription_lang

+import torch
+import numpy as np
+from faster_whisper import WhisperModel
+
 PHRASE_TIMEOUT = 3
 MAX_PHRASES = 10

@@ -26,6 +30,7 @@ class AudioTranscriber:
                "new_phrase": True,
                "process_data_func": self.processSpeakerData if speaker else self.processSpeakerData
        }
+        self.whisper_model = WhisperModel("base", device="cpu", device_index=0, compute_type="int8", cpu_threads=4, num_workers=1)

    def transcribeAudioQueue(self, audio_queue, language, country):
        # while True:
@@ -38,6 +43,29 @@ class AudioTranscriber:
            # os.close(fd)
            audio_data = self.audio_sources["process_data_func"]()
            text = self.audio_recognizer.recognize_google(audio_data, language=transcription_lang[language][country])
+
+            audio_data = np.frombuffer(audio_data.get_raw_data(convert_rate=16000, convert_width=2), np.int16).flatten().astype(np.float32) / 32768.0
+            if isinstance(audio_data, torch.Tensor):
+                audio_data = audio_data.detach().numpy()
+            segments, _ = self.whisper_model.transcribe(
+                audio_data,
+                beam_size=5,
+                temperature=0.0,
+                log_prob_threshold=-0.8,
+                no_speech_threshold=0.6,
+                language="ja",
+                word_timestamps=False,
+                without_timestamps=True,
+                task="transcribe",
+                vad_filter=False,
+                )
+            _text = ""
+            for s in segments:
+                if s.avg_logprob < -0.8 or s.no_speech_prob > 0.6:
+                    continue
+                _text += s.text
+            print(_text)
+
        except Exception:
            pass
        finally:
--- a/requirements.txt
+++ b/requirements.txt
@@ -10,4 +10,5 @@ CTkToolTip == 0.8
 pyinstaller==6.2.0
 transformers[torch]
 sentencepiece==0.1.99
-ctranslate2==3.21.0
+ctranslate2==3.21.0
+faster-whisper==0.10.0