From 5bb3152d02c60114a2a6396424be431d1df1984a Mon Sep 17 00:00:00 2001 From: misyaguziya Date: Thu, 20 Jun 2024 16:59:59 +0900 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B[bugfix]=20Model=20:=20speaker?= =?UTF-8?q?=E3=81=8C=E3=82=B5=E3=83=A9=E3=82=A6=E3=83=B3=E3=83=89=E3=83=87?= =?UTF-8?q?=E3=83=90=E3=82=A4=E3=82=B9=E3=81=AE=E5=A0=B4=E5=90=88=E3=81=AB?= =?UTF-8?q?=E9=9F=B3=E5=A3=B0=E3=81=8C=E6=96=87=E5=AD=97=E8=B5=B7=E3=81=93?= =?UTF-8?q?=E3=81=97=E3=81=95=E3=82=8C=E3=81=AA=E3=81=84=E5=95=8F=E9=A1=8C?= =?UTF-8?q?=E3=82=92=E4=BF=AE=E6=AD=A3=20#10?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../transcription_transcriber.py | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/models/transcription/transcription_transcriber.py b/models/transcription/transcription_transcriber.py index c1856b34..a535cd8a 100644 --- a/models/transcription/transcription_transcriber.py +++ b/models/transcription/transcription_transcriber.py @@ -10,6 +10,7 @@ from .transcription_whisper import getWhisperModel, checkWhisperWeight import torch import numpy as np +from pydub import AudioSegment PHRASE_TIMEOUT = 3 MAX_PHRASES = 10 @@ -97,21 +98,21 @@ class AudioTranscriber: return audio_data def processSpeakerData(self): - original_channels = self.audio_sources["channels"] - if original_channels <= 2: - channels = original_channels - sample_rate = self.audio_sources["sample_rate"] - else: - channels = 2 - sample_rate = self.audio_sources["sample_rate"]*original_channels/2 - temp_file = BytesIO() with wave.open(temp_file, 'wb') as wf: - wf.setnchannels(channels) + wf.setnchannels(self.audio_sources["channels"]) wf.setsampwidth(get_sample_size(paInt16)) - wf.setframerate(sample_rate) + wf.setframerate(self.audio_sources["sample_rate"]) wf.writeframes(self.audio_sources["last_sample"]) temp_file.seek(0) + + if self.audio_sources["channels"] > 2: + audio = AudioSegment.from_file(temp_file, format="wav") + mono_audio = audio.set_channels(1) + temp_file = BytesIO() + mono_audio.export(temp_file, format="wav") + temp_file.seek(0) + with AudioFile(temp_file) as source: audio = self.audio_recognizer.record(source) return audio