👍️ [Update] pythonのメイン処理部分を移動/webui_mainloop.pyをビルドできるように修正

This commit is contained in:
misyaguziya
2024-07-27 01:30:36 +09:00
parent 7ce3bc9be9
commit 1be04cb571
21 changed files with 46 additions and 28 deletions

View File

@@ -0,0 +1,730 @@
transcription_lang = {
"Afrikaans":{
"South Africa":{
"Google": "af-ZA",
"Whisper": "af",
},
},
"Albanian":{
"Albania":{
"Google": "sq-AL",
"Whisper": "sq",
},
},
"Amharic":{
"Ethiopia":{
"Google": "am-ET",
"Whisper": "am",
},
},
"Arabic":{
"Algeria":{
"Google": "ar-DZ",
"Whisper": "ar",
},
"Bahrain":{
"Google": "ar-BH",
"Whisper": "ar",
},
"Egypt":{
"Google": "ar-EG",
"Whisper": "ar",
},
"Israel":{
"Google": "ar-IL",
"Whisper": "ar",
},
"Iraq":{
"Google": "ar-IQ",
"Whisper": "ar",
},
"Jordan":{
"Google": "ar-JO",
"Whisper": "ar",
},
"Kuwait":{
"Google": "ar-KW",
"Whisper": "ar",
},
"Lebanon":{
"Google": "ar-LB",
"Whisper": "ar",
},
"Mauritania":{
"Google": "ar-MR",
"Whisper": "ar",
},
"Morocco":{
"Google": "ar-MA",
"Whisper": "ar",
},
"Oman":{
"Google": "ar-OM",
"Whisper": "ar",
},
"Qatar":{
"Google": "ar-QA",
"Whisper": "ar",
},
"Saudi Arabia":{
"Google": "ar-SA",
"Whisper": "ar",
},
"Palestine":{
"Google": "ar-PS",
"Whisper": "ar",
},
"Syria":{
"Google": "ar-SY",
"Whisper": "ar",
},
"Tunisia":{
"Google": "ar-TN",
"Whisper": "ar",
},
"United Arab Emirates":{
"Google": "ar-AE",
"Whisper": "ar",
},
"Yemen":{
"Google": "ar-YE",
"Whisper": "ar",
},
},
"Armenian": {
"Armenia": {
"Google": "hy-AM",
"Whisper": "hy",
},
},
"Azerbaijani": {
"Azerbaijan": {
"Google": "az-AZ",
"Whisper": "az",
},
},
"Basque":{
"Spain":{
"Google": "eu-ES",
"Whisper": "eu",
},
},
"Bengali":{
"Bangladesh":{
"Google": "bn-BD",
"Whisper": "bn",
},
"India":{
"Google": "bn-IN",
"Whisper": "bn",
},
},
"Bosnian":{
"Bosnia and Herzegovina":{
"Google": "bs-BA",
"Whisper": "bs",
}
},
"Bulgarian":{
"Bulgaria":{
"Google": "bg-BG",
"Whisper": "bg",
},
},
"Burmese":{
"Myanmar":{
"Google": "my-MM",
"Whisper": "my",
},
},
"Catalan":{
"Spain":{
"Google": "ca-ES",
"Whisper": "ca",
},
},
"Chinese Simplified":{
"China":{
"Google": "cmn-Hans-CN",
"Whisper": "zh",
},
"Hong Kong":{
"Google": "cmn-Hans-HK",
"Whisper": "zh",
},
},
"Chinese Traditional":{
"Taiwan":{
"Google": "cmn-Hant-TW",
"Whisper": "zh",
},
"Hong Kong":{
"Google": "yue-Hant-HK",
"Whisper": "yue",
},
},
"Croatian":{
"Croatia":{
"Google": "hr-HR",
"Whisper": "hr",
},
},
"Czech":{
"Czech Republic":{
"Google": "cs-CZ",
"Whisper": "cs",
},
},
"Danish":{
"Denmark":{
"Google": "da-DK",
"Whisper": "da",
},
},
"Dutch":{
"Belgium":{
"Google": "nl-BE",
"Whisper": "nl",
},
"Netherlands":{
"Google": "nl-NL",
"Whisper": "nl",
},
},
"English": {
"Australia":{
"Google": "en-AU",
"Whisper": "en",
},
"Canada":{
"Google": "en-CA",
"Whisper": "en",
},
"Ghana":{
"Google": "en-GH",
"Whisper": "en",
},
"Hong Kong":{
"Google": "en-HK",
"Whisper": "en",
},
"India":{
"Google": "en-IN",
"Whisper": "en",
},
"Ireland":{
"Google": "en-IE",
"Whisper": "en",
},
"Kenya":{
"Google": "en-KE",
"Whisper": "en",
},
"New Zealand":{
"Google": "en-NZ",
"Whisper": "en",
},
"Nigeria":{
"Google": "en-NG",
"Whisper": "en",
},
"Philippines":{
"Google": "en-PH",
"Whisper": "en",
},
"Singapore":{
"Google": "en-SG",
"Whisper": "en",
},
"South Africa":{
"Google": "en-ZA",
"Whisper": "en",
},
"Tanzania":{
"Google": "en-TZ",
"Whisper": "en",
},
"United Kingdom":{
"Google": "en-GB",
"Whisper": "en",
},
"United States":{
"Google": "en-US",
"Whisper": "en",
},
},
"Estonian":{
"Estonia":{
"Google": "et-EE",
"Whisper": "et",
},
},
"Filipino":{
"Philippines":{
"Google": "fil-PH",
"Whisper": "tl",
},
},
"Finnish":{
"Finland":{
"Google": "fi-FI",
"Whisper": "fi",
},
},
"French":{
"Belgium":{
"Google": "fr-BE",
"Whisper": "fr",
},
"Canada":{
"Google": "fr-CA",
"Whisper": "fr",
},
"France":{
"Google": "fr-FR",
"Whisper": "fr",
},
"Switzerland":{
"Google": "fr-CH",
"Whisper": "fr",
},
},
"Galician":{
"Spain":{
"Google": "gl-ES",
"Whisper": "gl",
},
},
"Georgian":{
"Georgia":{
"Google": "ka-GE",
"Whisper": "ka",
},
},
"German":{
"Austria":{
"Google": "de-AT",
"Whisper": "de",
},
"Germany":{
"Google": "de-DE",
"Whisper": "de",
},
"Switzerland":{
"Google": "de-CH",
"Whisper": "de",
},
},
"Greek":{
"Greece":{
"Google": "el-GR",
"Whisper": "el",
},
},
"Gujarati":{
"India":{
"Google": "gu-IN",
"Whisper": "gu",
},
},
"Hebrew":{
"Israel":{
"Google": "iw-IL",
"Whisper": "he",
},
},
"Hindi": {
"India":{
"Google": "hi-IN",
"Whisper": "hi",
},
},
"Hungarian":{
"Hungary":{
"Google": "hu-HU",
"Whisper": "hu",
},
},
"Icelandic":{
"Iceland":{
"Google": "is-IS",
"Whisper": "is",
},
},
"Indonesian":{
"Indonesia":{
"Google": "id-ID",
"Whisper": "id",
},
},
"Italian":{
"Italy":{
"Google": "it-IT",
"Whisper": "it",
},
"Switzerland":{
"Google": "it-CH",
"Whisper": "it",
},
},
"Japanese":{
"Japan":{
"Google": "ja-JP",
"Whisper": "ja",
},
},
# "Javanese":{
# "Indonesia":{
# "Google": "jv-ID",
# },
# },
"Kannada":{
"India":{
"Google": "kn-IN",
"Whisper": "kn",
},
},
"Kazakh":{
"Kazakhstan":{
"Google": "kk-KZ",
"Whisper": "kk",
},
},
"Khmer":{
"Cambodia":{
"Google": "km-KH",
"Whisper": "km",
},
},
# "Kinyarwanda":{
# "rwanda":{
# "Google": "rw-RW",
# },
# },
"Korean":{
"South Korea":{
"Google": "ko-KR",
"Whisper": "ko",
},
},
"Lao":{
"Laos":{
"Google": "lo-LA",
"Whisper": "lo",
},
},
"Latvian":{
"Latvia":{
"Google": "lv-LV",
"Whisper": "lv",
},
},
"Lithuanian":{
"Lithuania":{
"Google": "lt-LT",
"Whisper": "lt",
},
},
"Macedonian":{
"North Macedonia":{
"Google": "mk-MK",
"Whisper": "mk",
},
},
"Malay":{
"Malaysia":{
"Google": "ms-MY",
"Whisper": "ms",
},
},
"Malayalam":{
"India":{
"Google": "ml-IN",
"Whisper": "ml",
},
},
"Mongolian":{
"Mongolia":{
"Google": "mn-MN",
"Whisper": "mn",
},
},
"Nepali":{
"Nepal":{
"Google": "ne-NP",
"Whisper": "ne",
},
},
"Norwegian":{
"Norway":{
"Google": "no-NO",
"Whisper": "no",
},
},
"Persian":{
"Iran":{
"Google": "fa-IR",
"Whisper": "fa",
},
},
"Polish":{
"Poland":{
"Google": "pl-PL",
"Whisper": "pl",
},
},
"Portuguese":{
"Brazil":{
"Google": "pt-BR",
"Whisper": "pt",
},
"Portugal":{
"Google": "pt-PT",
"Whisper": "pt",
},
},
# "Punjabi":{
# "India":{
# "Google": "pa-Guru-IN",
# },
# },
"Romanian":{
"Romania":{
"Google": "ro-RO",
"Whisper": "ro",
},
},
"Russian":{
"Russia":{
"Google": "ru-RU",
"Whisper": "ru",
},
},
"Serbian":{
"Serbia":{
"Google": "sr-RS",
"Whisper": "sr",
},
},
"Sinhala":{
"Sri Lanka":{
"Google": "si-LK",
"Whisper": "si",
},
},
"Slovak":{
"Slovakia":{
"Google": "sk-SK",
"Whisper": "sk",
},
},
"Slovenian":{
"Slovenia":{
"Google": "sl-SI",
"Whisper": "sl",
},
},
# "Sesotho":{
# "South Africa":{
# "Google": "st-ZA",
# },
# },
"Spanish":{
"Argentina":{
"Google": "es-AR",
"Whisper": "es",
},
"Bolivia":{
"Google": "es-BO",
"Whisper": "es",
},
"Chile":{
"Google": "es-CL",
"Whisper": "es",
},
"Colombia":{
"Google": "es-CO",
"Whisper": "es",
},
"Costa Rica":{
"Google": "es-CR",
"Whisper": "es",
},
"Dominican Republic":{
"Google": "es-DO",
"Whisper": "es",
},
"Ecuador":{
"Google": "es-EC",
"Whisper": "es",
},
"El Salvador":{
"Google": "es-SV",
"Whisper": "es",
},
"Guatemala":{
"Google": "es-GT",
"Whisper": "es",
},
"Honduras":{
"Google": "es-HN",
"Whisper": "es",
},
"Mexico":{
"Google": "es-MX",
"Whisper": "es",
},
"Nicaragua":{
"Google": "es-NI",
"Whisper": "es",
},
"Panama":{
"Google": "es-PA",
"Whisper": "es",
},
"Paraguay":{
"Google": "es-PY",
"Whisper": "es",
},
"Peru":{
"Google": "es-PE",
"Whisper": "es",
},
"Puerto Rico":{
"Google": "es-PR",
"Whisper": "es",
},
"Spain":{
"Google": "es-ES",
"Whisper": "es",
},
"United States":{
"Google": "es-US",
"Whisper": "es",
},
"Uruguay":{
"Google": "es-UY",
"Whisper": "es",
},
"Venezuela":{
"Google": "es-VE",
"Whisper": "es",
},
},
"Sundanese":{
"Indonesia":{
"Google": "su-ID",
"Whisper": "su",
},
},
"Swahili":{
"Kenya":{
"Google": "sw-KE",
"Whisper": "sw",
},
"Tanzania":{
"Google": "sw-TZ",
"Whisper": "sw",
},
},
# "Swazi":{
# "Eswatini":{
# "Google": "ss-Latn-ZA",
# },
# },
"Swedish":{
"Sweden":{
"Google": "sv-SE",
"Whisper": "sv",
},
},
"Tamil":{
"India":{
"Google": "ta-IN",
"Whisper": "ta",
},
"malaysia":{
"Google": "ta-MY",
"Whisper": "ta",
},
"Singapore":{
"Google": "ta-SG",
"Whisper": "ta",
},
"Sri Lanka":{
"Google": "ta-LK",
"Whisper": "ta",
},
},
"Telugu":{
"India":{
"Google": "te-IN",
"Whisper": "te",
},
},
"Thai":{
"Thailand":{
"Google": "th-TH",
"Whisper": "th",
},
},
# "Tsonga":{
# "South Africa":{
# "Google": "ts-ZA",
# },
# },
# "Setswana":{
# "South Africa":{
# "Google": "tn-Latn-ZA",
# },
# },
"Turkish":{
"Turkey":{
"Google": "tr-TR",
"Whisper": "tr",
},
},
"Ukrainian":{
"Ukraine":{
"Google": "uk-UA",
"Whisper": "uk",
},
},
"Urdu":{
"India":{
"Google": "ur-IN",
"Whisper": "ur",
},
"Pakistan":{
"Google": "ur-PK",
"Whisper": "ur",
},
},
"Uzbek":{
"Uzbekistan":{
"Google": "uz-UZ",
"Whisper": "uz",
},
},
# "Venda":{
# "South Africa":{
# "Google": "ve-ZA",
# },
# },
"Vietnamese":{
"Vietnam":{
"Google": "vi-VN",
"Whisper": "vi",
},
},
# "Xhosa":{
# "South Africa":{
# "Google": "xh-ZA",
# },
# },
# "Zulu":{
# "South Africa":{
# "Google": "zu-ZA",
# },
# },
}

View File

@@ -0,0 +1,142 @@
from speech_recognition import Recognizer, Microphone
from pyaudiowpatch import get_sample_size, paInt16
from datetime import datetime
from queue import Queue
class BaseRecorder:
def __init__(self, source, energy_threshold, dynamic_energy_threshold, record_timeout):
self.recorder = Recognizer()
self.recorder.energy_threshold = energy_threshold
self.recorder.dynamic_energy_threshold = dynamic_energy_threshold
self.record_timeout = record_timeout
self.stop = None
if source is None:
raise ValueError("audio source can't be None")
self.source = source
def adjustForNoise(self):
with self.source:
self.recorder.adjust_for_ambient_noise(self.source)
def recordIntoQueue(self, audio_queue):
def record_callback(_, audio):
audio_queue.put((audio.get_raw_data(), datetime.now()))
self.stop, self.pause, self.resume = self.recorder.listen_in_background(self.source, record_callback, phrase_time_limit=self.record_timeout)
class SelectedMicRecorder(BaseRecorder):
def __init__(self, device, energy_threshold, dynamic_energy_threshold, record_timeout):
source=Microphone(
device_index=device['index'],
sample_rate=int(device["defaultSampleRate"]),
)
super().__init__(source=source, energy_threshold=energy_threshold, dynamic_energy_threshold=dynamic_energy_threshold, record_timeout=record_timeout)
# self.adjustForNoise()
class SelectedSpeakerRecorder(BaseRecorder):
def __init__(self, device, energy_threshold, dynamic_energy_threshold, record_timeout):
source = Microphone(speaker=True,
device_index= device["index"],
sample_rate=int(device["defaultSampleRate"]),
chunk_size=get_sample_size(paInt16),
channels=device["maxInputChannels"]
)
super().__init__(source=source, energy_threshold=energy_threshold, dynamic_energy_threshold=dynamic_energy_threshold, record_timeout=record_timeout)
# self.adjustForNoise()
class BaseEnergyRecorder:
def __init__(self, source):
self.recorder = Recognizer()
self.recorder.energy_threshold = 0
self.recorder.dynamic_energy_threshold = False
self.record_timeout = 0
self.stop = None
if source is None:
raise ValueError("audio source can't be None")
self.source = source
def adjustForNoise(self):
with self.source:
self.recorder.adjust_for_ambient_noise(self.source)
def recordIntoQueue(self, energy_queue):
def recordCallback(_, energy):
energy_queue.put(energy)
self.stop, self.pause, self.resume = self.recorder.listen_energy_in_background(self.source, recordCallback)
class SelectedMicEnergyRecorder(BaseEnergyRecorder):
def __init__(self, device):
source=Microphone(
device_index=device['index'],
sample_rate=int(device["defaultSampleRate"]),
)
super().__init__(source=source)
# self.adjustForNoise()
class SelectedSpeakerEnergyRecorder(BaseEnergyRecorder):
def __init__(self, device):
source = Microphone(speaker=True,
device_index= device["index"],
sample_rate=int(device["defaultSampleRate"]),
channels=device["maxInputChannels"]
)
super().__init__(source=source)
# self.adjustForNoise()
class BaseEnergyAndAudioRecorder:
def __init__(self, source, energy_threshold, dynamic_energy_threshold, record_timeout):
self.recorder = Recognizer()
self.recorder.energy_threshold = energy_threshold
self.recorder.dynamic_energy_threshold = dynamic_energy_threshold
self.record_timeout = record_timeout
self.stop = None
if source is None:
raise ValueError("audio source can't be None")
self.source = source
def adjustForNoise(self):
with self.source:
self.recorder.adjust_for_ambient_noise(self.source)
def recordIntoQueue(self, audio_queue, energy_queue=None):
def audioRecordCallback(_, audio):
audio_queue.put((audio.get_raw_data(), datetime.now()))
def energyRecordCallback(energy):
energy_queue.put(energy)
self.stop, self.pause, self.resume = self.recorder.listen_energy_and_audio_in_background(
source=self.source,
callback=audioRecordCallback,
phrase_time_limit=self.record_timeout,
callback_energy=energyRecordCallback if energy_queue is not None else None)
class SelectedMicEnergyAndAudioRecorder(BaseEnergyAndAudioRecorder):
def __init__(self, device, energy_threshold, dynamic_energy_threshold, record_timeout):
source=Microphone(
device_index=device['index'],
sample_rate=int(device["defaultSampleRate"]),
)
super().__init__(source=source, energy_threshold=energy_threshold, dynamic_energy_threshold=dynamic_energy_threshold, record_timeout=record_timeout)
# self.adjustForNoise()
class SelectedSpeakerEnergyAndAudioRecorder(BaseEnergyAndAudioRecorder):
def __init__(self, device, energy_threshold, dynamic_energy_threshold, record_timeout):
source = Microphone(speaker=True,
device_index= device["index"],
sample_rate=int(device["defaultSampleRate"]),
chunk_size=get_sample_size(paInt16),
channels=device["maxInputChannels"]
)
super().__init__(source=source, energy_threshold=energy_threshold, dynamic_energy_threshold=dynamic_energy_threshold, record_timeout=record_timeout)
# self.adjustForNoise()

View File

@@ -0,0 +1,141 @@
import time
from io import BytesIO
from threading import Event
import wave
from speech_recognition import Recognizer, AudioData, AudioFile
from datetime import timedelta
from pyaudiowpatch import get_sample_size, paInt16
from .transcription_languages import transcription_lang
from .transcription_whisper import getWhisperModel, checkWhisperWeight
import torch
import numpy as np
from pydub import AudioSegment
PHRASE_TIMEOUT = 3
MAX_PHRASES = 10
class AudioTranscriber:
def __init__(self, speaker, source, phrase_timeout, max_phrases, transcription_engine, root=None, whisper_weight_type=None):
self.speaker = speaker
self.phrase_timeout = phrase_timeout
self.max_phrases = max_phrases
self.transcript_data = []
self.transcript_changed_event = Event()
self.audio_recognizer = Recognizer()
self.transcription_engine = "Google"
self.whisper_model = None
self.audio_sources = {
"sample_rate": source.SAMPLE_RATE,
"sample_width": source.SAMPLE_WIDTH,
"channels": source.channels,
"last_sample": bytes(),
"last_spoken": None,
"new_phrase": True,
"process_data_func": self.processSpeakerData if speaker else self.processSpeakerData
}
if transcription_engine == "Whisper" and checkWhisperWeight(root, whisper_weight_type) is True:
self.whisper_model = getWhisperModel(root, whisper_weight_type)
self.transcription_engine = "Whisper"
def transcribeAudioQueue(self, audio_queue, language, country, avg_logprob=-0.8, no_speech_prob=0.6):
if audio_queue.empty():
time.sleep(0.01)
return False
audio, time_spoken = audio_queue.get()
self.updateLastSampleAndPhraseStatus(audio, time_spoken)
text = ''
try:
audio_data = self.audio_sources["process_data_func"]()
match self.transcription_engine:
case "Google":
text = self.audio_recognizer.recognize_google(audio_data, language=transcription_lang[language][country][self.transcription_engine])
case "Whisper":
audio_data = np.frombuffer(audio_data.get_raw_data(convert_rate=16000, convert_width=2), np.int16).flatten().astype(np.float32) / 32768.0
if isinstance(audio_data, torch.Tensor):
audio_data = audio_data.detach().numpy()
segments, _ = self.whisper_model.transcribe(
audio_data,
beam_size=5,
temperature=0.0,
log_prob_threshold=-0.8,
no_speech_threshold=0.6,
language=transcription_lang[language][country][self.transcription_engine],
word_timestamps=False,
without_timestamps=True,
task="transcribe",
vad_filter=False,
)
for s in segments:
if s.avg_logprob < avg_logprob or s.no_speech_prob > no_speech_prob:
continue
text += s.text
except Exception:
pass
finally:
pass
if text != '':
self.updateTranscript(text)
return True
def updateLastSampleAndPhraseStatus(self, data, time_spoken):
source_info = self.audio_sources
if source_info["last_spoken"] and time_spoken - source_info["last_spoken"] > timedelta(seconds=self.phrase_timeout):
source_info["last_sample"] = bytes()
source_info["new_phrase"] = True
else:
source_info["new_phrase"] = False
source_info["last_sample"] += data
source_info["last_spoken"] = time_spoken
def processMicData(self):
audio_data = AudioData(self.audio_sources["last_sample"], self.audio_sources["sample_rate"], self.audio_sources["sample_width"])
return audio_data
def processSpeakerData(self):
temp_file = BytesIO()
with wave.open(temp_file, 'wb') as wf:
wf.setnchannels(self.audio_sources["channels"])
wf.setsampwidth(get_sample_size(paInt16))
wf.setframerate(self.audio_sources["sample_rate"])
wf.writeframes(self.audio_sources["last_sample"])
temp_file.seek(0)
if self.audio_sources["channels"] > 2:
audio = AudioSegment.from_file(temp_file, format="wav")
mono_audio = audio.set_channels(1)
temp_file = BytesIO()
mono_audio.export(temp_file, format="wav")
temp_file.seek(0)
with AudioFile(temp_file) as source:
audio = self.audio_recognizer.record(source)
return audio
def updateTranscript(self, text):
source_info = self.audio_sources
transcript = self.transcript_data
if source_info["new_phrase"] or len(transcript) == 0:
if len(transcript) > self.max_phrases:
transcript.pop(-1)
transcript.insert(0, text)
else:
transcript[0] = text
def getTranscript(self):
if len(self.transcript_data) > 0:
text = self.transcript_data.pop(-1)
else:
text = ""
return text
def clearTranscriptData(self):
self.transcript_data.clear()
self.audio_sources["last_sample"] = bytes()
self.audio_sources["new_phrase"] = True

View File

@@ -0,0 +1,70 @@
from pyaudiowpatch import PyAudio, paWASAPI
def getInputDevices():
devices = {}
with PyAudio() as p:
for host_index in range(0, p.get_host_api_count()):
host = p.get_host_api_info_by_index(host_index)
for device_index in range(0, p.get_host_api_info_by_index(host_index)['deviceCount']):
device = p.get_device_info_by_host_api_device_index(host_index, device_index)
if device["maxInputChannels"] > 0 and device["isLoopbackDevice"] is False:
if host["name"] in devices.keys():
devices[host["name"]].append(device)
else:
devices[host["name"]] = [device]
if len(devices) == 0:
devices = {"NoHost": [{"name": "NoDevice"}]}
return devices
def getDefaultInputDevice():
with PyAudio() as p:
api_info = p.get_default_host_api_info()
defaultInputDevice = api_info["defaultInputDevice"]
for host_index in range(0, p.get_host_api_count()):
host = p.get_host_api_info_by_index(host_index)
for device_index in range(0, p.get_host_api_info_by_index(host_index)['deviceCount']):
device = p.get_device_info_by_host_api_device_index(host_index, device_index)
if device["index"] == defaultInputDevice:
return {"host": host, "device": device}
return {"host": {"name": "NoHost"}, "device": {"name": "NoDevice"}}
def getOutputDevices():
devices = []
with PyAudio() as p:
wasapi_info = p.get_host_api_info_by_type(paWASAPI)
for host_index in range(0, p.get_host_api_count()):
host = p.get_host_api_info_by_index(host_index)
if host["name"] == wasapi_info["name"]:
for device_index in range(0, p.get_host_api_info_by_index(host_index)['deviceCount']):
device = p.get_device_info_by_host_api_device_index(host_index, device_index)
if not device["isLoopbackDevice"]:
for loopback in p.get_loopback_device_info_generator():
if device["name"] in loopback["name"]:
devices.append(loopback)
if len(devices) == 0:
devices = [{"name": "NoDevice"}]
else:
devices = [dict(t) for t in {tuple(d.items()) for d in devices}]
return devices
def getDefaultOutputDevice():
with PyAudio() as p:
wasapi_info = p.get_host_api_info_by_type(paWASAPI)
defaultOutputDevice = wasapi_info["defaultOutputDevice"]
for host_index in range(0, p.get_host_api_count()):
for device_index in range(0, p. get_host_api_info_by_index(host_index)['deviceCount']):
device = p.get_device_info_by_host_api_device_index(host_index, device_index)
if device["index"] == defaultOutputDevice:
default_speakers = device
if not default_speakers["isLoopbackDevice"]:
for loopback in p.get_loopback_device_info_generator():
if default_speakers["name"] in loopback["name"]:
return {"device": loopback}
return {"device": {"name": "NoDevice"}}
if __name__ == "__main__":
print("getOutputDevices()", getOutputDevices())
print("getDefaultOutputDevice()", getDefaultOutputDevice())

View File

@@ -0,0 +1,98 @@
from os import path as os_path, makedirs as os_makedirs
from requests import get as requests_get
from typing import Callable
import huggingface_hub
from faster_whisper import WhisperModel
import logging
logger = logging.getLogger('faster_whisper')
logger.setLevel(logging.CRITICAL)
_MODELS = {
"tiny": "Systran/faster-whisper-tiny",
"base": "Systran/faster-whisper-base",
"small": "Systran/faster-whisper-small",
"medium": "Systran/faster-whisper-medium",
"large-v1": "Systran/faster-whisper-large-v1",
"large-v2": "Systran/faster-whisper-large-v2",
"large-v3": "Systran/faster-whisper-large-v3",
}
_FILENAMES = [
"config.json",
"preprocessor_config.json",
"model.bin",
"tokenizer.json",
"vocabulary.txt",
"vocabulary.json",
]
def downloadFile(url, path, func=None):
try:
res = requests_get(url, stream=True)
res.raise_for_status()
file_size = int(res.headers.get('content-length', 0))
total_chunk = 0
with open(os_path.join(path), 'wb') as file:
for chunk in res.iter_content(chunk_size=1024*5):
file.write(chunk)
if isinstance(func, Callable):
total_chunk += len(chunk)
func(total_chunk/file_size)
except Exception as e:
print("error:downloadFile()", e)
def checkWhisperWeight(root, weight_type):
path = os_path.join(root, "weights", "whisper", weight_type)
result = False
try:
WhisperModel(
path,
device="cpu",
device_index=0,
compute_type="int8",
cpu_threads=4,
num_workers=1,
local_files_only=True,
)
result = True
except Exception:
pass
return result
def downloadWhisperWeight(root, weight_type, callbackFunc):
path = os_path.join(root, "weights", "whisper", weight_type)
os_makedirs(path, exist_ok=True)
if checkWhisperWeight(root, weight_type) is True:
return
for filename in _FILENAMES:
print("Downloading", filename, "...")
file_path = os_path.join(path, filename)
url = huggingface_hub.hf_hub_url(_MODELS[weight_type], filename)
downloadFile(url, file_path, func=callbackFunc)
def getWhisperModel(root, weight_type):
path = os_path.join(root, "weights", "whisper", weight_type)
return WhisperModel(
path,
device="cpu",
device_index=0,
compute_type="int8",
cpu_threads=4,
num_workers=1,
local_files_only=True,
)
if __name__ == "__main__":
def callback(value):
print(value)
pass
downloadWhisperWeight("./", "tiny", callback)
downloadWhisperWeight("./", "base", callback)
downloadWhisperWeight("./", "small", callback)
downloadWhisperWeight("./", "medium", callback)
downloadWhisperWeight("./", "large-v1", callback)
downloadWhisperWeight("./", "large-v2", callback)
downloadWhisperWeight("./", "large-v3", callback)