[WIP/TEST] Model : モデルの保存位置の変更

- speakerの文字起こし処理のバグを修正
This commit is contained in:
misyaguziya
2024-02-01 15:49:17 +09:00
parent 7cb8c473d4
commit 1de239549f
6 changed files with 23 additions and 27 deletions

2
.gitignore vendored
View File

@@ -6,7 +6,7 @@ VRCT.spec
*.pyc *.pyc
logs/ logs/
.venv/ .venv/
weight/ weights/
.vscode .vscode
error.log error.log
*.exe *.exe

View File

@@ -8,14 +8,13 @@ if __name__ == "__main__":
splash.showSplash() splash.showSplash()
from config import config from config import config
from models.translation.utils import downloadCTranslate2Weight from models.translation.translation_utils import downloadCTranslate2Weight
if config.USE_TRANSLATION_FEATURE is True: if config.USE_TRANSLATION_FEATURE is True:
downloadCTranslate2Weight(config.PATH_LOCAL, config.CTRANSLATE2_WEIGHT_TYPE, splash.updateDownloadProgress) downloadCTranslate2Weight(config.PATH_LOCAL, config.CTRANSLATE2_WEIGHT_TYPE, splash.updateDownloadProgress)
splash.toProgress(0)
from models.transcription.transcription_whisper import downloadWhisperWeight
# whisperのダウンロードの説明に変更する必要あり # whisperのダウンロードの説明に変更する必要あり
if config.USE_WHISPER_FEATURE is True: if config.USE_WHISPER_FEATURE is True:
from models.transcription.transcription_whisper import downloadWhisperWeight
downloadWhisperWeight(config.PATH_LOCAL, config.WHISPER_WEIGHT_TYPE, splash.updateDownloadProgress) downloadWhisperWeight(config.PATH_LOCAL, config.WHISPER_WEIGHT_TYPE, splash.updateDownloadProgress)
splash.toProgress(0) splash.toProgress(0)

View File

@@ -23,7 +23,7 @@ from models.transcription.transcription_transcriber import AudioTranscriber
from models.xsoverlay.notification import xsoverlayForVRCT from models.xsoverlay.notification import xsoverlayForVRCT
from models.translation.translation_languages import translation_lang from models.translation.translation_languages import translation_lang
from models.transcription.transcription_languages import transcription_lang from models.transcription.transcription_languages import transcription_lang
from models.translation.utils import checkCTranslate2Weight from models.translation.translation_utils import checkCTranslate2Weight
from config import config from config import config
class threadFnc(Thread): class threadFnc(Thread):
@@ -424,7 +424,7 @@ class Model:
root=config.PATH_LOCAL, root=config.PATH_LOCAL,
) )
def sendSpeakerTranscript(): def sendSpeakerTranscript():
speaker_transcriber.transcribeAudioQueue(speaker_audio_queue, config.TARGET_LANGUAGE, config.TARGET_COUNTRY) speaker_transcriber.transcribeAudioQueue(config.SELECTED_RECOGNIZER, speaker_audio_queue, config.TARGET_LANGUAGE, config.TARGET_COUNTRY)
message = speaker_transcriber.getTranscript() message = speaker_transcriber.getTranscript()
try: try:
fnc(message) fnc(message)

View File

@@ -60,10 +60,9 @@ def checkWhisperWeight(path):
return result return result
def downloadWhisperWeight(root, weight_type, callbackFunc): def downloadWhisperWeight(root, weight_type, callbackFunc):
path = os_path.join(root, "weight", "whisper", weight_type) path = os_path.join(root, "weights", "whisper", weight_type)
os_makedirs(path, exist_ok=True) os_makedirs(path, exist_ok=True)
if checkWhisperWeight(path) is True: if checkWhisperWeight(path) is True:
print("weight_type:", weight_type, checkWhisperWeight(path))
return return
for filename in _FILENAMES: for filename in _FILENAMES:
@@ -72,10 +71,8 @@ def downloadWhisperWeight(root, weight_type, callbackFunc):
url = huggingface_hub.hf_hub_url(_MODELS[weight_type], filename) url = huggingface_hub.hf_hub_url(_MODELS[weight_type], filename)
downloadFile(url, file_path, func=callbackFunc) downloadFile(url, file_path, func=callbackFunc)
print("weight_type:", weight_type, checkWhisperWeight(path))
def getWhisperModel(root, weight_type): def getWhisperModel(root, weight_type):
path = os_path.join(root, "weight", "whisper", weight_type) path = os_path.join(root, "weights", "whisper", weight_type)
return WhisperModel( return WhisperModel(
path, path,
device="cpu", device="cpu",

View File

@@ -2,7 +2,7 @@ import os
from deepl import Translator as deepl_Translator from deepl import Translator as deepl_Translator
from translators import translate_text as other_web_Translator from translators import translate_text as other_web_Translator
from .translation_languages import translation_lang from .translation_languages import translation_lang
from .utils import ctranslate2_weights from .translation_utils import ctranslate2_weights
import ctranslate2 import ctranslate2
import transformers import transformers
@@ -27,8 +27,8 @@ class Translator():
def changeCTranslate2Model(self, path, model_type): def changeCTranslate2Model(self, path, model_type):
directory_name = ctranslate2_weights[model_type]["directory_name"] directory_name = ctranslate2_weights[model_type]["directory_name"]
tokenizer = ctranslate2_weights[model_type]["tokenizer"] tokenizer = ctranslate2_weights[model_type]["tokenizer"]
weight_path = os.path.join(path, "weight", directory_name) weight_path = os.path.join(path, "weights", "ctranslate2", directory_name)
tokenizer_path = os.path.join(path, "weight", directory_name, "tokenizer") tokenizer_path = os.path.join(path, "weights", "ctranslate2", directory_name, "tokenizer")
self.ctranslate2_translator = ctranslate2.Translator( self.ctranslate2_translator = ctranslate2.Translator(
weight_path, weight_path,
device="cpu", device="cpu",
@@ -41,7 +41,7 @@ class Translator():
self.ctranslate2_tokenizer = transformers.AutoTokenizer.from_pretrained(tokenizer, cache_dir=tokenizer_path) self.ctranslate2_tokenizer = transformers.AutoTokenizer.from_pretrained(tokenizer, cache_dir=tokenizer_path)
except Exception as e: except Exception as e:
print("Error: changeCTranslate2Model()", e) print("Error: changeCTranslate2Model()", e)
tokenizer_path = os.path.join("./weight", directory_name, "tokenizer") tokenizer_path = os.path.join("./weights", "ctranslate2", directory_name, "tokenizer")
self.ctranslate2_tokenizer = transformers.AutoTokenizer.from_pretrained(tokenizer, cache_dir=tokenizer_path) self.ctranslate2_tokenizer = transformers.AutoTokenizer.from_pretrained(tokenizer, cache_dir=tokenizer_path)
@staticmethod @staticmethod

View File

@@ -39,36 +39,36 @@ def calculate_file_hash(file_path, block_size=65536):
return hash_object.hexdigest() return hash_object.hexdigest()
def checkCTranslate2Weight(path, weight_type="Small"): def checkCTranslate2Weight(path, weight_type="Small"):
directory_name = 'weight'
current_directory = path
weight_directory_name = ctranslate2_weights[weight_type]["directory_name"] weight_directory_name = ctranslate2_weights[weight_type]["directory_name"]
hash_data = ctranslate2_weights[weight_type]["hash"] hash_data = ctranslate2_weights[weight_type]["hash"]
files = ["model.bin", "sentencepiece.model", "shared_vocabulary.txt"] files = [
"model.bin",
"sentencepiece.model",
"shared_vocabulary.txt"
]
# check already downloaded # check already downloaded
already_downloaded = False already_downloaded = False
if all(os_path.exists(os_path.join(current_directory, directory_name, weight_directory_name, file)) for file in files): if all(os_path.exists(os_path.join(path, weight_directory_name, file)) for file in files):
# check hash # check hash
for file in files: for file in files:
original_hash = hash_data[file] original_hash = hash_data[file]
current_hash = calculate_file_hash(os_path.join(current_directory, directory_name, weight_directory_name, file)) current_hash = calculate_file_hash(os_path.join(path, weight_directory_name, file))
if original_hash != current_hash: if original_hash != current_hash:
break break
already_downloaded = True already_downloaded = True
return already_downloaded return already_downloaded
def downloadCTranslate2Weight(path, weight_type="Small", func=None): def downloadCTranslate2Weight(root, weight_type="Small", func=None):
url = ctranslate2_weights[weight_type]["url"] url = ctranslate2_weights[weight_type]["url"]
filename = 'weight.zip' filename = "weight.zip"
directory_name = 'weight' path = os_path.join(root, "weights", "ctranslate2")
current_directory = path os_makedirs(path, exist_ok=True)
if checkCTranslate2Weight(path, weight_type): if checkCTranslate2Weight(path, weight_type):
return return
try: try:
os_makedirs(os_path.join(current_directory, directory_name), exist_ok=True)
print(os_path.join(current_directory, directory_name))
with tempfile.TemporaryDirectory() as tmp_path: with tempfile.TemporaryDirectory() as tmp_path:
res = requests_get(url, stream=True) res = requests_get(url, stream=True)
file_size = int(res.headers.get('content-length', 0)) file_size = int(res.headers.get('content-length', 0))
@@ -81,6 +81,6 @@ def downloadCTranslate2Weight(path, weight_type="Small", func=None):
func(total_chunk/file_size) func(total_chunk/file_size)
with ZipFile(os_path.join(tmp_path, filename)) as zf: with ZipFile(os_path.join(tmp_path, filename)) as zf:
zf.extractall(os_path.join(current_directory, directory_name)) zf.extractall(path)
except Exception as e: except Exception as e:
print("error:downloadCTranslate2Weight()", e) print("error:downloadCTranslate2Weight()", e)