[WIP/TEST] Model : モデルの保存位置の変更
- speakerの文字起こし処理のバグを修正
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -6,7 +6,7 @@ VRCT.spec
|
|||||||
*.pyc
|
*.pyc
|
||||||
logs/
|
logs/
|
||||||
.venv/
|
.venv/
|
||||||
weight/
|
weights/
|
||||||
.vscode
|
.vscode
|
||||||
error.log
|
error.log
|
||||||
*.exe
|
*.exe
|
||||||
|
|||||||
5
main.py
5
main.py
@@ -8,14 +8,13 @@ if __name__ == "__main__":
|
|||||||
splash.showSplash()
|
splash.showSplash()
|
||||||
|
|
||||||
from config import config
|
from config import config
|
||||||
from models.translation.utils import downloadCTranslate2Weight
|
from models.translation.translation_utils import downloadCTranslate2Weight
|
||||||
if config.USE_TRANSLATION_FEATURE is True:
|
if config.USE_TRANSLATION_FEATURE is True:
|
||||||
downloadCTranslate2Weight(config.PATH_LOCAL, config.CTRANSLATE2_WEIGHT_TYPE, splash.updateDownloadProgress)
|
downloadCTranslate2Weight(config.PATH_LOCAL, config.CTRANSLATE2_WEIGHT_TYPE, splash.updateDownloadProgress)
|
||||||
splash.toProgress(0)
|
|
||||||
|
|
||||||
|
from models.transcription.transcription_whisper import downloadWhisperWeight
|
||||||
# whisperのダウンロードの説明に変更する必要あり
|
# whisperのダウンロードの説明に変更する必要あり
|
||||||
if config.USE_WHISPER_FEATURE is True:
|
if config.USE_WHISPER_FEATURE is True:
|
||||||
from models.transcription.transcription_whisper import downloadWhisperWeight
|
|
||||||
downloadWhisperWeight(config.PATH_LOCAL, config.WHISPER_WEIGHT_TYPE, splash.updateDownloadProgress)
|
downloadWhisperWeight(config.PATH_LOCAL, config.WHISPER_WEIGHT_TYPE, splash.updateDownloadProgress)
|
||||||
splash.toProgress(0)
|
splash.toProgress(0)
|
||||||
|
|
||||||
|
|||||||
4
model.py
4
model.py
@@ -23,7 +23,7 @@ from models.transcription.transcription_transcriber import AudioTranscriber
|
|||||||
from models.xsoverlay.notification import xsoverlayForVRCT
|
from models.xsoverlay.notification import xsoverlayForVRCT
|
||||||
from models.translation.translation_languages import translation_lang
|
from models.translation.translation_languages import translation_lang
|
||||||
from models.transcription.transcription_languages import transcription_lang
|
from models.transcription.transcription_languages import transcription_lang
|
||||||
from models.translation.utils import checkCTranslate2Weight
|
from models.translation.translation_utils import checkCTranslate2Weight
|
||||||
from config import config
|
from config import config
|
||||||
|
|
||||||
class threadFnc(Thread):
|
class threadFnc(Thread):
|
||||||
@@ -424,7 +424,7 @@ class Model:
|
|||||||
root=config.PATH_LOCAL,
|
root=config.PATH_LOCAL,
|
||||||
)
|
)
|
||||||
def sendSpeakerTranscript():
|
def sendSpeakerTranscript():
|
||||||
speaker_transcriber.transcribeAudioQueue(speaker_audio_queue, config.TARGET_LANGUAGE, config.TARGET_COUNTRY)
|
speaker_transcriber.transcribeAudioQueue(config.SELECTED_RECOGNIZER, speaker_audio_queue, config.TARGET_LANGUAGE, config.TARGET_COUNTRY)
|
||||||
message = speaker_transcriber.getTranscript()
|
message = speaker_transcriber.getTranscript()
|
||||||
try:
|
try:
|
||||||
fnc(message)
|
fnc(message)
|
||||||
|
|||||||
@@ -60,10 +60,9 @@ def checkWhisperWeight(path):
|
|||||||
return result
|
return result
|
||||||
|
|
||||||
def downloadWhisperWeight(root, weight_type, callbackFunc):
|
def downloadWhisperWeight(root, weight_type, callbackFunc):
|
||||||
path = os_path.join(root, "weight", "whisper", weight_type)
|
path = os_path.join(root, "weights", "whisper", weight_type)
|
||||||
os_makedirs(path, exist_ok=True)
|
os_makedirs(path, exist_ok=True)
|
||||||
if checkWhisperWeight(path) is True:
|
if checkWhisperWeight(path) is True:
|
||||||
print("weight_type:", weight_type, checkWhisperWeight(path))
|
|
||||||
return
|
return
|
||||||
|
|
||||||
for filename in _FILENAMES:
|
for filename in _FILENAMES:
|
||||||
@@ -72,10 +71,8 @@ def downloadWhisperWeight(root, weight_type, callbackFunc):
|
|||||||
url = huggingface_hub.hf_hub_url(_MODELS[weight_type], filename)
|
url = huggingface_hub.hf_hub_url(_MODELS[weight_type], filename)
|
||||||
downloadFile(url, file_path, func=callbackFunc)
|
downloadFile(url, file_path, func=callbackFunc)
|
||||||
|
|
||||||
print("weight_type:", weight_type, checkWhisperWeight(path))
|
|
||||||
|
|
||||||
def getWhisperModel(root, weight_type):
|
def getWhisperModel(root, weight_type):
|
||||||
path = os_path.join(root, "weight", "whisper", weight_type)
|
path = os_path.join(root, "weights", "whisper", weight_type)
|
||||||
return WhisperModel(
|
return WhisperModel(
|
||||||
path,
|
path,
|
||||||
device="cpu",
|
device="cpu",
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ import os
|
|||||||
from deepl import Translator as deepl_Translator
|
from deepl import Translator as deepl_Translator
|
||||||
from translators import translate_text as other_web_Translator
|
from translators import translate_text as other_web_Translator
|
||||||
from .translation_languages import translation_lang
|
from .translation_languages import translation_lang
|
||||||
from .utils import ctranslate2_weights
|
from .translation_utils import ctranslate2_weights
|
||||||
|
|
||||||
import ctranslate2
|
import ctranslate2
|
||||||
import transformers
|
import transformers
|
||||||
@@ -27,8 +27,8 @@ class Translator():
|
|||||||
def changeCTranslate2Model(self, path, model_type):
|
def changeCTranslate2Model(self, path, model_type):
|
||||||
directory_name = ctranslate2_weights[model_type]["directory_name"]
|
directory_name = ctranslate2_weights[model_type]["directory_name"]
|
||||||
tokenizer = ctranslate2_weights[model_type]["tokenizer"]
|
tokenizer = ctranslate2_weights[model_type]["tokenizer"]
|
||||||
weight_path = os.path.join(path, "weight", directory_name)
|
weight_path = os.path.join(path, "weights", "ctranslate2", directory_name)
|
||||||
tokenizer_path = os.path.join(path, "weight", directory_name, "tokenizer")
|
tokenizer_path = os.path.join(path, "weights", "ctranslate2", directory_name, "tokenizer")
|
||||||
self.ctranslate2_translator = ctranslate2.Translator(
|
self.ctranslate2_translator = ctranslate2.Translator(
|
||||||
weight_path,
|
weight_path,
|
||||||
device="cpu",
|
device="cpu",
|
||||||
@@ -41,7 +41,7 @@ class Translator():
|
|||||||
self.ctranslate2_tokenizer = transformers.AutoTokenizer.from_pretrained(tokenizer, cache_dir=tokenizer_path)
|
self.ctranslate2_tokenizer = transformers.AutoTokenizer.from_pretrained(tokenizer, cache_dir=tokenizer_path)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("Error: changeCTranslate2Model()", e)
|
print("Error: changeCTranslate2Model()", e)
|
||||||
tokenizer_path = os.path.join("./weight", directory_name, "tokenizer")
|
tokenizer_path = os.path.join("./weights", "ctranslate2", directory_name, "tokenizer")
|
||||||
self.ctranslate2_tokenizer = transformers.AutoTokenizer.from_pretrained(tokenizer, cache_dir=tokenizer_path)
|
self.ctranslate2_tokenizer = transformers.AutoTokenizer.from_pretrained(tokenizer, cache_dir=tokenizer_path)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
|||||||
@@ -39,36 +39,36 @@ def calculate_file_hash(file_path, block_size=65536):
|
|||||||
return hash_object.hexdigest()
|
return hash_object.hexdigest()
|
||||||
|
|
||||||
def checkCTranslate2Weight(path, weight_type="Small"):
|
def checkCTranslate2Weight(path, weight_type="Small"):
|
||||||
directory_name = 'weight'
|
|
||||||
current_directory = path
|
|
||||||
weight_directory_name = ctranslate2_weights[weight_type]["directory_name"]
|
weight_directory_name = ctranslate2_weights[weight_type]["directory_name"]
|
||||||
hash_data = ctranslate2_weights[weight_type]["hash"]
|
hash_data = ctranslate2_weights[weight_type]["hash"]
|
||||||
files = ["model.bin", "sentencepiece.model", "shared_vocabulary.txt"]
|
files = [
|
||||||
|
"model.bin",
|
||||||
|
"sentencepiece.model",
|
||||||
|
"shared_vocabulary.txt"
|
||||||
|
]
|
||||||
|
|
||||||
# check already downloaded
|
# check already downloaded
|
||||||
already_downloaded = False
|
already_downloaded = False
|
||||||
if all(os_path.exists(os_path.join(current_directory, directory_name, weight_directory_name, file)) for file in files):
|
if all(os_path.exists(os_path.join(path, weight_directory_name, file)) for file in files):
|
||||||
# check hash
|
# check hash
|
||||||
for file in files:
|
for file in files:
|
||||||
original_hash = hash_data[file]
|
original_hash = hash_data[file]
|
||||||
current_hash = calculate_file_hash(os_path.join(current_directory, directory_name, weight_directory_name, file))
|
current_hash = calculate_file_hash(os_path.join(path, weight_directory_name, file))
|
||||||
if original_hash != current_hash:
|
if original_hash != current_hash:
|
||||||
break
|
break
|
||||||
already_downloaded = True
|
already_downloaded = True
|
||||||
return already_downloaded
|
return already_downloaded
|
||||||
|
|
||||||
def downloadCTranslate2Weight(path, weight_type="Small", func=None):
|
def downloadCTranslate2Weight(root, weight_type="Small", func=None):
|
||||||
url = ctranslate2_weights[weight_type]["url"]
|
url = ctranslate2_weights[weight_type]["url"]
|
||||||
filename = 'weight.zip'
|
filename = "weight.zip"
|
||||||
directory_name = 'weight'
|
path = os_path.join(root, "weights", "ctranslate2")
|
||||||
current_directory = path
|
os_makedirs(path, exist_ok=True)
|
||||||
|
|
||||||
if checkCTranslate2Weight(path, weight_type):
|
if checkCTranslate2Weight(path, weight_type):
|
||||||
return
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
os_makedirs(os_path.join(current_directory, directory_name), exist_ok=True)
|
|
||||||
print(os_path.join(current_directory, directory_name))
|
|
||||||
with tempfile.TemporaryDirectory() as tmp_path:
|
with tempfile.TemporaryDirectory() as tmp_path:
|
||||||
res = requests_get(url, stream=True)
|
res = requests_get(url, stream=True)
|
||||||
file_size = int(res.headers.get('content-length', 0))
|
file_size = int(res.headers.get('content-length', 0))
|
||||||
@@ -81,6 +81,6 @@ def downloadCTranslate2Weight(path, weight_type="Small", func=None):
|
|||||||
func(total_chunk/file_size)
|
func(total_chunk/file_size)
|
||||||
|
|
||||||
with ZipFile(os_path.join(tmp_path, filename)) as zf:
|
with ZipFile(os_path.join(tmp_path, filename)) as zf:
|
||||||
zf.extractall(os_path.join(current_directory, directory_name))
|
zf.extractall(path)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("error:downloadCTranslate2Weight()", e)
|
print("error:downloadCTranslate2Weight()", e)
|
||||||
Reference in New Issue
Block a user