CTranslate2に対応する翻訳言語の設定を追加し、翻訳ロジックを更新
This commit is contained in:
@@ -222,6 +222,10 @@ class Model:
|
||||
transcription_langs = list(transcription_lang.keys())
|
||||
translation_langs = []
|
||||
for tl_key in translation_lang.keys():
|
||||
if tl_key == "CTranslate2":
|
||||
for lang in translation_lang[tl_key][config.CTRANSLATE2_WEIGHT_TYPE]["source"]:
|
||||
translation_langs.append(lang)
|
||||
else:
|
||||
for lang in translation_lang[tl_key]["source"]:
|
||||
translation_langs.append(lang)
|
||||
translation_langs = list(set(translation_langs))
|
||||
@@ -243,6 +247,9 @@ class Model:
|
||||
selectable_engines = [key for key, value in engines_status.items() if value is True]
|
||||
compatible_engines = []
|
||||
for engine in list(translation_lang.keys()):
|
||||
if engine == "CTranslate2":
|
||||
languages = translation_lang.get(engine, {}).get(config.CTRANSLATE2_WEIGHT_TYPE, {}).get("source", {})
|
||||
else:
|
||||
languages = translation_lang.get(engine, {}).get("source", {})
|
||||
source_langs = [e["language"] for e in list(source_lang.values()) if e["enable"] is True]
|
||||
target_langs = [e["language"] for e in list(target_lang.values()) if e["enable"] is True]
|
||||
|
||||
@@ -372,8 +372,9 @@ dict_m2m100_languages = {
|
||||
"Sundanese": "su"
|
||||
}
|
||||
|
||||
translation_lang["m2m100_418M-ct2-int8"] = {"source":dict_m2m100_languages, "target":dict_m2m100_languages}
|
||||
translation_lang["m2m100_1.2B-ct2-int8"] = {"source":dict_m2m100_languages, "target":dict_m2m100_languages}
|
||||
translation_lang["CTranslate2"] = {}
|
||||
translation_lang["CTranslate2"]["m2m100_418M-ct2-int8"] = {"source":dict_m2m100_languages, "target":dict_m2m100_languages}
|
||||
translation_lang["CTranslate2"]["m2m100_1.2B-ct2-int8"] = {"source":dict_m2m100_languages, "target":dict_m2m100_languages}
|
||||
|
||||
dict_nllb_languages = {
|
||||
"Acehnese (Arabic script)": "ace_Arab",
|
||||
@@ -582,8 +583,8 @@ dict_nllb_languages = {
|
||||
"Zulu": "zul_Latn"
|
||||
}
|
||||
|
||||
translation_lang["nllb-200-distilled-1.3B-ct2-int8"] = {"source":dict_nllb_languages, "target":dict_nllb_languages}
|
||||
translation_lang["nllb-200-3.3B-ct2-int8"] = {"source":dict_nllb_languages, "target":dict_nllb_languages}
|
||||
translation_lang["CTranslate2"]["nllb-200-distilled-1.3B-ct2-int8"] = {"source":dict_nllb_languages, "target":dict_nllb_languages}
|
||||
translation_lang["CTranslate2"]["nllb-200-3.3B-ct2-int8"] = {"source":dict_nllb_languages, "target":dict_nllb_languages}
|
||||
|
||||
dict_plamo_languages = {
|
||||
"English": "English",
|
||||
|
||||
@@ -177,10 +177,12 @@ class Translator:
|
||||
target_language = "Portuguese European"
|
||||
else:
|
||||
target_language = "Portuguese Brazilian"
|
||||
source_language = translation_lang[translator_name]["source"][source_language]
|
||||
target_language = translation_lang[translator_name]["target"][target_language]
|
||||
case "CTranslate2":
|
||||
translator_name = weight_type
|
||||
source_language = translation_lang[translator_name][weight_type]["source"][source_language]
|
||||
target_language = translation_lang[translator_name][weight_type]["target"][target_language]
|
||||
case _:
|
||||
pass
|
||||
source_language = translation_lang[translator_name]["source"][source_language]
|
||||
target_language = translation_lang[translator_name]["target"][target_language]
|
||||
return source_language, target_language
|
||||
|
||||
@@ -95,8 +95,8 @@ def downloadCTranslate2Tokenizer(path: str, weight_type: str = "m2m100_418M-ct2-
|
||||
tokenizer = ctranslate2_weights[weight_type]["tokenizer"]
|
||||
tokenizer_path = os_path.join(path, "weights", "ctranslate2", directory_name, "tokenizer")
|
||||
try:
|
||||
os_makedirs(tokenizer_cache, exist_ok=True)
|
||||
transformers.AutoTokenizer.from_pretrained(tokenizer_name, cache_dir=tokenizer_cache)
|
||||
os_makedirs(tokenizer_path, exist_ok=True)
|
||||
transformers.AutoTokenizer.from_pretrained(tokenizer, cache_dir=tokenizer_path)
|
||||
except Exception:
|
||||
errorLogging()
|
||||
tokenizer_path = os_path.join("./weights", "ctranslate2", directory_name, "tokenizer")
|
||||
|
||||
Reference in New Issue
Block a user