CTranslate2に対応する翻訳言語の設定を追加し、翻訳ロジックを更新

This commit is contained in:
misyaguziya
2025-10-14 23:15:49 +09:00
parent 6fc89fbb3e
commit 7d0f63c118
4 changed files with 23 additions and 13 deletions

View File

@@ -222,8 +222,12 @@ class Model:
transcription_langs = list(transcription_lang.keys()) transcription_langs = list(transcription_lang.keys())
translation_langs = [] translation_langs = []
for tl_key in translation_lang.keys(): for tl_key in translation_lang.keys():
for lang in translation_lang[tl_key]["source"]: if tl_key == "CTranslate2":
translation_langs.append(lang) for lang in translation_lang[tl_key][config.CTRANSLATE2_WEIGHT_TYPE]["source"]:
translation_langs.append(lang)
else:
for lang in translation_lang[tl_key]["source"]:
translation_langs.append(lang)
translation_langs = list(set(translation_langs)) translation_langs = list(set(translation_langs))
supported_langs = list(filter(lambda x: x in transcription_langs, translation_langs)) supported_langs = list(filter(lambda x: x in transcription_langs, translation_langs))
@@ -243,7 +247,10 @@ class Model:
selectable_engines = [key for key, value in engines_status.items() if value is True] selectable_engines = [key for key, value in engines_status.items() if value is True]
compatible_engines = [] compatible_engines = []
for engine in list(translation_lang.keys()): for engine in list(translation_lang.keys()):
languages = translation_lang.get(engine, {}).get("source", {}) if engine == "CTranslate2":
languages = translation_lang.get(engine, {}).get(config.CTRANSLATE2_WEIGHT_TYPE, {}).get("source", {})
else:
languages = translation_lang.get(engine, {}).get("source", {})
source_langs = [e["language"] for e in list(source_lang.values()) if e["enable"] is True] source_langs = [e["language"] for e in list(source_lang.values()) if e["enable"] is True]
target_langs = [e["language"] for e in list(target_lang.values()) if e["enable"] is True] target_langs = [e["language"] for e in list(target_lang.values()) if e["enable"] is True]
language_list = list(languages.keys()) language_list = list(languages.keys())

View File

@@ -372,8 +372,9 @@ dict_m2m100_languages = {
"Sundanese": "su" "Sundanese": "su"
} }
translation_lang["m2m100_418M-ct2-int8"] = {"source":dict_m2m100_languages, "target":dict_m2m100_languages} translation_lang["CTranslate2"] = {}
translation_lang["m2m100_1.2B-ct2-int8"] = {"source":dict_m2m100_languages, "target":dict_m2m100_languages} translation_lang["CTranslate2"]["m2m100_418M-ct2-int8"] = {"source":dict_m2m100_languages, "target":dict_m2m100_languages}
translation_lang["CTranslate2"]["m2m100_1.2B-ct2-int8"] = {"source":dict_m2m100_languages, "target":dict_m2m100_languages}
dict_nllb_languages = { dict_nllb_languages = {
"Acehnese (Arabic script)": "ace_Arab", "Acehnese (Arabic script)": "ace_Arab",
@@ -582,8 +583,8 @@ dict_nllb_languages = {
"Zulu": "zul_Latn" "Zulu": "zul_Latn"
} }
translation_lang["nllb-200-distilled-1.3B-ct2-int8"] = {"source":dict_nllb_languages, "target":dict_nllb_languages} translation_lang["CTranslate2"]["nllb-200-distilled-1.3B-ct2-int8"] = {"source":dict_nllb_languages, "target":dict_nllb_languages}
translation_lang["nllb-200-3.3B-ct2-int8"] = {"source":dict_nllb_languages, "target":dict_nllb_languages} translation_lang["CTranslate2"]["nllb-200-3.3B-ct2-int8"] = {"source":dict_nllb_languages, "target":dict_nllb_languages}
dict_plamo_languages = { dict_plamo_languages = {
"English": "English", "English": "English",

View File

@@ -177,12 +177,14 @@ class Translator:
target_language = "Portuguese European" target_language = "Portuguese European"
else: else:
target_language = "Portuguese Brazilian" target_language = "Portuguese Brazilian"
source_language = translation_lang[translator_name]["source"][source_language]
target_language = translation_lang[translator_name]["target"][target_language]
case "CTranslate2": case "CTranslate2":
translator_name = weight_type source_language = translation_lang[translator_name][weight_type]["source"][source_language]
target_language = translation_lang[translator_name][weight_type]["target"][target_language]
case _: case _:
pass source_language = translation_lang[translator_name]["source"][source_language]
source_language = translation_lang[translator_name]["source"][source_language] target_language = translation_lang[translator_name]["target"][target_language]
target_language = translation_lang[translator_name]["target"][target_language]
return source_language, target_language return source_language, target_language
def translate(self, translator_name: str, weight_type: str, source_language: str, target_language: str, target_country: str, message: str) -> Any: def translate(self, translator_name: str, weight_type: str, source_language: str, target_language: str, target_country: str, message: str) -> Any:

View File

@@ -95,8 +95,8 @@ def downloadCTranslate2Tokenizer(path: str, weight_type: str = "m2m100_418M-ct2-
tokenizer = ctranslate2_weights[weight_type]["tokenizer"] tokenizer = ctranslate2_weights[weight_type]["tokenizer"]
tokenizer_path = os_path.join(path, "weights", "ctranslate2", directory_name, "tokenizer") tokenizer_path = os_path.join(path, "weights", "ctranslate2", directory_name, "tokenizer")
try: try:
os_makedirs(tokenizer_cache, exist_ok=True) os_makedirs(tokenizer_path, exist_ok=True)
transformers.AutoTokenizer.from_pretrained(tokenizer_name, cache_dir=tokenizer_cache) transformers.AutoTokenizer.from_pretrained(tokenizer, cache_dir=tokenizer_path)
except Exception: except Exception:
errorLogging() errorLogging()
tokenizer_path = os_path.join("./weights", "ctranslate2", directory_name, "tokenizer") tokenizer_path = os_path.join("./weights", "ctranslate2", directory_name, "tokenizer")