[Add] Model: ctranslate2のテストコードを追加
This commit is contained in:
4
model.py
4
model.py
@@ -161,7 +161,7 @@ class Model:
|
||||
elif target_language in ["Portuguese European", "Portuguese Brazilian"]:
|
||||
target_language = "Portuguese"
|
||||
|
||||
translation = self.translator.translate(
|
||||
translation = self.translator.translate_ctranslate2(
|
||||
translator_name=translator_name,
|
||||
source_language=source_language,
|
||||
target_language=target_language,
|
||||
@@ -192,7 +192,7 @@ class Model:
|
||||
elif target_language in ["Portuguese European", "Portuguese Brazilian"]:
|
||||
target_language = "Portuguese"
|
||||
|
||||
translation = self.translator.translate(
|
||||
translation = self.translator.translate_ctranslate2(
|
||||
translator_name=translator_name,
|
||||
source_language=source_language,
|
||||
target_language=target_language,
|
||||
|
||||
@@ -241,3 +241,110 @@ translation_lang["Bing"] = {
|
||||
"source":dict_bing_languages,
|
||||
"target":dict_bing_languages,
|
||||
}
|
||||
|
||||
# Display name -> language code used by the CTranslate2 (M2M100) translator.
# The codes are handed to the M2M100 tokenizer, so they must be codes that
# tokenizer knows. Javanese is 'jv' in M2M100 ('jw' is not accepted).
# NOTE(review): several codes below (la, mi, te, eu, sn, tg, fo, tk, nn, mt,
# sa, bo, as, tt, haw) do not appear in the published M2M100 language list --
# confirm against the tokenizer and prune or remap them as needed.
dict_ctranslate2_lang = {
    'English': 'en',
    'Chinese': 'zh',
    'German': 'de',
    'Spanish': 'es',
    'Russian': 'ru',
    'Korean': 'ko',
    'French': 'fr',
    'Japanese': 'ja',
    'Portuguese': 'pt',
    'Turkish': 'tr',
    'Polish': 'pl',
    'Catalan': 'ca',
    'Dutch': 'nl',
    'Arabic': 'ar',
    'Swedish': 'sv',
    'Italian': 'it',
    'Indonesian': 'id',
    'Hindi': 'hi',
    'Finnish': 'fi',
    'Vietnamese': 'vi',
    'Hebrew': 'he',
    'Ukrainian': 'uk',
    'Greek': 'el',
    'Malay': 'ms',
    'Czech': 'cs',
    'Romanian': 'ro',
    'Danish': 'da',
    'Hungarian': 'hu',
    'Tamil': 'ta',
    'Norwegian': 'no',
    'Thai': 'th',
    'Urdu': 'ur',
    'Croatian': 'hr',
    'Bulgarian': 'bg',
    'Lithuanian': 'lt',
    'Latin': 'la',
    'Maori': 'mi',
    'Malayalam': 'ml',
    'Welsh': 'cy',
    'Slovak': 'sk',
    'Telugu': 'te',
    'Persian': 'fa',
    'Latvian': 'lv',
    'Bengali': 'bn',
    'Serbian': 'sr',
    'Azerbaijani': 'az',
    'Slovenian': 'sl',
    'Kannada': 'kn',
    'Estonian': 'et',
    'Macedonian': 'mk',
    'Breton': 'br',
    'Basque': 'eu',
    'Icelandic': 'is',
    'Armenian': 'hy',
    'Nepali': 'ne',
    'Mongolian': 'mn',
    'Bosnian': 'bs',
    'Kazakh': 'kk',
    'Albanian': 'sq',
    'Swahili': 'sw',
    'Galician': 'gl',
    'Marathi': 'mr',
    'Punjabi': 'pa',
    'Sinhala': 'si',
    'Khmer': 'km',
    'Shona': 'sn',
    'Yoruba': 'yo',
    'Somali': 'so',
    'Afrikaans': 'af',
    'Occitan': 'oc',
    'Georgian': 'ka',
    'Belarusian': 'be',
    'Tajik': 'tg',
    'Sindhi': 'sd',
    'Gujarati': 'gu',
    'Amharic': 'am',
    'Yiddish': 'yi',
    'Lao': 'lo',
    'Uzbek': 'uz',
    'Faroese': 'fo',
    'Haitian creole': 'ht',
    'Pashto': 'ps',
    'Turkmen': 'tk',
    'Nynorsk': 'nn',
    'Maltese': 'mt',
    'Sanskrit': 'sa',
    'Luxembourgish': 'lb',
    'Myanmar': 'my',
    'Tibetan': 'bo',
    'Tagalog': 'tl',
    'Malagasy': 'mg',
    'Assamese': 'as',
    'Tatar': 'tt',
    'Hawaiian': 'haw',
    'Lingala': 'ln',
    'Hausa': 'ha',
    'Bashkir': 'ba',
    'Javanese': 'jv',
    'Sundanese': 'su',
}
|
||||
|
||||
# Register the CTranslate2 engine: the same language table object is used for
# both the source and the target direction.
translation_lang["ctranslate2"] = dict.fromkeys(("source", "target"), dict_ctranslate2_lang)
|
||||
@@ -3,12 +3,24 @@ from deepl_translate import translate as deepl_web_Translator
|
||||
from translators import translate_text as other_web_Translator
|
||||
from .translation_languages import translation_lang
|
||||
|
||||
from ctranslate2.converters import TransformersConverter
|
||||
import ctranslate2
|
||||
import transformers
|
||||
|
||||
# Size label -> Hugging Face checkpoint id of the M2M100 model to use.
TRANSLATE_MODELS = dict(
    small="facebook/m2m100_418M",
    large="facebook/m2m100_1.2B",
)
|
||||
|
||||
# Translator
|
||||
class Translator():
|
||||
def __init__(self, weight_path="D:\\WORKSPACE\\WORK\\VRChatProject\\VRCT\\weight", model_size="small"):
    """Set up translator state and load the CTranslate2 model and tokenizer.

    Args:
        weight_path: Directory passed to ``ctranslate2.Translator`` as the
            model location. NOTE(review): the default is a machine-specific
            absolute path kept only for backward compatibility -- callers
            should pass the real location of the converted weights.
        model_size: Key into ``TRANSLATE_MODELS`` selecting which checkpoint
            the tokenizer is loaded from. The default, ``"small"``, resolves
            to the checkpoint id that was previously hard-coded here.

    Raises:
        KeyError: If ``model_size`` is not a key of ``TRANSLATE_MODELS``.
    """
    self.translator_status = {}

    self.translator = ctranslate2.Translator(
        weight_path,
        device="cpu",
        device_index=0,
        compute_type="int8",
        inter_threads=1,
        intra_threads=4,
    )
    # Take the checkpoint id from the shared table instead of repeating the
    # literal, so the tokenizer follows whatever TRANSLATE_MODELS declares.
    self.tokenizer = transformers.AutoTokenizer.from_pretrained(TRANSLATE_MODELS[model_size])
|
||||
|
||||
def authentication(self, translator_name, authkey=None):
|
||||
result = True
|
||||
match translator_name:
|
||||
@@ -58,3 +70,18 @@ class Translator():
|
||||
traceback.print_exc(file=f)
|
||||
result = False
|
||||
return result
|
||||
|
||||
def translate_ctranslate2(self, translator_name, source_language, target_language, message):
    """Translate ``message`` with the local CTranslate2 model.

    Args:
        translator_name: Accepted so the call signature stays compatible
            with existing callers; this method does not read it.
        source_language: Display name of the source language; must be a key
            of ``translation_lang["ctranslate2"]["source"]``.
        target_language: Display name of the target language; must be a key
            of ``translation_lang["ctranslate2"]["target"]``.
        message: Text to translate.

    Returns:
        The decoded translation produced by ``self.tokenizer.decode``.

    Raises:
        KeyError: If a language name is missing from the ctranslate2
            language table. A lookup failure in the tokenizer's
            ``lang_code_to_token`` mapping for the target code also
            propagates to the caller.
    """
    language_table = translation_lang["ctranslate2"]
    source_code = language_table["source"][source_language]
    target_code = language_table["target"][target_language]

    # The tokenizer needs the source language set before encoding.
    self.tokenizer.src_lang = source_code
    source_tokens = self.tokenizer.convert_ids_to_tokens(self.tokenizer.encode(message))

    # Decoding is started with the target-language token as a forced prefix.
    target_prefix = [self.tokenizer.lang_code_to_token[target_code]]
    results = self.translator.translate_batch([source_tokens], target_prefix=[target_prefix])

    # Skip the first token of the best hypothesis (the forced prefix).
    target_tokens = results[0].hypotheses[0][1:]

    return self.tokenizer.decode(self.tokenizer.convert_tokens_to_ids(target_tokens))
|
||||
@@ -7,3 +7,6 @@ flashtext == 2.7
|
||||
pyyaml == 6.0.1
|
||||
python-i18n == 0.3.9
|
||||
CTkToolTip == 0.8
|
||||
transformers[torch]
|
||||
sentencepiece==0.1.99
|
||||
ctranslate2==3.21.0
|
||||
Reference in New Issue
Block a user