👍[Update] Model : 翻訳失敗時にCTranslate2での翻訳処理を追加修正

不要なコードを削除
This commit is contained in:
misyaguziya
2024-01-07 01:46:29 +09:00
parent 0742d635af
commit 82dde149ee
3 changed files with 58 additions and 72 deletions

View File

@@ -1,4 +1,3 @@
-import tempfile
 from zipfile import ZipFile
 from subprocess import Popen
 from os import makedirs as os_makedirs
@@ -10,10 +9,9 @@ from logging import getLogger, FileHandler, Formatter, INFO
 from time import sleep
 from queue import Queue
 from threading import Thread, Event
-from requests import get as requests_get, head as requests_head
+from requests import get as requests_get
 import webbrowser
-from tqdm import tqdm
 from flashtext import KeywordProcessor
 from models.translation.translation_translator import Translator
 from models.transcription.transcription_utils import getInputDevices, getDefaultOutputDevice
@@ -68,7 +66,7 @@ class Model:
 def resetTranslator(self):
 del self.translator
-self.translator = Translator(config.PATH_LOCAL)
+self.translator = Translator(config.PATH_LOCAL, config.CTRANSLATE2_WEIGHTS[config.WEIGHT_TYPE])
 def resetKeywordProcessor(self):
 del self.keyword_processor
@@ -101,8 +99,6 @@ class Model:
 for tl_key in tl_keys:
 for lang in translation_lang[tl_key]["source"]:
 translation_langs.append(lang)
-for lang in translation_lang[tl_key]["target"]:
-translation_langs.append(lang)
 translation_langs = list(set(translation_langs))
 supported_langs = list(filter(lambda x: x in transcription_langs, translation_langs))
@@ -122,81 +118,70 @@ class Model:
 def findTranslationEngines(self, source_lang, target_lang):
 compatible_engines = []
 for engine in translatorEngine:
-source_languages = translation_lang.get(engine, {}).get("source", {})
-target_languages = translation_lang.get(engine, {}).get("target", {})
-if source_lang in source_languages and target_lang in target_languages:
+languages = translation_lang.get(engine, {}).get("source", {})
+if source_lang in languages and target_lang in languages:
 compatible_engines.append(engine)
-if "DeepL_API" in compatible_engines:
-if self.translator.deepl_client is None:
-compatible_engines.remove('DeepL_API')
 return compatible_engines
-def getInputTranslate(self, message):
+def getInputTranslate(self, message, fnc=None):
 translator_name=config.CHOICE_INPUT_TRANSLATOR
 source_language=config.SOURCE_LANGUAGE
 target_language=config.TARGET_LANGUAGE
 target_country = config.TARGET_COUNTRY
-if translator_name == "DeepL_API":
-if target_language == "English":
-if target_country in ["United States", "Canada", "Philippines"]:
-target_language = "English American"
-else:
-target_language = "English British"
-elif target_language == "Portuguese":
-if target_country in ["Portugal"]:
-target_language = "Portuguese European"
-else:
-target_language = "Portuguese Brazilian"
-else:
-if target_language in ["English American", "English British"]:
-target_language = "English"
-elif target_language in ["Portuguese European", "Portuguese Brazilian"]:
-target_language = "Portuguese"
 translation = self.translator.translate(
 translator_name=translator_name,
 source_language=source_language,
 target_language=target_language,
+target_country=target_country,
 message=message
 )
+# 翻訳失敗時のフェールセーフ処理
+if translation is False:
+translation = self.translator.translate(
+translator_name="CTranslate2",
+source_language=source_language,
+target_language=target_language,
+target_country=target_country,
+message=message
+)
+try:
+fnc()
+except Exception:
+pass
 return translation
-def getOutputTranslate(self, message):
+def getOutputTranslate(self, message, fnc=None):
 translator_name=config.CHOICE_OUTPUT_TRANSLATOR
 source_language=config.TARGET_LANGUAGE
 target_language=config.SOURCE_LANGUAGE
 target_country=config.SOURCE_COUNTRY
-if translator_name == "DeepL_API":
-if target_language == "English":
-if target_country in ["United States", "Canada", "Philippines"]:
-target_language = "English American"
-else:
-target_language = "English British"
-elif target_language == "Portuguese":
-if target_country in ["Portugal"]:
-target_language = "Portuguese European"
-else:
-target_language = "Portuguese Brazilian"
-else:
-if target_language in ["English American", "English British"]:
-target_language = "English"
-elif target_language in ["Portuguese European", "Portuguese Brazilian"]:
-target_language = "Portuguese"
 translation = self.translator.translate(
 translator_name=translator_name,
 source_language=source_language,
 target_language=target_language,
+target_country=target_country,
 message=message
 )
 # 翻訳失敗時のフェールセーフ処理
-if translation is False and "Filipino":
+if translation is False:
 translation = self.translator.translate(
 translator_name="CTranslate2",
-source_language=config.TARGET_LANGUAGE,
-target_language=config.SOURCE_LANGUAGE,
+source_language=source_language,
+target_language=target_language,
+target_country=target_country,
 message=message
 )
+try:
+fnc()
+except Exception:
+pass
 return translation
 def addKeywords(self):

View File

@@ -360,7 +360,7 @@ dict_ctranslate2_languages = {
 'Luxembourgish': 'lb',
 'Myanmar': 'my',
 'Tibetan': 'bo',
-'Tagalog': 'tl',
+'Filipino': 'tl',
 'Malagasy': 'mg',
 'Assamese': 'as',
 'Tatar': 'tt',
@@ -372,7 +372,7 @@ dict_ctranslate2_languages = {
 'Sundanese': 'su'
 }
-translation_lang["ctranslate2"] = {
+translation_lang["CTranslate2"] = {
 "source":dict_ctranslate2_languages,
 "target":dict_ctranslate2_languages,
 }

View File

@@ -20,6 +20,7 @@ class Translator():
 self.weight_path = os.path.join(path, "weight", directory_name)
 self.translator = ctranslate2.Translator(self.weight_path, device="cpu", device_index=0, compute_type="int8", inter_threads=1, intra_threads=4)
 self.tokenizer = transformers.AutoTokenizer.from_pretrained(tokenizer)
+self.deepl_client = None
 def authenticationDeepLAuthKey(self, authkey):
 result = True
@@ -27,10 +28,11 @@ class Translator():
 self.deepl_client = deepl_Translator(authkey)
 self.deepl_client.translate_text(" ", target_lang="EN-US")
 except Exception:
+self.deepl_client = None
 result = False
 return result
-def translate(self, translator_name, source_language, target_language, message):
+def translate(self, translator_name, source_language, target_language, target_country, message):
 try:
 result = ""
 source_language=translation_lang[translator_name]["source"][source_language]
@@ -44,11 +46,24 @@ class Translator():
 to_language=target_language,
 )
 case "DeepL_API":
-result = self.deepl_client.translate_text(
-message,
-source_lang=source_language,
-target_lang=target_language,
-).text
+if self.deepl_client is None:
+result = False
+else:
+if target_language == "English":
+if target_country in ["United States", "Canada", "Philippines"]:
+target_language = "English American"
+else:
+target_language = "English British"
+elif target_language == "Portuguese":
+if target_country in ["Portugal"]:
+target_language = "Portuguese European"
+else:
+target_language = "Portuguese Brazilian"
+result = self.deepl_client.translate_text(
+message,
+source_lang=source_language,
+target_lang=target_language,
+).text
 case "Google":
 result = other_web_Translator(
 query_text=message,
@@ -83,17 +98,3 @@ class Translator():
 traceback.print_exc(file=f)
 result = False
 return result
-# def translate_ctranslate2(self, translator_name, source_language, target_language, message):
-# source_language=translation_lang["ctranslate2"]["source"][source_language]
-# target_language=translation_lang["ctranslate2"]["target"][target_language]
-# self.tokenizer.src_lang = source_language
-# source = self.tokenizer.convert_ids_to_tokens(self.tokenizer.encode(message))
-# target_prefix = [self.tokenizer.lang_code_to_token[target_language]]
-# results = self.translator.translate_batch([source], target_prefix=[target_prefix])
-# target = results[0].hypotheses[0][1:]
-# result = self.tokenizer.decode(self.tokenizer.convert_tokens_to_ids(target))
-# return result