[bugfix] Model : AutoTokenizer.from_pretrainedがcache_dirに非ASCII文字を含むパスを指定すると失敗するため、修正

ユーザー名に非ASCII文字が含まれる場合、絶対パスでは失敗するので相対パスで対応
2024-01-29 23:27:33 +09:00
parent 86e43371e6
commit 82eab0db3c
1 changed files with 6 additions and 1 deletions
--- a/models/translation/translation_translator.py
+++ b/models/translation/translation_translator.py
@@ -37,7 +37,12 @@ class Translator():
            inter_threads=1,
            intra_threads=4
        )
-        self.ctranslate2_tokenizer = transformers.AutoTokenizer.from_pretrained(tokenizer, cache_dir=tokenizer_path)
+        try:
+            self.ctranslate2_tokenizer = transformers.AutoTokenizer.from_pretrained(tokenizer, cache_dir=tokenizer_path)
+        except Exception as e:
+            print("Error: changeCTranslate2Model()", e)
+            tokenizer_path = os.path.join("./weight", directory_name, "tokenizer")
+            self.ctranslate2_tokenizer = transformers.AutoTokenizer.from_pretrained(tokenizer, cache_dir=tokenizer_path)

    @staticmethod
    def getLanguageCode(translator_name, target_country, source_language, target_language):