From 6596743b6bfb8fa9f763eca08530105abadbc879 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Sat, 18 Oct 2025 23:16:22 +0900 Subject: [PATCH] =?UTF-8?q?=E7=BF=BB=E8=A8=B3=E8=A8=80=E8=AA=9E=E5=AE=9A?= =?UTF-8?q?=E7=BE=A9=E3=82=92=20YAML=20=E3=81=AB=E7=A7=BB=E8=A1=8C?= =?UTF-8?q?=E3=81=97=E3=83=AD=E3=83=BC=E3=83=89=E5=87=A6=E7=90=86=E3=82=92?= =?UTF-8?q?=E5=AE=9F=E8=A3=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 言語マッピングを src-python/models/translation/languages/languages.yml として追加 - translation_languages.py を全面リファクタ:YAML から読み込み、構造検証、スレッドロック、キャッシュ対応を導入 - config.py に loadTranslationLanguages のインポート/フォールバックを追加し、Config 初期化時に言語定義を読み込むよう変更 - PyYAML を requirements.txt / requirements_cuda.txt に追記 - PyInstaller spec (backend.spec / backend_cuda.spec) に言語ファイル配布先を追加 - translation_ollama.py の loadPromptConfig 呼び出しを修正(ファイル名を明示) --- backend.spec | 1 + backend_cuda.spec | 1 + requirements.txt | 1 + requirements_cuda.txt | 1 + src-python/config.py | 5 +- .../translation/languages/languages.yml | 771 +++++++++++++++++ .../translation/translation_languages.py | 815 +++--------------- .../models/translation/translation_ollama.py | 2 +- 8 files changed, 903 insertions(+), 694 deletions(-) create mode 100644 src-python/models/translation/languages/languages.yml diff --git a/backend.spec b/backend.spec index b13ce288..605c82fb 100644 --- a/backend.spec +++ b/backend.spec @@ -8,6 +8,7 @@ a = Analysis( datas=[ ('./src-python/models/overlay/fonts', 'fonts/'), ('./src-python/models/translation/prompt', 'prompt/'), + ('./src-python/models/translation/languages', 'languages/'), ('.venv/Lib/site-packages/zeroconf', 'zeroconf/'), ('.venv/Lib/site-packages/openvr', 'openvr/'), ('.venv/Lib/site-packages/faster_whisper', 'faster_whisper/'), diff --git a/backend_cuda.spec b/backend_cuda.spec index 3ed416ac..8d80b6ec 100644 --- a/backend_cuda.spec +++ b/backend_cuda.spec @@ -8,6 +8,7 @@ a = Analysis( datas=[ ('./src-python/models/overlay/fonts', 'fonts/'), ('./src-python/models/translation/prompt', 'prompt/'), + ('./src-python/models/translation/languages', 'languages/'), ('.venv_cuda/Lib/site-packages/zeroconf', 'zeroconf/'), ('.venv_cuda/Lib/site-packages/openvr', 'openvr/'), ('.venv_cuda/Lib/site-packages/faster_whisper', 'faster_whisper/'), diff --git a/requirements.txt b/requirements.txt index 3d0a6670..6b269f15 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,6 +5,7 @@ transformers==4.40.2 pillow == 10.0.0 PyAudioWPatch == 0.2.12.6 python-osc == 1.9.0 +PyYAML==6.0.2 deepl == 1.22.0 flashtext ==2.7 pyinstaller==6.10.0 diff --git a/requirements_cuda.txt b/requirements_cuda.txt index 60f0e9e1..a93729f8 100644 --- a/requirements_cuda.txt +++ b/requirements_cuda.txt @@ -6,6 +6,7 @@ transformers==4.40.2 pillow == 10.0.0 PyAudioWPatch == 0.2.12.6 python-osc == 1.9.0 +PyYAML==6.0.2 deepl == 1.22.0 flashtext ==2.7 pyinstaller==6.10.0 diff --git a/src-python/config.py b/src-python/config.py index 9cd41589..165919b2 100644 --- a/src-python/config.py +++ b/src-python/config.py @@ -16,9 +16,11 @@ except Exception: # pragma: no cover - optional runtime device_manager = None # type: ignore try: - from models.translation.translation_languages import translation_lang + from models.translation.translation_languages import translation_lang, loadTranslationLanguages except Exception: # pragma: no cover - optional runtime translation_lang = {} # type: ignore + def loadTranslationLanguages(path: str, force: bool = False) -> Dict[str, Any]: + return {} try: from models.translation.translation_utils import ctranslate2_weights @@ -1227,6 +1229,7 @@ class Config: # these external mappings may be empty dicts if the optional modules failed to import self._SELECTABLE_CTRANSLATE2_WEIGHT_TYPE_LIST = getattr(ctranslate2_weights, 'keys', lambda: [])() self._SELECTABLE_WHISPER_WEIGHT_TYPE_LIST = getattr(whisper_models, 'keys', lambda: [])() + translation_lang = loadTranslationLanguages(self.PATH_LOCAL) self._SELECTABLE_TRANSLATION_ENGINE_LIST = getattr(translation_lang, 'keys', lambda: [])() try: # transcription_lang is nested dict; attempt to extract keys defensively diff --git a/src-python/models/translation/languages/languages.yml b/src-python/models/translation/languages/languages.yml new file mode 100644 index 00000000..cfde921e --- /dev/null +++ b/src-python/models/translation/languages/languages.yml @@ -0,0 +1,771 @@ +# Translation Language Mappings +# Each backend defines source (input) and target (output) language codes + +DeepL: + source: &deepl_langs + Arabic: ar + Bulgarian: bg + Czech: cs + Danish: da + German: de + Greek: el + English: en + Spanish: es + Estonian: et + Finnish: fi + French: fr + Irish: ga + Croatian: hr + Hungarian: hu + Indonesian: id + Italian: it + Japanese: ja + Korean: ko + Lithuanian: lt + Latvian: lv + Maltese: mt + Norwegian: 'no' + Dutch: nl + Polish: pl + Portuguese: pt + Romanian: ro + Russian: ru + Slovak: sk + Slovenian: sl + Swedish: sv + Turkish: tr + Ukrainian: uk + Chinese Simplified: zh + Chinese Traditional: zh + target: *deepl_langs + +DeepL_API: + source: + Japanese: ja + English: en + Bulgarian: bg + Czech: cs + Danish: da + German: de + Greek: el + Spanish: es + Estonian: et + Finnish: fi + French: fr + Hungarian: hu + Indonesian: id + Italian: it + Korean: ko + Lithuanian: lt + Latvian: lv + Norwegian: 'no' + Dutch: nl + Polish: pl + Portuguese: pt + Romanian: ro + Russian: ru + Slovak: sk + Slovenian: sl + Swedish: sv + Turkish: tr + Ukrainian: uk + Chinese Simplified: zh + Chinese Traditional: zh + target: + Japanese: ja + English: en + English (American): en-US + English (British): en-GB + Bulgarian: bg + Czech: cs + Danish: da + German: de + Greek: el + Spanish: es + Estonian: et + Finnish: fi + French: fr + Hungarian: hu + Indonesian: id + Italian: it + Korean: ko + Lithuanian: lt + Latvian: lv + Norwegian: 'no' + Dutch: nl + Polish: pl + Portuguese (Brazilian): pt-BR + Portuguese (European): pt-PT + Romanian: ro + Russian: ru + Slovak: sk + Slovenian: sl + Swedish: sv + Turkish: tr + Ukrainian: uk + Chinese Simplified: zh + Chinese Traditional: zh + +Google: + source: &google_langs + Japanese: ja + English: en + Chinese Simplified: zh + Chinese Traditional: zh-TW + Arabic: ar + Russian: ru + French: fr + German: de + Spanish: es + Portuguese: pt + Italian: it + Korean: ko + Greek: el + Dutch: nl + Hindi: hi + Turkish: tr + Malay: ms + Thai: th + Vietnamese: vi + Indonesian: id + Hebrew: he + Polish: pl + Mongolian: mn + Czech: cs + Hungarian: hu + Estonian: et + Bulgarian: bg + Danish: da + Finnish: fi + Romanian: ro + Swedish: sv + Slovenian: sl + Persian/Farsi: fa + Bosnian: bs + Serbian: sr + Croatian: hr + Slovak: sk + Albanian: sq + Lithuanian: lt + Latvian: lv + Macedonian: mk + Ukrainian: uk + Belarusian: be + Kazakh: kk + Uzbek: uz + Armenian: hy + Azerbaijani: az + Georgian: ka + Kyrgyz: ky + Tajik: tg + Turkmen: tk + Nepali: ne + Sinhala: si + Khmer: km + Lao: lo + Burmese: my + Malayalam: ml + Telugu: te + Tamil: ta + Kannada: kn + Marathi: mr + Gujarati: gu + Punjabi: pa + Bengali: bn + Odia: or + Assamese: as + Urdu: ur + Amharic: am + Tigrinya: ti + Oromo: om + Somali: so + Swahili: sw + Kinyarwanda: rw + Yoruba: yo + Zulu: zu + Xhosa: xh + Afrikaans: af + Sesotho: st + Chichewa: ny + Malagasy: mg + Esperanto: eo + Hawaiian: haw + Samoan: sm + Shona: sn + Sindhi: sd + Pashto: ps + Kurdish: ku + Hausa: ha + Igbo: ig + Maltese: mt + Welsh: cy + Luxembourgish: lb + Icelandic: is + Irish: ga + Scottish Gaelic: gd + Basque: eu + Galician: gl + Catalan: ca + Corsican: co + Latin: la + Maori: mi + Hmong: hmn + Cebuano: ceb + Filipino: tl + Javanese: jw + Sundanese: su + Yiddish: yi + Frisian: fy + target: *google_langs + +Bing: + source: &bing_langs + Japanese: ja + English: en + Chinese Simplified: zh + Chinese Traditional: zh-Hant + Arabic: ar + Russian: ru + French: fr + German: de + Spanish: es + Portuguese: pt + Italian: it + Korean: ko + Greek: el + Dutch: nl + Hindi: hi + Turkish: tr + Malay: ms + Thai: th + Vietnamese: vi + Indonesian: id + Hebrew: he + Polish: pl + Czech: cs + Hungarian: hu + Estonian: et + Bulgarian: bg + Danish: da + Finnish: fi + Romanian: ro + Swedish: sv + Slovenian: sl + Persian/Farsi: fa + Bosnian: bs + Serbian: sr + Croatian: hr + Albanian: sq + Lithuanian: lt + Latvian: lv + Ukrainian: uk + Welsh: cy + Belarusian: be + Icelandic: is + Irish: ga + Maltese: mt + Yiddish: yi + Afrikaans: af + Norwegian: 'no' + Bengali: bn + Malagasy: mg + Samoan: sm + Slovak: sk + Swahili: sw + Filipino: tl + Urdu: ur + Haitian Creole: ht + Catalan: ca + Fijian: fj + Kiswahili: sw + Tahitian: ty + Tongan: to + Klingon: tlh + Queretaro Otomi: otl + Gujarati: gu + Tamil: ta + Telugu: te + Punjabi: pa + target: *bing_langs + +Papago: + source: &papago_langs + German: de + English: en + Spanish: es + French: fr + Hindi: hi + Indonesian: id + Italian: it + Japanese: ja + Korean: ko + Portuguese: pt + Russian: ru + Thai: th + Vietnamese: vi + Chinese Simplified: zh-CN + Chinese Traditional: zh-TW + target: *papago_langs + +CTranslate2: + m2m100_418M-ct2-int8: + source: &m2m100_langs + English: en + Chinese Simplified: zh + Chinese Traditional: zh + German: de + Spanish: es + Russian: ru + Korean: ko + French: fr + Japanese: ja + Portuguese: pt + Turkish: tr + Polish: pl + Catalan: ca + Dutch: nl + Arabic: ar + Swedish: sv + Italian: it + Indonesian: id + Hindi: hi + Finnish: fi + Vietnamese: vi + Hebrew: he + Ukrainian: uk + Greek: el + Malay: ms + Czech: cs + Romanian: ro + Danish: da + Hungarian: hu + Tamil: ta + Norwegian: 'no' + Thai: th + Urdu: ur + Croatian: hr + Bulgarian: bg + Lithuanian: lt + Latin: la + Maori: mi + Malayalam: ml + Welsh: cy + Slovak: sk + Telugu: te + Persian: fa + Latvian: lv + Bengali: bn + Serbian: sr + Azerbaijani: az + Slovenian: sl + Kannada: kn + Estonian: et + Macedonian: mk + Breton: br + Basque: eu + Icelandic: is + Armenian: hy + Nepali: ne + Mongolian: mn + Bosnian: bs + Kazakh: kk + Albanian: sq + Swahili: sw + Galician: gl + Marathi: mr + Punjabi: pa + Sinhala: si + Khmer: km + Shona: sn + Yoruba: yo + Somali: so + Afrikaans: af + Occitan: oc + Georgian: ka + Belarusian: be + Tajik: tg + Sindhi: sd + Gujarati: gu + Amharic: am + Yiddish: yi + Lao: lo + Uzbek: uz + Faroese: fo + Haitian creole: ht + Pashto: ps + Turkmen: tk + Nynorsk: nn + Maltese: mt + Sanskrit: sa + Luxembourgish: lb + Myanmar: my + Tibetan: bo + Filipino: tl + Malagasy: mg + Assamese: as + Tatar: tt + Hawaiian: haw + Lingala: ln + Hausa: ha + Bashkir: ba + Javanese: jw + Sundanese: su + target: *m2m100_langs + m2m100_1.2B-ct2-int8: + source: *m2m100_langs + target: *m2m100_langs + nllb-200-distilled-1.3B-ct2-int8: + source: &nllb_langs + Acehnese (Arabic script): ace_Arab + Acehnese (Latin script): ace_Latn + Mesopotamian Arabic: acm_Arab + Ta'izzi-Adeni Arabic: acq_Arab + Tunisian Arabic: aeb_Arab + Afrikaans: afr_Latn + South Levantine Arabic: ajp_Arab + Akan: aka_Latn + Amharic: amh_Ethi + North Levantine Arabic: apc_Arab + Standard Arabic: arb_Arab + Modern Standard Arabic (Romanized): arb_Latn + Najdi Arabic: ars_Arab + Moroccan Arabic: ary_Arab + Egyptian Arabic: arz_Arab + Assamese: asm_Beng + Asturian: ast_Latn + Awadhi: awa_Deva + Central Aymara: ayr_Latn + South Azerbaijani: azb_Arab + North Azerbaijani: azj_Latn + Bashkir: bak_Cyrl + Bambara: bam_Latn + Balinese: ban_Latn + Belarusian: bel_Cyrl + Bemba: bem_Latn + Bengali: ben_Beng + Bhojpuri: bho_Deva + Banjar (Arabic script): bjn_Arab + Banjar (Latin script): bjn_Latn + Standard Tibetan: bod_Tibt + Bosnian: bos_Latn + Buginese: bug_Latn + Bulgarian: bul_Cyrl + Catalan: cat_Latn + Cebuano: ceb_Latn + Czech: ces_Latn + Chokwe: cjk_Latn + Central Kurdish: ckb_Arab + Crimean Tatar: crh_Latn + Welsh: cym_Latn + Danish: dan_Latn + German: deu_Latn + Southwestern Dinka: dik_Latn + Dyula: dyu_Latn + Dzongkha: dzo_Tibt + Greek: ell_Grek + English: eng_Latn + Esperanto: epo_Latn + Estonian: est_Latn + Basque: eus_Latn + Ewe: ewe_Latn + Faroese: fao_Latn + Fijian: fij_Latn + Finnish: fin_Latn + Fon: fon_Latn + French: fra_Latn + Friulian: fur_Latn + Nigerian Fulfulde: fuv_Latn + Scottish Gaelic: gla_Latn + Irish: gle_Latn + Galician: glg_Latn + Guarani: grn_Latn + Gujarati: guj_Gujr + Haitian Creole: hat_Latn + Hausa: hau_Latn + Hebrew: heb_Hebr + Hindi: hin_Deva + Chhattisgarhi: hne_Deva + Croatian: hrv_Latn + Hungarian: hun_Latn + Armenian: hye_Armn + Igbo: ibo_Latn + Ilocano: ilo_Latn + Indonesian: ind_Latn + Icelandic: isl_Latn + Italian: ita_Latn + Javanese: jav_Latn + Japanese: jpn_Jpan + Kabyle: kab_Latn + Jingpho: kac_Latn + Kamba: kam_Latn + Kannada: kan_Knda + Kashmiri (Arabic script): kas_Arab + Kashmiri (Devanagari script): kas_Deva + Georgian: kat_Geor + Central Kanuri (Arabic script): knc_Arab + Central Kanuri (Latin script): knc_Latn + Kazakh: kaz_Cyrl + Kabiyè: kbp_Latn + Kabuverdianu: kea_Latn + Khmer: khm_Khmr + Kikuyu: kik_Latn + Kinyarwanda: kin_Latn + Kyrgyz: kir_Cyrl + Kimbundu: kmb_Latn + Northern Kurdish: kmr_Latn + Kikongo: kon_Latn + Korean: kor_Hang + Lao: lao_Laoo + Ligurian: lij_Latn + Limburgish: lim_Latn + Lingala: lin_Latn + Lithuanian: lit_Latn + Lombard: lmo_Latn + Latgalian: ltg_Latn + Luxembourgish: ltz_Latn + Luba-Kasai: lua_Latn + Ganda: lug_Latn + Luo: luo_Latn + Mizo: lus_Latn + Standard Latvian: lvs_Latn + Magahi: mag_Deva + Maithili: mai_Deva + Malayalam: mal_Mlym + Marathi: mar_Deva + Minangkabau (Arabic script): min_Arab + Minangkabau (Latin script): min_Latn + Macedonian: mkd_Cyrl + Plateau Malagasy: plt_Latn + Maltese: mlt_Latn + Meitei (Bengali script): mni_Beng + Halh Mongolian: khk_Cyrl + Mossi: mos_Latn + Maori: mri_Latn + Burmese: mya_Mymr + Dutch: nld_Latn + Norwegian Nynorsk: nno_Latn + Norwegian Bokmål: nob_Latn + Nepali: npi_Deva + Northern Sotho: nso_Latn + Nuer: nus_Latn + Nyanja: nya_Latn + Occitan: oci_Latn + West Central Oromo: gaz_Latn + Odia: ory_Orya + Pangasinan: pag_Latn + Eastern Panjabi: pan_Guru + Papiamento: pap_Latn + Western Persian: pes_Arab + Polish: pol_Latn + Portuguese: por_Latn + Dari: prs_Arab + Southern Pashto: pbt_Arab + Ayacucho Quechua: quy_Latn + Romanian: ron_Latn + Rundi: run_Latn + Russian: rus_Cyrl + Sango: sag_Latn + Sanskrit: san_Deva + Santali: sat_Olck + Sicilian: scn_Latn + Shan: shn_Mymr + Sinhala: sin_Sinh + Slovak: slk_Latn + Slovenian: slv_Latn + Samoan: smo_Latn + Shona: sna_Latn + Sindhi: snd_Arab + Somali: som_Latn + Southern Sotho: sot_Latn + Spanish: spa_Latn + Tosk Albanian: als_Latn + Sardinian: srd_Latn + Serbian: srp_Cyrl + Swati: ssw_Latn + Sundanese: sun_Latn + Swedish: swe_Latn + Swahili: swh_Latn + Silesian: szl_Latn + Tamil: tam_Taml + Tatar: tat_Cyrl + Telugu: tel_Telu + Tajik: tgk_Cyrl + Tagalog: tgl_Latn + Thai: tha_Thai + Tigrinya: tir_Ethi + Tamasheq (Latin script): taq_Latn + Tamasheq (Tifinagh script): taq_Tfng + Tok Pisin: tpi_Latn + Tswana: tsn_Latn + Tsonga: tso_Latn + Turkmen: tuk_Latn + Tumbuka: tum_Latn + Turkish: tur_Latn + Twi: twi_Latn + Central Atlas Tamazight: tzm_Tfng + Uyghur: uig_Arab + Ukrainian: ukr_Cyrl + Umbundu: umb_Latn + Urdu: urd_Arab + Northern Uzbek: uzn_Latn + Venetian: vec_Latn + Vietnamese: vie_Latn + Waray: war_Latn + Wolof: wol_Latn + Xhosa: xho_Latn + Eastern Yiddish: ydd_Hebr + Yoruba: yor_Latn + Yue Chinese: yue_Hant + Chinese Simplified: zho_Hans + Chinese Traditional: zho_Hant + Standard Malay: zsm_Latn + Zulu: zul_Latn + target: *nllb_langs + nllb-200-3.3B-ct2-int8: + source: *nllb_langs + target: *nllb_langs + +Plamo_API: + source: &plamo_langs + English: English + Japanese: Japanese + Korean: Korean + French: French + German: German + Spanish: Spanish + Portuguese: Portuguese + Russian: Russian + Italian: Italian + Dutch: Dutch + Polish: Polish + Turkish: Turkish + Arabic: Arabic + Hindi: Hindi + Thai: Thai + Vietnamese: Vietnamese + Indonesian: Indonesian + Malay: Malay + Filipino: Filipino + Swedish: Swedish + Finnish: Finnish + Danish: Danish + Norwegian: Norwegian + Romanian: Romanian + Czech: Czech + Hungarian: Hungarian + Greek: Greek + Hebrew: Hebrew + Chinese Simplified: Simplified Chinese + Chinese Traditional: Traditional Chinese + target: *plamo_langs + +Gemini_API: + source: &gemini_langs + Arabic: Arabic + Bengali: Bengali + Bulgarian: Bulgarian + Chinese Simplified: Simplified Chinese + Chinese Traditional: Traditional Chinese + Croatian: Croatian + Czech: Czech + Danish: Danish + Dutch: Dutch + English: English + Estonian: Estonian + Finnish: Finnish + French: French + German: German + Greek: Greek + Hebrew: Hebrew + Hindi: Hindi + Hungarian: Hungarian + Indonesian: Indonesian + Italian: Italian + Japanese: Japanese + Korean: Korean + Latvian: Latvian + Lithuanian: Lithuanian + Norwegian: Norwegian + Polish: Polish + Portuguese: Portuguese + Romanian: Romanian + Russian: Russian + Serbian: Serbian + Slovak: Slovak + Slovenian: Slovenian + Spanish: Spanish + Swedish: Swedish + Thai: Thai + Turkish: Turkish + Ukrainian: Ukrainian + Vietnamese: Vietnamese + target: *gemini_langs + +OpenAI_API: + source: &openai_langs + Afrikaans: Afrikaans + Arabic: Arabic + Armenian: Armenian + Azerbaijani: Azerbaijani + Belarusian: Belarusian + Bosnian: Bosnian + Bulgarian: Bulgarian + Catalan: Catalan + Chinese: Chinese + Croatian: Croatian + Czech: Czech + Danish: Danish + Dutch: Dutch + English: English + Estonian: Estonian + Finnish: Finnish + French: French + Galician: Galician + German: German + Greek: Greek + Hebrew: Hebrew + Hindi: Hindi + Hungarian: Hungarian + Icelandic: Icelandic + Indonesian: Indonesian + Italian: Italian + Japanese: Japanese + Kannada: Kannada + Kazakh: Kazakh + Korean: Korean + Latvian: Latvian + Lithuanian: Lithuanian + Macedonian: Macedonian + Malay: Malay + Marathi: Marathi + Maori: Maori + Nepali: Nepali + Norwegian: Norwegian + Persian: Persian + Polish: Polish + Portuguese: Portuguese + Romanian: Romanian + Russian: Russian + Serbian: Serbian + Slovak: Slovak + Slovenian: Slovenian + Spanish: Spanish + Swahili: Swahili + Swedish: Swedish + Tagalog: Tagalog + Tamil: Tamil + Thai: Thai + Turkish: Turkish + Ukrainian: Ukrainian + Urdu: Urdu + Vietnamese: Vietnamese + Welsh: Welsh + target: *openai_langs + +LMStudio: + source: *openai_langs + target: *openai_langs + +Ollama: + source: *openai_langs + target: *openai_langs diff --git a/src-python/models/translation/translation_languages.py b/src-python/models/translation/translation_languages.py index 69a594b8..eccd5665 100644 --- a/src-python/models/translation/translation_languages.py +++ b/src-python/models/translation/translation_languages.py @@ -1,713 +1,144 @@ -"""Language code mappings for supported translation backends. +"""Load translation language code mappings from YAML. -Provides `translation_lang` mapping keyed by backend name with `source` and -`target` maps used by `Translator.getLanguageCode`. +YAML ファイル: languages/languages.yml +構造: + : + source: { DisplayName: Code, ... } + target: { DisplayName: Code, ... } + CTranslate2: + : + source: {...} + target: {...} """ -from typing import Dict +import os +import threading +from typing import Any, Dict +import yaml +try: + from utils import printLog, errorLogging +except ImportError: + def printLog(data, *args, **kwargs): + print(data, *args, **kwargs) + def errorLogging(): + import traceback + traceback.print_exc() + + +# 型: translation_lang[backend][(model)?]['source'|'target'][display_name] = code translation_lang: Dict[str, Dict[str, Dict[str, str]]] = {} +_loaded = False +_lock = threading.Lock() -dict_deepl_languages = { - "Arabic":"ar", - "Bulgarian":"bg", - "Czech":"cs", - "Danish":"da", - "German":"de", - "Greek":"el", - "English":"en", - "Spanish":"es", - "Estonian":"et", - "Finnish":"fi", - "French":"fr", - "Irish":"ga", - "Croatian":"hr", - "Hungarian":"hu", - "Indonesian":"id", - "Icelandic":"is", - "Italian":"it", - "Japanese":"ja", - "Korean":"ko", - "Lithuanian":"lt", - "Latvian":"lv", - "Maltese":"mt", - "Bokmal":"nb", - "Dutch":"nl", - "Norwegian":"no", - "Polish":"pl", - "Portuguese":"pt", - "Romanian":"ro", - "Russian":"ru", - "Slovak":"sk", - "Slovenian":"sl", - "Swedish":"sv", - "Turkish":"tr", - "Ukrainian":"uk", - "Chinese Simplified":"zh", - "Chinese Traditional":"zh" -} -translation_lang["DeepL"] = {"source": dict_deepl_languages, "target": dict_deepl_languages} -dict_deepl_api_source_languages = { - "Japanese":"ja", - "English":"en", - "Bulgarian":"bg", - "Czech":"cs", - "Danish":"da", - "German":"de", - "Greek":"el", - "Spanish":"es", - "Estonian":"et", - "Finnish":"fi", - "French":"fr", - "Hungarian":"hu", - "Indonesian":"id", - "Italian":"it", - "Korean":"ko", - "Lithuanian":"lt", - "Latvian":"lv", - "Norwegian":"nb", - "Dutch":"nl", - "Polish":"pl", - "Portuguese":"pt", - "Romanian":"ro", - "Russian":"ru", - "Slovak":"sk", - "Slovenian":"sl", - "Swedish":"sv", - "Turkish":"tr", - "Ukrainian":"uk", - "Chinese Simplified":"zh", - "Chinese Traditional":"zh" -} -dict_deepl_api_target_languages = { - "Japanese":"ja", - "English American":"en-US", - "English British":"en-GB", - "Bulgarian":"bg", - "Czech":"cs", - "Danish":"da", - "German":"de", - "Greek":"el", - "English":"en", - "Spanish":"es", - "Estonian":"et", - "Finnish":"fi", - "French":"fr", - "Hungarian":"hu", - "Indonesian":"id", - "Italian":"it", - "Korean":"ko", - "Lithuanian":"lt", - "Latvian":"lv", - "Norwegian":"nb", - "Dutch":"nl", - "Polish":"pl", - "Portuguese Brazilian":"pt-BR", - "Portuguese European":"pt-PT", - "Romanian":"ro", - "Russian":"ru", - "Slovak":"sk", - "Slovenian":"sl", - "Swedish":"sv", - "Turkish":"tr", - "Ukrainian":"uk", - "Chinese Simplified":"zh", - "Chinese Traditional":"zh" -} -translation_lang["DeepL_API"] = {"source": dict_deepl_api_source_languages, "target": dict_deepl_api_target_languages} +def _load_languages(path: str, filename: str) -> str: + """Get absolute path to resource file relative to this module. -dict_google_languages = { - "Japanese":"ja", - "English":"en", - "Chinese Simplified":"zh", - "Chinese Traditional":"zh-TW", - "Arabic":"ar", - "Russian":"ru", - "French":"fr", - "German":"de", - "Spanish":"es", - "Portuguese":"pt", - "Italian":"it", - "Korean":"ko", - "Greek":"el", - "Dutch":"nl", - "Hindi":"hi", - "Turkish":"tr", - "Malay":"ms", - "Thai":"th", - "Vietnamese":"vi", - "Indonesian":"id", - "Hebrew":"he", - "Polish":"pl", - "Mongolian":"mn", - "Czech":"cs", - "Hungarian":"hu", - "Estonian":"et", - "Bulgarian":"bg", - "Danish":"da", - "Finnish":"fi", - "Romanian":"ro", - "Swedish":"sv", - "Slovenian":"sl", - "Persian/Farsi":"fa", - "Bosnian":"bs", - "Serbian":"sr", - "Filipino":"tl", - "Haitiancreole":"ht", - "Catalan":"ca", - "Croatian":"hr", - "Latvian":"lv", - "Lithuanian":"lt", - "Urdu":"ur", - "Ukrainian":"uk", - "Welsh":"cy", - "Swahili":"sw", - "Samoan":"sm", - "Slovak":"sk", - "Afrikaans":"af", - "Norwegian":"no", - "Bengali":"bn", - "Malagasy":"mg", - "Maltese":"mt", - "Gujarati":"gu", - "Tamil":"ta", - "Telugu":"te", - "Punjabi":"pa", - "Amharic":"am", - "Azerbaijani":"az", - "Belarusian":"be", - "Cebuano":"ceb", - "Esperanto":"eo", - # "Basque":"eu", - "Irish":"ga" -} -translation_lang["Google"] = {"source": dict_google_languages, "target": dict_google_languages} + Args: + filename: relative filename from this module's directory -dict_bing_languages = { - "Japanese":"ja", - "English":"en", - "Chinese Simplified":"zh", - "Chinese Traditional":"zh-Hant", - "Arabic":"ar", - "Russian":"ru", - "French":"fr", - "German":"de", - "Spanish":"es", - "Portuguese":"pt", - "Italian":"it", - "Korean":"ko", - "Greek":"el", - "Dutch":"nl", - "Hindi":"hi", - "Turkish":"tr", - "Malay":"ms", - "Thai":"th", - "Vietnamese":"vi", - "Indonesian":"id", - "Hebrew":"he", - "Polish":"pl", - "Czech":"cs", - "Hungarian":"hu", - "Estonian":"et", - "Bulgarian":"bg", - "Danish":"da", - "Finnish":"fi", - "Romanian":"ro", - "Swedish":"sv", - "Slovenian":"sl", - "Persian/Farsi":"fa", - "Bosnian":"bs", - "Serbian":"sr", - "Fijian":"fj", - "Filipino":"tl", - "Haitiancreole":"ht", - "Catalan":"ca", - "Croatian":"hr", - "Latvian":"lv", - "Lithuanian":"lt", - "Urdu":"ur", - "Ukrainian":"uk", - "Welsh":"cy", - "Tahiti":"ty", - "Tongan":"to", - "Swahili":"sw", - "Samoan":"sm", - "Slovak":"sk", - "Afrikaans":"af", - "Norwegian":"no", - "Bengali":"bn", - "Malagasy":"mg", - "Maltese":"mt", - "Queretaro otomi":"otq", - "Klingon/tlhingan Hol":"tlh", - "Gujarati":"gu", - "Tamil":"ta", - "Telugu":"te", - "Punjabi":"pa", - "Irish":"ga" -} -translation_lang["Bing"] = {"source": dict_bing_languages, "target": dict_bing_languages} + Returns: + Absolute path to the resource file + """ + if os.path.exists(os.path.join(path, "_internal", "languages", "languages.yml")): + languages_path = os.path.join(path, "_internal", "languages", "languages.yml") + elif os.path.exists(os.path.join(os.path.dirname(os.path.abspath(__file__)), "models", "translation", "languages", "languages.yml")): + languages_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "models", "translation", "languages", "languages.yml") + elif os.path.exists(os.path.join(os.path.dirname(os.path.abspath(__file__)), "languages", "languages.yml")): + languages_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "languages", "languages.yml") + else: + raise FileNotFoundError(f"Prompt file not found: {filename}") + with open(languages_path, "r", encoding="utf-8") as f: + return yaml.safe_load(f) -dict_papago_languages = { - "German": "de", - "English": "en", - "Spanish":"es", - "French": "fr", - "Hindi": "hi", - "Indonesian": "id", - "Italian": "it", - "Japanese": "ja", - "Korean": "ko", - "Portuguese": "pt", - "Russian": "ru", - "Thai": "th", - "Vietnamese": "vi", - "Chinese Simplified":"zh-CN", - "Chinese Traditional":"zh-TW", -} +def _validate_source_target(backend: str, mapping: Any) -> None: + """Validate that a backend mapping has proper source/target structure. -translation_lang["Papago"] = {"source": dict_papago_languages, "target": dict_papago_languages} + Args: + backend: backend name for error messages + mapping: mapping to validate -dict_m2m100_languages = { - "English": "en", - "Chinese Simplified": "zh", - "Chinese Traditional":"zh", - "German": "de", - "Spanish": "es", - "Russian": "ru", - "Korean": "ko", - "French": "fr", - "Japanese": "ja", - "Portuguese": "pt", - "Turkish": "tr", - "Polish": "pl", - "Catalan": "ca", - "Dutch": "nl", - "Arabic": "ar", - "Swedish": "sv", - "Italian": "it", - "Indonesian": "id", - "Hindi": "hi", - "Finnish": "fi", - "Vietnamese": "vi", - "Hebrew": "he", - "Ukrainian": "uk", - "Greek": "el", - "Malay": "ms", - "Czech": "cs", - "Romanian": "ro", - "Danish": "da", - "Hungarian": "hu", - "Tamil": "ta", - "Norwegian": "no", - "Thai": "th", - "Urdu": "ur", - "Croatian": "hr", - "Bulgarian": "bg", - "Lithuanian": "lt", - "Latin": "la", - "Maori": "mi", - "Malayalam": "ml", - "Welsh": "cy", - "Slovak": "sk", - # "Telugu": "te", - "Persian": "fa", - "Latvian": "lv", - "Bengali": "bn", - "Serbian": "sr", - "Azerbaijani": "az", - "Slovenian": "sl", - "Kannada": "kn", - "Estonian": "et", - "Macedonian": "mk", - "Breton": "br", - # "Basque": "eu", - "Icelandic": "is", - "Armenian": "hy", - "Nepali": "ne", - "Mongolian": "mn", - "Bosnian": "bs", - "Kazakh": "kk", - "Albanian": "sq", - "Swahili": "sw", - "Galician": "gl", - "Marathi": "mr", - "Punjabi": "pa", - "Sinhala": "si", - "Khmer": "km", - "Shona": "sn", - "Yoruba": "yo", - "Somali": "so", - "Afrikaans": "af", - "Occitan": "oc", - "Georgian": "ka", - "Belarusian": "be", - "Tajik": "tg", - "Sindhi": "sd", - "Gujarati": "gu", - "Amharic": "am", - "Yiddish": "yi", - "Lao": "lo", - "Uzbek": "uz", - "Faroese": "fo", - "Haitian creole": "ht", - "Pashto": "ps", - "Turkmen": "tk", - "Nynorsk": "nn", - "Maltese": "mt", - "Sanskrit": "sa", - "Luxembourgish": "lb", - "Myanmar": "my", - "Tibetan": "bo", - "Filipino": "tl", - "Malagasy": "mg", - "Assamese": "as", - "Tatar": "tt", - "Hawaiian": "haw", - "Lingala": "ln", - "Hausa": "ha", - "Bashkir": "ba", - "Javanese": "jw", - "Sundanese": "su" -} + Raises: + ValueError: If mapping structure is invalid + """ + if not isinstance(mapping, dict): + raise ValueError(f"{backend}: 値は dict である必要があります。") + if "source" not in mapping or "target" not in mapping: + raise ValueError(f"{backend}: 'source' と 'target' が必要です。") -translation_lang["CTranslate2"] = {} -translation_lang["CTranslate2"]["m2m100_418M-ct2-int8"] = {"source":dict_m2m100_languages, "target":dict_m2m100_languages} -translation_lang["CTranslate2"]["m2m100_1.2B-ct2-int8"] = {"source":dict_m2m100_languages, "target":dict_m2m100_languages} + for key in ("source", "target"): + if not isinstance(mapping[key], dict): + raise ValueError(f"{backend}: '{key}' は dict である必要があります。") + # value は str を想定 + for disp, code in mapping[key].items(): + if not isinstance(disp, str) or not isinstance(code, str): + raise ValueError( + f"{backend}: '{key}' のエントリは str: str である必要があります。 ({disp} => {code})" + ) -dict_nllb_languages = { - "Acehnese (Arabic script)": "ace_Arab", - "Acehnese (Latin script)": "ace_Latn", - "Mesopotamian Arabic": "acm_Arab", - "Ta’izzi-Adeni Arabic": "acq_Arab", - "Tunisian Arabic": "aeb_Arab", - "Afrikaans": "afr_Latn", - "South Levantine Arabic": "ajp_Arab", - "Akan": "aka_Latn", - "Amharic": "amh_Ethi", - "North Levantine Arabic": "apc_Arab", - "Modern Standard Arabic": "arb_Arab", - "Modern Standard Arabic (Romanized)": "arb_Latn", - "Najdi Arabic": "ars_Arab", - "Moroccan Arabic": "ary_Arab", - "Egyptian Arabic": "arz_Arab", - "Assamese": "asm_Beng", - "Asturian": "ast_Latn", - "Awadhi": "awa_Deva", - "Central Aymara": "ayr_Latn", - "South Azerbaijani": "azb_Arab", - "North Azerbaijani": "azj_Latn", - "Bashkir": "bak_Cyrl", - "Bambara": "bam_Latn", - "Balinese": "ban_Latn", - "Belarusian": "bel_Cyrl", - "Bemba": "bem_Latn", - "Bengali": "ben_Beng", - "Bhojpuri": "bho_Deva", - "Banjar (Arabic script)": "bjn_Arab", - "Banjar (Latin script)": "bjn_Latn", - "Standard Tibetan": "bod_Tibt", - "Bosnian": "bos_Latn", - "Buginese": "bug_Latn", - "Bulgarian": "bul_Cyrl", - "Catalan": "cat_Latn", - "Cebuano": "ceb_Latn", - "Czech": "ces_Latn", - "Chokwe": "cjk_Latn", - "Central Kurdish": "ckb_Arab", - "Crimean Tatar": "crh_Latn", - "Welsh": "cym_Latn", - "Danish": "dan_Latn", - "German": "deu_Latn", - "Southwestern Dinka": "dik_Latn", - "Dyula": "dyu_Latn", - "Dzongkha": "dzo_Tibt", - "Greek": "ell_Grek", - "English": "eng_Latn", - "Esperanto": "epo_Latn", - "Estonian": "est_Latn", - "Basque": "eus_Latn", - "Ewe": "ewe_Latn", - "Faroese": "fao_Latn", - "Fijian": "fij_Latn", - "Finnish": "fin_Latn", - "Fon": "fon_Latn", - "French": "fra_Latn", - "Friulian": "fur_Latn", - "Nigerian Fulfulde": "fuv_Latn", - "Scottish Gaelic": "gla_Latn", - "Irish": "gle_Latn", - "Galician": "glg_Latn", - "Guarani": "grn_Latn", - "Gujarati": "guj_Gujr", - "Haitian Creole": "hat_Latn", - "Hausa": "hau_Latn", - "Hebrew": "heb_Hebr", - "Hindi": "hin_Deva", - "Chhattisgarhi": "hne_Deva", - "Croatian": "hrv_Latn", - "Hungarian": "hun_Latn", - "Armenian": "hye_Armn", - "Igbo": "ibo_Latn", - "Ilocano": "ilo_Latn", - "Indonesian": "ind_Latn", - "Icelandic": "isl_Latn", - "Italian": "ita_Latn", - "Javanese": "jav_Latn", - "Japanese": "jpn_Jpan", - "Kabyle": "kab_Latn", - "Jingpho": "kac_Latn", - "Kamba": "kam_Latn", - "Kannada": "kan_Knda", - "Kashmiri (Arabic script)": "kas_Arab", - "Kashmiri (Devanagari script)": "kas_Deva", - "Georgian": "kat_Geor", - "Central Kanuri (Arabic script)": "knc_Arab", - "Central Kanuri (Latin script)": "knc_Latn", - "Kazakh": "kaz_Cyrl", - "Kabiyè": "kbp_Latn", - "Kabuverdianu": "kea_Latn", - "Khmer": "khm_Khmr", - "Kikuyu": "kik_Latn", - "Kinyarwanda": "kin_Latn", - "Kyrgyz": "kir_Cyrl", - "Kimbundu": "kmb_Latn", - "Northern Kurdish": "kmr_Latn", - "Kikongo": "kon_Latn", - "Korean": "kor_Hang", - "Lao": "lao_Laoo", - "Ligurian": "lij_Latn", - "Limburgish": "lim_Latn", - "Lingala": "lin_Latn", - "Lithuanian": "lit_Latn", - "Lombard": "lmo_Latn", - "Latgalian": "ltg_Latn", - "Luxembourgish": "ltz_Latn", - "Luba-Kasai": "lua_Latn", - "Ganda": "lug_Latn", - "Luo": "luo_Latn", - "Mizo": "lus_Latn", - "Standard Latvian": "lvs_Latn", - "Magahi": "mag_Deva", - "Maithili": "mai_Deva", - "Malayalam": "mal_Mlym", - "Marathi": "mar_Deva", - "Minangkabau (Arabic script)": "min_Arab", - "Minangkabau (Latin script)": "min_Latn", - "Macedonian": "mkd_Cyrl", - "Plateau Malagasy": "plt_Latn", - "Maltese": "mlt_Latn", - "Meitei (Bengali script)": "mni_Beng", - "Halh Mongolian": "khk_Cyrl", - "Mossi": "mos_Latn", - "Maori": "mri_Latn", - "Burmese": "mya_Mymr", - "Dutch": "nld_Latn", - "Norwegian Nynorsk": "nno_Latn", - "Norwegian Bokmål": "nob_Latn", - "Nepali": "npi_Deva", - "Northern Sotho": "nso_Latn", - "Nuer": "nus_Latn", - "Nyanja": "nya_Latn", - "Occitan": "oci_Latn", - "West Central Oromo": "gaz_Latn", - "Odia": "ory_Orya", - "Pangasinan": "pag_Latn", - "Eastern Panjabi": "pan_Guru", - "Papiamento": "pap_Latn", - "Western Persian": "pes_Arab", - "Polish": "pol_Latn", - "Portuguese": "por_Latn", - "Dari": "prs_Arab", - "Southern Pashto": "pbt_Arab", - "Ayacucho Quechua": "quy_Latn", - "Romanian": "ron_Latn", - "Rundi": "run_Latn", - "Russian": "rus_Cyrl", - "Sango": "sag_Latn", - "Sanskrit": "san_Deva", - "Santali": "sat_Olck", - "Sicilian": "scn_Latn", - "Shan": "shn_Mymr", - "Sinhala": "sin_Sinh", - "Slovak": "slk_Latn", - "Slovenian": "slv_Latn", - "Samoan": "smo_Latn", - "Shona": "sna_Latn", - "Sindhi": "snd_Arab", - "Somali": "som_Latn", - "Southern Sotho": "sot_Latn", - "Spanish": "spa_Latn", - "Tosk Albanian": "als_Latn", - "Sardinian": "srd_Latn", - "Serbian": "srp_Cyrl", - "Swati": "ssw_Latn", - "Sundanese": "sun_Latn", - "Swedish": "swe_Latn", - "Swahili": "swh_Latn", - "Silesian": "szl_Latn", - "Tamil": "tam_Taml", - "Tatar": "tat_Cyrl", - "Telugu": "tel_Telu", - "Tajik": "tgk_Cyrl", - "Tagalog": "tgl_Latn", - "Thai": "tha_Thai", - "Tigrinya": "tir_Ethi", - "Tamasheq (Latin script)": "taq_Latn", - "Tamasheq (Tifinagh script)": "taq_Tfng", - "Tok Pisin": "tpi_Latn", - "Tswana": "tsn_Latn", - "Tsonga": "tso_Latn", - "Turkmen": "tuk_Latn", - "Tumbuka": "tum_Latn", - "Turkish": "tur_Latn", - "Twi": "twi_Latn", - "Central Atlas Tamazight": "tzm_Tfng", - "Uyghur": "uig_Arab", - "Ukrainian": "ukr_Cyrl", - "Umbundu": "umb_Latn", - "Urdu": "urd_Arab", - "Northern Uzbek": "uzn_Latn", - "Venetian": "vec_Latn", - "Vietnamese": "vie_Latn", - "Waray": "war_Latn", - "Wolof": "wol_Latn", - "Xhosa": "xho_Latn", - "Eastern Yiddish": "ydd_Hebr", - "Yoruba": "yor_Latn", - "Yue Chinese": "yue_Hant", - "Chinese Simplified": "zho_Hans", - "Chinese Traditional": "zho_Hant", - "Standard Malay": "zsm_Latn", - "Zulu": "zul_Latn" -} +def loadTranslationLanguages(path: str, force: bool = False) -> Dict[str, Any]: + """Load translation language mappings from YAML file. -translation_lang["CTranslate2"]["nllb-200-distilled-1.3B-ct2-int8"] = {"source":dict_nllb_languages, "target":dict_nllb_languages} -translation_lang["CTranslate2"]["nllb-200-3.3B-ct2-int8"] = {"source":dict_nllb_languages, "target":dict_nllb_languages} + Args: + path: Path to the YAML file + force: If True, reload even if already loaded -dict_plamo_languages = { - "English": "English", - "Japanese": "Japanese", - "Korean": "Korean", - "French": "French", - "German": "German", - "Spanish": "Spanish", - "Portuguese": "Portuguese", - "Russian": "Russian", - "Italian": "Italian", - "Dutch": "Dutch", - "Polish": "Polish", - "Turkish": "Turkish", - "Arabic": "Arabic", - "Hindi": "Hindi", - "Thai": "Thai", - "Vietnamese": "Vietnamese", - "Indonesian": "Indonesian", - "Malay": "Malay", - "Filipino": "Filipino", - "Swedish": "Swedish", - "Finnish": "Finnish", - "Danish": "Danish", - "Norwegian": "Norwegian", - "Romanian": "Romanian", - "Czech": "Czech", - "Hungarian": "Hungarian", - "Greek": "Greek", - "Hebrew": "Hebrew", - "Chinese Simplified":"Simplified Chinese", - "Chinese Traditional":"Traditional Chinese" -} + Returns: + Dictionary of translation language mappings -translation_lang["Plamo_API"] = {"source":dict_plamo_languages, "target":dict_plamo_languages} + Raises: + FileNotFoundError: If languages/languages.yml is not found + ValueError: If YAML structure is invalid + """ + global _loaded, translation_lang + if _loaded and not force: + return translation_lang -dict_gemini_languages = { - "Arabic": "Arabic", - "Bengali": "Bengali", - "Bulgarian": "Bulgarian", - "Chinese Simplified": "Simplified Chinese", - "Chinese Traditional": "Traditional Chinese", - "Croatian": "Croatian", - "Czech": "Czech", - "Danish": "Danish", - "Dutch": "Dutch", - "English": "English", - "Estonian": "Estonian", - "Finnish": "Finnish", - "French": "French", - "German": "German", - "Greek": "Greek", - "Hebrew": "Hebrew", - "Hindi": "Hindi", - "Hungarian": "Hungarian", - "Indonesian": "Indonesian", - "Italian": "Italian", - "Japanese": "Japanese", - "Korean": "Korean", - "Latvian": "Latvian", - "Lithuanian": "Lithuanian", - "Norwegian": "Norwegian", - "Polish": "Polish", - "Portuguese": "Portuguese", - "Romanian": "Romanian", - "Russian": "Russian", - "Serbian": "Serbian", - "Slovak": "Slovak", - "Slovenian": "Slovenian", - "Spanish": "Spanish", - "Swahili": "Swahili", - "Swedish": "Swedish", - "Thai": "Thai", - "Turkish": "Turkish", - "Ukrainian": "Ukrainian", - "Vietnamese": "Vietnamese", -} + with _lock: + if _loaded and not force: + return translation_lang -translation_lang["Gemini_API"] = {"source":dict_gemini_languages, "target":dict_gemini_languages} + data = _load_languages(path, "languages/languages.yml") -dict_openai_languages = { - "Arabic": "Arabic", - "Bengali": "Bengali", - "Bulgarian": "Bulgarian", - "Catalan": "Catalan", - "Chinese Simplified": "Simplified Chinese", - "Chinese Traditional": "Traditional Chinese", - "Croatian": "Croatian", - "Czech": "Czech", - "Danish": "Danish", - "Dutch": "Dutch", - "English": "English", - "Estonian": "Estonian", - "Finnish": "Finnish", - "French": "French", - "German": "German", - "Greek": "Greek", - "Hebrew": "Hebrew", - "Hindi": "Hindi", - "Hungarian": "Hungarian", - "Indonesian": "Indonesian", - "Italian": "Italian", - "Japanese": "Japanese", - "Korean": "Korean", - "Latvian": "Latvian", - "Lithuanian": "Lithuanian", - "Norwegian": "Norwegian", - "Polish": "Polish", - "Portuguese": "Portuguese", - "Romanian": "Romanian", - "Russian": "Russian", - "Serbian": "Serbian", - "Slovak": "Slovak", - "Slovenian": "Slovenian", - "Spanish": "Spanish", - "Swahili": "Swahili", - "Swedish": "Swedish", - "Thai": "Thai", - "Turkish": "Turkish", - "Ukrainian": "Ukrainian", - "Vietnamese": "Vietnamese", -} + if not isinstance(data, dict): + raise ValueError( + "languages/languages.yml のルートはマッピング(dict)である必要があります。" + ) -translation_lang["OpenAI_API"] = {"source": dict_openai_languages, "target": dict_openai_languages} -translation_lang["LMStudio"] = {"source": dict_openai_languages, "target": dict_openai_languages} -translation_lang["Ollama"] = {"source": dict_openai_languages, "target": dict_openai_languages} \ No newline at end of file + # 検証と正規化 + validated: Dict[str, Dict[str, Dict[str, str]]] = {} + for backend, value in data.items(): + if backend == "CTranslate2": + # NOTE: CTranslate2 はモデルごとに異なる言語セットを持つ + if not isinstance(value, dict): + raise ValueError( + "CTranslate2 の値はモデル名→ {source:, target:} の dict である必要があります。" + ) + validated["CTranslate2"] = {} + for model_name, model_map in value.items(): + _validate_source_target( + backend=f"CTranslate2/{model_name}", mapping=model_map + ) + validated["CTranslate2"][model_name] = { + "source": model_map["source"], + "target": model_map["target"], + } + else: + _validate_source_target(backend=backend, mapping=value) + validated[backend] = { + "source": value["source"], + "target": value["target"], + } + + translation_lang = validated + _loaded = True + return translation_lang + +if __name__ == "__main__": + try: + langs = loadTranslationLanguages(path=".", force=True) + printLog("Loaded translation languages:") + printLog(langs) + except Exception: + errorLogging() \ No newline at end of file diff --git a/src-python/models/translation/translation_ollama.py b/src-python/models/translation/translation_ollama.py index 8b99d328..7e5b5468 100644 --- a/src-python/models/translation/translation_ollama.py +++ b/src-python/models/translation/translation_ollama.py @@ -42,7 +42,7 @@ class OllamaClient: self.model = None self.base_url = "http://localhost:11434" - prompt_config = loadPromptConfig(root_path) + prompt_config = loadPromptConfig(root_path, "translation_ollama.yml") self.supported_languages = prompt_config["supported_languages"] self.prompt_template = prompt_config["system_prompt"]