diff --git a/backend.spec b/backend.spec
index 3933d942..c5c33da0 100644
--- a/backend.spec
+++ b/backend.spec
@@ -5,7 +5,7 @@ a = Analysis(
['src-python\\mainloop.py'],
pathex=[],
binaries=[],
- datas=[('./fonts', 'fonts/'), ('.venv/Lib/site-packages/zeroconf', 'zeroconf/'), ('.venv/Lib/site-packages/openvr', 'openvr/'), ('.venv/Lib/site-packages/pykakasi', 'pykakasi/'), ('.venv/Lib/site-packages/faster_whisper', 'faster_whisper/'), ('.venv/Lib/site-packages/hf_xet', 'hf_xet/')],
+ datas=[('./fonts', 'fonts/'), ('.venv/Lib/site-packages/zeroconf', 'zeroconf/'), ('.venv/Lib/site-packages/openvr', 'openvr/'), ('.venv/Lib/site-packages/faster_whisper', 'faster_whisper/'), ('.venv/Lib/site-packages/hf_xet', 'hf_xet/')],
hiddenimports=[],
hookspath=[],
hooksconfig={},
diff --git a/backend_cuda.spec b/backend_cuda.spec
index 08ba5fd7..e9cb35c8 100644
--- a/backend_cuda.spec
+++ b/backend_cuda.spec
@@ -5,7 +5,7 @@ a = Analysis(
['src-python\\mainloop.py'],
pathex=[],
binaries=[],
- datas=[('./fonts', 'fonts/'), ('.venv_cuda/Lib/site-packages/zeroconf', 'zeroconf/'), ('.venv_cuda/Lib/site-packages/openvr', 'openvr/'), ('.venv_cuda/Lib/site-packages/pykakasi', 'pykakasi/'), ('.venv_cuda/Lib/site-packages/faster_whisper', 'faster_whisper/'), ('.venv/Lib/site-packages/hf_xet', 'hf_xet/')],
+ datas=[('./fonts', 'fonts/'), ('.venv_cuda/Lib/site-packages/zeroconf', 'zeroconf/'), ('.venv_cuda/Lib/site-packages/openvr', 'openvr/'), ('.venv_cuda/Lib/site-packages/faster_whisper', 'faster_whisper/'), ('.venv/Lib/site-packages/hf_xet', 'hf_xet/')],
hiddenimports=[],
hookspath=[],
hooksconfig={},
diff --git a/locales/en.yml b/locales/en.yml
index 1faf9639..5dabb4b2 100644
--- a/locales/en.yml
+++ b/locales/en.yml
@@ -250,6 +250,13 @@ config_page:
received_message_format:
label: "Message Format (Speaker2Chatbox)"
desc: "Currently, it is used in Speaker2Chatbox."
+ convert_message_to_romaji:
+ label: Show Romaji
+ desc: Supported only when Japanese is selected as the translation language. When enabled along with '{{convert_message_to_hiragana}}', romaji will be shown on mouse hover.
+ convert_message_to_hiragana:
+ label: Show Hiragana
+ desc: Supported only when Japanese is selected as the translation language.
+
hotkeys:
toggle_vrct_visibility:
diff --git a/locales/ja.yml b/locales/ja.yml
index d3da554e..b4097a9a 100644
--- a/locales/ja.yml
+++ b/locales/ja.yml
@@ -250,6 +250,12 @@ config_page:
received_message_format:
label: メッセージフォーマット(Speaker2Chatbox)
desc: 今のところ、Speaker2Chatboxで送信した時の表示に使われます。
+ convert_message_to_romaji:
+ label: ローマ字を表示
+ desc: 翻訳言語として日本語を選択した時のみサポート。「{{convert_message_to_hiragana}}」と同時に有効にした場合は、マウスホバーで表示されます。
+ convert_message_to_hiragana:
+ label: ひらがなを表示
+ desc: 翻訳言語として日本語を選択した時のみサポート。
hotkeys:
toggle_vrct_visibility:
diff --git a/requirements.txt b/requirements.txt
index 244a9238..11f1d694 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -13,12 +13,14 @@ sentencepiece==0.2.0
openvr==1.26.701
pydub==0.25.1
psutil==5.9.8
-pykakasi==2.3.0
pycaw==20240210
websockets==15.0.1
huggingface_hub==0.32.2
hf-xet==1.1.2
setuptools==80.8.0
+SudachiPy==0.6.10
+SudachiDict-core==20250825
+SudachiDict-full==20250825
translators @ git+https://github.com/misyaguziya/translators@5.9.2.1
SpeechRecognition @ git+https://github.com/misyaguziya/custom_speech_recognition@3.10.4.1
tinyoscquery @ git+https://github.com/cyberkitsune/tinyoscquery@0.1.3
\ No newline at end of file
diff --git a/requirements_cuda.txt b/requirements_cuda.txt
index 4dadf192..cc0743df 100644
--- a/requirements_cuda.txt
+++ b/requirements_cuda.txt
@@ -14,12 +14,14 @@ sentencepiece==0.2.0
openvr==1.26.701
pydub==0.25.1
psutil==5.9.8
-pykakasi==2.3.0
pycaw==20240210
websockets==15.0.1
huggingface_hub==0.32.2
hf-xet==1.1.2
setuptools==80.8.0
+SudachiPy==0.6.10
+SudachiDict-core==20250825
+SudachiDict-full==20250825
translators @ git+https://github.com/misyaguziya/translators@5.9.2.1
SpeechRecognition @ git+https://github.com/misyaguziya/custom_speech_recognition@3.10.4.1
tinyoscquery @ git+https://github.com/cyberkitsune/tinyoscquery@0.1.3
\ No newline at end of file
diff --git a/src-python/config.py b/src-python/config.py
index c544727d..d99114cc 100644
--- a/src-python/config.py
+++ b/src-python/config.py
@@ -107,6 +107,10 @@ class Config:
def SELECTABLE_TAB_NO_LIST(self):
return self._SELECTABLE_TAB_NO_LIST
+ @property
+ def SELECTED_TAB_TARGET_LANGUAGES_NO_LIST(self):
+ return self._SELECTED_TAB_TARGET_LANGUAGES_NO_LIST
+
@property
def SELECTABLE_CTRANSLATE2_WEIGHT_TYPE_LIST(self):
return self._SELECTABLE_CTRANSLATE2_WEIGHT_TYPE_LIST
@@ -1115,24 +1119,17 @@ class Config:
},
}
self._SELECTED_TARGET_LANGUAGES = {}
+ self._SELECTED_TAB_TARGET_LANGUAGES_NO_LIST = ["1", "2", "3"]
for tab_no in self.SELECTABLE_TAB_NO_LIST:
- self._SELECTED_TARGET_LANGUAGES[tab_no] = {
- "1": {
- "language": "English",
- "country": "United States",
- "enable": True,
- },
- "2": {
- "language": "English",
- "country": "United States",
- "enable": False,
- },
- "3": {
- "language": "English",
- "country": "United States",
- "enable": False,
- },
- }
+            for tab_target_lang_no in self._SELECTED_TAB_TARGET_LANGUAGES_NO_LIST:  # build one default entry per target-language slot
+                if tab_no not in self._SELECTED_TARGET_LANGUAGES:
+                    self._SELECTED_TARGET_LANGUAGES[tab_no] = {}
+                if tab_target_lang_no not in self._SELECTED_TARGET_LANGUAGES[tab_no]:
+                    self._SELECTED_TARGET_LANGUAGES[tab_no][tab_target_lang_no] = {
+                        "language": "English",
+                        "country": "United States",
+                        "enable": tab_target_lang_no == "1",  # only slot "1" is on by default, matching the literal this loop replaces
+                    }
self._SELECTED_TRANSCRIPTION_ENGINE = "Google"
self._CONVERT_MESSAGE_TO_ROMAJI = False
self._CONVERT_MESSAGE_TO_HIRAGANA = False
diff --git a/src-python/controller.py b/src-python/controller.py
index c34abaf8..b10d5617 100644
--- a/src-python/controller.py
+++ b/src-python/controller.py
@@ -246,7 +246,8 @@ class Controller:
elif isinstance(message, str) and len(message) > 0:
translation = []
- transliteration = []
+ transliteration_message = []
+ transliteration_translation = []
if model.checkKeywords(message):
self.run(
200,
@@ -298,9 +299,30 @@ class Controller:
# その他のエラーは通常通り処理
raise
- if config.CONVERT_MESSAGE_TO_ROMAJI is True or config.CONVERT_MESSAGE_TO_HIRAGANA is True:
- if config.SELECTED_TARGET_LANGUAGES[config.SELECTED_TAB_NO]["1"]["language"] == "Japanese":
- transliteration = model.convertMessageToTransliteration(translation[0])
+ if config.CONVERT_MESSAGE_TO_HIRAGANA is True or config.CONVERT_MESSAGE_TO_ROMAJI is True:
+ if config.SELECTED_YOUR_LANGUAGES[config.SELECTED_TAB_NO]["1"]["language"] == "Japanese":
+ transliteration_message = model.convertMessageToTransliteration(
+ message,
+ hiragana=config.CONVERT_MESSAGE_TO_HIRAGANA,
+ romaji=config.CONVERT_MESSAGE_TO_ROMAJI
+ )
+
+ for i, no in enumerate(config.SELECTED_TAB_TARGET_LANGUAGES_NO_LIST):
+ if (config.ENABLE_TRANSLATION is True and
+ config.SELECTED_TARGET_LANGUAGES[config.SELECTED_TAB_NO][no]["language"] == "Japanese" and
+ config.SELECTED_TARGET_LANGUAGES[config.SELECTED_TAB_NO][no]["enable"] is True
+ ):
+ transliteration_translation.append(
+ model.convertMessageToTransliteration(
+ translation[i],
+ hiragana=config.CONVERT_MESSAGE_TO_HIRAGANA,
+ romaji=config.CONVERT_MESSAGE_TO_ROMAJI
+ )
+ )
+ else:
+ transliteration_translation.append([])
+ else:
+ transliteration_translation = [[] for _ in config.SELECTED_TAB_TARGET_LANGUAGES_NO_LIST]
if config.ENABLE_TRANSCRIPTION_SEND is True:
if config.SEND_MESSAGE_TO_VRC is True:
@@ -317,9 +339,16 @@ class Controller:
200,
self.run_mapping["transcription_mic"],
{
- "message":message,
- "translation":translation,
- "transliteration":transliteration
+ "original": {
+ "message": message,
+ "transliteration": transliteration_message
+ },
+ "translations": [
+ {
+ "message": translation_message,
+ "transliteration": transliteration
+ } for translation_message, transliteration in zip(translation, transliteration_translation)
+ ]
})
if config.OVERLAY_LARGE_LOG is True and model.overlay.initialized is True:
@@ -351,7 +380,7 @@ class Controller:
"dst_languages":config.SELECTED_TARGET_LANGUAGES[config.SELECTED_TAB_NO],
"message":message,
"translation":translation,
- "transliteration":transliteration
+ "transliteration":transliteration_translation
}
)
@@ -373,7 +402,8 @@ class Controller:
)
elif isinstance(message, str) and len(message) > 0:
translation = []
- transliteration = []
+ transliteration_message = []
+ transliteration_translation = []
if model.checkKeywords(message):
self.run(
200,
@@ -425,9 +455,28 @@ class Controller:
# その他のエラーは通常通り処理
raise
- if config.CONVERT_MESSAGE_TO_ROMAJI is True or config.CONVERT_MESSAGE_TO_HIRAGANA is True:
- if config.SELECTED_TARGET_LANGUAGES[config.SELECTED_TAB_NO]["1"]["language"] == "Japanese":
- transliteration = model.convertMessageToTransliteration(message)
+ if config.CONVERT_MESSAGE_TO_HIRAGANA is True or config.CONVERT_MESSAGE_TO_ROMAJI is True:
+ if language == "Japanese":
+ transliteration_message = model.convertMessageToTransliteration(
+ message,
+ hiragana=config.CONVERT_MESSAGE_TO_HIRAGANA,
+ romaji=config.CONVERT_MESSAGE_TO_ROMAJI
+ )
+
+ if (config.ENABLE_TRANSLATION is True and
+ config.SELECTED_YOUR_LANGUAGES[config.SELECTED_TAB_NO]["1"]["language"] == "Japanese"
+ ):
+ transliteration_translation.append(
+ model.convertMessageToTransliteration(
+ translation[0],
+ hiragana=config.CONVERT_MESSAGE_TO_HIRAGANA,
+ romaji=config.CONVERT_MESSAGE_TO_ROMAJI
+ )
+ )
+ else:
+ transliteration_translation.append([])
+ else:
+ transliteration_translation = [[]]
if config.ENABLE_TRANSCRIPTION_RECEIVE is True:
if config.OVERLAY_SMALL_LOG is True and model.overlay.initialized is True:
@@ -484,9 +533,16 @@ class Controller:
200,
self.run_mapping["transcription_speaker"],
{
- "message":message,
- "translation":translation,
- "transliteration":transliteration,
+ "original": {
+ "message": message,
+ "transliteration": transliteration_message
+ },
+ "translations": [
+ {
+ "message": translation_message,
+ "transliteration": transliteration
+ } for translation_message, transliteration in zip(translation, transliteration_translation)
+ ]
})
if model.checkWebSocketServerAlive() is True:
@@ -497,7 +553,7 @@ class Controller:
"dst_languages":config.SELECTED_YOUR_LANGUAGES[config.SELECTED_TAB_NO],
"message":message,
"translation":translation,
- "transliteration":transliteration
+ "transliteration":transliteration_translation
}
)
@@ -510,7 +566,8 @@ class Controller:
message = data["message"]
if len(message) > 0:
translation = []
- transliteration = []
+ transliteration_message = []
+ transliteration_translation = []
if config.ENABLE_TRANSLATION is False:
pass
else:
@@ -562,18 +619,45 @@ class Controller:
"result":
{
"id":id,
- "message":message,
- "translation":[],
- "transliteration":[],
+ "original": {
+ "message":message,
+ "transliteration":[]
},
- }
+ "translations": [
+ {
+ "message": "",
+ "transliteration": []
+ } for _ in config.SELECTED_TAB_TARGET_LANGUAGES_NO_LIST
+ ]
+ },
+ }
else:
# その他のエラーは通常通り処理
raise
- if config.CONVERT_MESSAGE_TO_ROMAJI is True or config.CONVERT_MESSAGE_TO_HIRAGANA is True:
- if config.SELECTED_TARGET_LANGUAGES[config.SELECTED_TAB_NO]["1"]["language"] == "Japanese":
- transliteration = model.convertMessageToTransliteration(translation[0])
+ if config.CONVERT_MESSAGE_TO_HIRAGANA is True or config.CONVERT_MESSAGE_TO_ROMAJI is True:
+ if config.SELECTED_YOUR_LANGUAGES[config.SELECTED_TAB_NO]["1"]["language"] == "Japanese":
+ transliteration_message = model.convertMessageToTransliteration(
+ message,
+ hiragana=config.CONVERT_MESSAGE_TO_HIRAGANA,
+ romaji=config.CONVERT_MESSAGE_TO_ROMAJI
+ )
+ for i, no in enumerate(config.SELECTED_TAB_TARGET_LANGUAGES_NO_LIST):
+ if (config.ENABLE_TRANSLATION is True and
+ config.SELECTED_TARGET_LANGUAGES[config.SELECTED_TAB_NO][no]["language"] == "Japanese" and
+ config.SELECTED_TARGET_LANGUAGES[config.SELECTED_TAB_NO][no]["enable"] is True
+ ):
+ transliteration_translation.append(
+ model.convertMessageToTransliteration(
+ translation[i],
+ hiragana=config.CONVERT_MESSAGE_TO_HIRAGANA,
+ romaji=config.CONVERT_MESSAGE_TO_ROMAJI
+ )
+ )
+ else:
+ transliteration_translation.append([])
+ else:
+ transliteration_translation = [[] for _ in config.SELECTED_TAB_TARGET_LANGUAGES_NO_LIST]
# send OSC message
if config.SEND_MESSAGE_TO_VRC is True:
@@ -615,7 +699,7 @@ class Controller:
"dst_languages":config.SELECTED_TARGET_LANGUAGES[config.SELECTED_TAB_NO],
"message":message,
"translation":translation,
- "transliteration":transliteration
+ "transliteration":transliteration_translation
}
)
@@ -623,14 +707,21 @@ class Controller:
translation_text = f" ({'/'.join(translation)})" if translation else ""
model.logger.info(f"[CHAT] {message}{translation_text}")
- return {"status":200,
+ return {
+ "status":200,
"result":{
"id":id,
- "message":message,
- "translation":translation,
- "transliteration":transliteration,
+ "original": {
+ "message":message,
+ "transliteration":transliteration_message
},
- }
+ "translations": [
+ {
+ "message": translation_message,
+ "transliteration": transliteration
+ } for translation_message, transliteration in zip(translation, transliteration_translation)
+ ]
+ }}
@staticmethod
def getVersion(*args, **kwargs) -> dict:
diff --git a/src-python/model.py b/src-python/model.py
index 333f1394..236593a4 100644
--- a/src-python/model.py
+++ b/src-python/model.py
@@ -14,7 +14,6 @@ from typing import Callable
from packaging.version import parse
from flashtext import KeywordProcessor
-from pykakasi import kakasi
from device_manager import device_manager
from config import config
@@ -28,6 +27,7 @@ from models.translation.translation_languages import translation_lang
from models.transcription.transcription_languages import transcription_lang
from models.translation.translation_utils import checkCTranslate2Weight, downloadCTranslate2Weight, downloadCTranslate2Tokenizer
from models.transcription.transcription_whisper import checkWhisperWeight, downloadWhisperWeight
+from models.transliteration.transliteration_transliterator import Transliterator
from models.overlay.overlay import Overlay
from models.overlay.overlay_image import OverlayImage
from models.watchdog.watchdog import Watchdog
@@ -99,7 +99,7 @@ class Model:
self.overlay_image = OverlayImage(config.PATH_LOCAL)
self.mic_audio_queue = None
self.mic_mute_status = None
- self.kks = kakasi()
+ self.transliterator = Transliterator()
self.watchdog = Watchdog(config.WATCHDOG_TIMEOUT, config.WATCHDOG_INTERVAL)
self.osc_handler = OSCHandler(config.OSC_IP_ADDRESS, config.OSC_PORT)
self.websocket_server = None
@@ -275,13 +275,21 @@ class Model:
self.previous_receive_message = message
return repeat_flag
- def convertMessageToTransliteration(self, message: str) -> str:
- data_list = self.kks.convert(message)
- keys_to_keep = {"orig", "hira", "hepburn"}
- filtered_list = []
- for item in data_list:
- filtered_item = {key: value for key, value in item.items() if key in keys_to_keep}
- filtered_list.append(filtered_item)
+    def convertMessageToTransliteration(self, message: str, hiragana: bool=True, romaji: bool=True) -> list:
+        """Tokenize *message* and return per-chunk transliteration dicts.
+
+        Each returned dict always carries "orig"; "hira" is included when
+        *hiragana* is truthy and "hepburn" when *romaji* is truthy. When both
+        flags are off there is nothing to annotate, so an empty list is
+        returned (same shape as the "no transliteration" default used by the
+        callers) instead of the raw string.
+        """
+        if not hiragana and not romaji:
+            return []
+
+        keys_to_keep = {"orig"}
+        if hiragana:
+            keys_to_keep.add("hira")
+        if romaji:
+            keys_to_keep.add("hepburn")
+
+        # Macrons are disabled so long vowels stay as plain ASCII (ou, uu, ...).
+        data_list = self.transliterator.analyze(message, use_macron=False)
+        filtered_list = [
+            {key: value for key, value in item.items() if key in keys_to_keep}
+            for item in data_list
+        ]
         return filtered_list
def setOscIpAddress(self, ip_address):
diff --git a/src-python/models/transliteration/transliteration_kana_to_hepburn.py b/src-python/models/transliteration/transliteration_kana_to_hepburn.py
new file mode 100644
index 00000000..e7ba04c2
--- /dev/null
+++ b/src-python/models/transliteration/transliteration_kana_to_hepburn.py
@@ -0,0 +1,215 @@
+# transliteration_kana_to_hepburn.py
+# Katakana -> Hepburn romaji conversion (no third-party packages required)
+
+def katakana_to_hepburn(kata: str, use_macron: bool = True) -> str:
+    """Convert a katakana string to Hepburn romaji.
+
+    Args:
+        kata: Text to romanize. Leading/trailing whitespace is stripped.
+            Characters that are not katakana (ASCII, symbols, hiragana, ...)
+            are copied through without any kana processing, so a literal
+            "-" or a Latin "n" is never mistaken for a long-vowel mark or ン.
+        use_macron: When True the vowel pairs aa/ii/uu/ee/oo/ou are folded
+            into ā ī ū ē ō. When False the doubled vowels are kept as
+            written (e.g. "toukyou").
+
+    Returns:
+        The romanized text, lower-cased. Note that the final macron folding
+        and lower-casing run over the whole output string, so they also
+        apply to copied-through Latin text.
+    """
+    # Single-kana readings.
+    base = {
+        'ア': 'a', 'イ': 'i', 'ウ': 'u', 'エ': 'e', 'オ': 'o',
+        'カ': 'ka', 'キ': 'ki', 'ク': 'ku', 'ケ': 'ke', 'コ': 'ko',
+        'サ': 'sa', 'シ': 'shi', 'ス': 'su', 'セ': 'se', 'ソ': 'so',
+        'タ': 'ta', 'チ': 'chi', 'ツ': 'tsu', 'テ': 'te', 'ト': 'to',
+        'ナ': 'na', 'ニ': 'ni', 'ヌ': 'nu', 'ネ': 'ne', 'ノ': 'no',
+        'ハ': 'ha', 'ヒ': 'hi', 'フ': 'fu', 'ヘ': 'he', 'ホ': 'ho',
+        'マ': 'ma', 'ミ': 'mi', 'ム': 'mu', 'メ': 'me', 'モ': 'mo',
+        'ヤ': 'ya', 'ユ': 'yu', 'ヨ': 'yo',
+        'ラ': 'ra', 'リ': 'ri', 'ル': 'ru', 'レ': 're', 'ロ': 'ro',
+        'ワ': 'wa', 'ヲ': 'wo', 'ン': 'n',
+        'ガ': 'ga', 'ギ': 'gi', 'グ': 'gu', 'ゲ': 'ge', 'ゴ': 'go',
+        'ザ': 'za', 'ジ': 'ji', 'ズ': 'zu', 'ゼ': 'ze', 'ゾ': 'zo',
+        'ダ': 'da', 'ヂ': 'ji', 'ヅ': 'zu', 'デ': 'de', 'ド': 'do',
+        'バ': 'ba', 'ビ': 'bi', 'ブ': 'bu', 'ベ': 'be', 'ボ': 'bo',
+        'パ': 'pa', 'ピ': 'pi', 'プ': 'pu', 'ペ': 'pe', 'ポ': 'po',
+        # Small kana that show up on their own (normally consumed by a digraph).
+        'ァ': 'a', 'ィ': 'i', 'ゥ': 'u', 'ェ': 'e', 'ォ': 'o',
+        'ャ': 'ya', 'ュ': 'yu', 'ョ': 'yo',
+        'ヴ': 'vu',
+    }
+
+    # Two-kana combinations (kana + small kana), checked before `base`.
+    digraphs = {
+        ('キ', 'ャ'): 'kya', ('キ', 'ュ'): 'kyu', ('キ', 'ョ'): 'kyo',
+        ('ギ', 'ャ'): 'gya', ('ギ', 'ュ'): 'gyu', ('ギ', 'ョ'): 'gyo',
+        ('シ', 'ャ'): 'sha', ('シ', 'ュ'): 'shu', ('シ', 'ョ'): 'sho',
+        ('ジ', 'ャ'): 'ja', ('ジ', 'ュ'): 'ju', ('ジ', 'ョ'): 'jo',
+        ('チ', 'ャ'): 'cha', ('チ', 'ュ'): 'chu', ('チ', 'ョ'): 'cho',
+        ('ニ', 'ャ'): 'nya', ('ニ', 'ュ'): 'nyu', ('ニ', 'ョ'): 'nyo',
+        ('ヒ', 'ャ'): 'hya', ('ヒ', 'ュ'): 'hyu', ('ヒ', 'ョ'): 'hyo',
+        ('ビ', 'ャ'): 'bya', ('ビ', 'ュ'): 'byu', ('ビ', 'ョ'): 'byo',
+        ('ピ', 'ャ'): 'pya', ('ピ', 'ュ'): 'pyu', ('ピ', 'ョ'): 'pyo',
+        ('ミ', 'ャ'): 'mya', ('ミ', 'ュ'): 'myu', ('ミ', 'ョ'): 'myo',
+        ('リ', 'ャ'): 'rya', ('リ', 'ュ'): 'ryu', ('リ', 'ョ'): 'ryo',
+        # Loanword sounds.
+        ('フ', 'ャ'): 'fya', ('フ', 'ュ'): 'fyu', ('フ', 'ョ'): 'fyo',
+        ('フ', 'ァ'): 'fa', ('フ', 'ィ'): 'fi', ('フ', 'ェ'): 'fe', ('フ', 'ォ'): 'fo',
+        ('シ', 'ェ'): 'she', ('ジ', 'ェ'): 'je', ('チ', 'ェ'): 'che',
+        ('テ', 'ィ'): 'ti', ('デ', 'ィ'): 'di',
+        ('テ', 'ュ'): 'tyu', ('デ', 'ュ'): 'dyu',
+        ('ト', 'ゥ'): 'tu', ('ド', 'ゥ'): 'du',
+        ('イ', 'ェ'): 'ye',
+        ('ウ', 'ァ'): 'wa', ('ウ', 'ィ'): 'wi', ('ウ', 'ェ'): 'we', ('ウ', 'ォ'): 'wo',
+        ('ス', 'ィ'): 'si', ('ズ', 'ィ'): 'zi',
+        ('ツ', 'ァ'): 'tsa', ('ツ', 'ィ'): 'tsi', ('ツ', 'ェ'): 'tse', ('ツ', 'ォ'): 'tso',
+        ('キ', 'ェ'): 'kye', ('ギ', 'ェ'): 'gye',
+        ('ヴ', 'ァ'): 'va', ('ヴ', 'ィ'): 'vi', ('ヴ', 'ェ'): 've', ('ヴ', 'ォ'): 'vo',
+        ('ヴ', 'ュ'): 'vyu',
+    }
+
+    # Small kana with no standalone reading here; they are dropped silently.
+    dropped_small_kana = {'ヮ', 'ヵ', 'ヶ'}
+
+    # Doubled vowels -> macron vowels ("ou" is treated as a long o).
+    macron_map = {
+        'aa': 'ā', 'ii': 'ī', 'uu': 'ū', 'ee': 'ē', 'oo': 'ō',
+        'ou': 'ō',
+    }
+
+    vowels = 'aiueo'
+
+    def initial_consonant(rom: str) -> str:
+        """Return the consonant cluster that precedes the first vowel of rom."""
+        for idx, letter in enumerate(rom):
+            if letter in vowels:
+                return rom[:idx]
+        return rom  # no vowel at all (e.g. 'n')
+
+    kata = kata.strip()
+    length = len(kata)
+    res = []
+    # Index in `res` of an 'n' produced by ン when it is the most recent
+    # piece, else -1. Used to apply n -> m only to kana-derived text.
+    last_n = -1
+
+    def lookup(pos: int):
+        """Return (romaji, kana_consumed) for the kana at pos, or (None, 0)."""
+        if pos + 1 < length and (kata[pos], kata[pos + 1]) in digraphs:
+            return digraphs[(kata[pos], kata[pos + 1])], 2
+        if kata[pos] in base:
+            return base[kata[pos]], 1
+        return None, 0
+
+    def push(rom: str) -> None:
+        """Append a kana-derived piece, turning a directly preceding ン into 'm' before b/m/p."""
+        if last_n >= 0 and last_n == len(res) - 1 and rom[0] in 'bmp':
+            res[last_n] = 'm'
+        res.append(rom)
+
+    i = 0
+    while i < length:
+        ch = kata[i]
+
+        # Sokuon (ッ): double the first consonant of the following sound.
+        if ch == 'ッ':
+            if i + 1 < length:
+                following, _ = lookup(i + 1)
+                if following:
+                    cons = initial_consonant(following)
+                    if cons:
+                        # Hepburn writes っち as "tchi" (matcha), not "cchi".
+                        push('t' if cons.startswith('ch') else cons[0])
+            i += 1
+            continue
+
+        # Long-vowel mark (ー): repeat the preceding vowel; drop it when
+        # there is no preceding vowel (start of text or after a consonant).
+        if ch == 'ー':
+            tail = res[-1][-1] if res else ''
+            if tail and tail in vowels:
+                res.append(tail)
+            last_n = -1
+            i += 1
+            continue
+
+        if ch in dropped_small_kana:
+            i += 1
+            continue
+
+        rom, used = lookup(i)
+        if rom is None:
+            # Not katakana: copy through untouched.
+            res.append(ch)
+            i += 1
+            continue
+
+        push(rom)
+        if ch == 'ン':
+            last_n = len(res) - 1
+        i += used
+
+    raw = ''.join(res)
+
+    if use_macron:
+        for pair, mac in macron_map.items():
+            raw = raw.replace(pair, mac)
+
+    # Hepburn output is lower-case.
+    return raw.lower()
+
+
+# --- Sample inputs: run this module directly to print their romanization ---
+if __name__ == "__main__":
+ tests = [
+ "カタカナ",
+ "コンピューター",
+ "キャッチ",
+ "マッチャ",
+ "シェア",
+ "ジェット",
+ "ヴァイオリン",
+ "ホテル",
+ "スーパー",
+ "ギュウニュウ",
+ "パーティー",
+ "トウキョウ", # Tokyo (トウキョウ -> tōkyō)
+ "オーケー",
+ "ファイル",
+ "ニューヨーク",
+ "ラーメン",
+ "パン",
+ "チョコレート",
+ ]
+
+ for s in tests:
+ print(s, "->", katakana_to_hepburn(s, use_macron=True))
\ No newline at end of file
diff --git a/src-python/models/transliteration/transliteration_transliterator.py b/src-python/models/transliteration/transliteration_transliterator.py
new file mode 100644
index 00000000..f2a9780f
--- /dev/null
+++ b/src-python/models/transliteration/transliteration_transliterator.py
@@ -0,0 +1,187 @@
+from sudachipy import tokenizer
+from sudachipy import dictionary
+try:
+ from .transliteration_kana_to_hepburn import katakana_to_hepburn
+except ImportError:
+ from transliteration_kana_to_hepburn import katakana_to_hepburn
+
+class Transliterator:
+    """Tokenize Japanese text and annotate each chunk with its reading.
+
+    Every chunk is a dict with the keys "orig" (surface text), "kana"
+    (katakana reading), "hira" (hiragana reading) and "hepburn" (romaji).
+    """
+
+    def __init__(self):
+        # Uses the default Sudachi dictionary and split mode A.
+        self.tokenizer_obj = dictionary.Dictionary().create()
+        self.mode = tokenizer.Tokenizer.SplitMode.A
+
+    @staticmethod
+    def is_kanji(ch: str) -> bool:
+        """Return True when ch is a kanji or a kanji-like mark (々, 〆, 〇)."""
+        return (
+            '\u4e00' <= ch <= '\u9fff'       # CJK Unified Ideographs
+            or '\u3400' <= ch <= '\u4dbf'    # CJK Extension A
+            or '\uf900' <= ch <= '\ufaff'    # CJK Compatibility Ideographs
+            # The iteration mark and friends are read as part of a kanji word
+            # (e.g. 人々), so they must stay in the same block as the kanji.
+            or ch in ('々', '〆', '〇')
+        )
+
+    @staticmethod
+    def kata_to_hira(text: str) -> str:
+        """Map katakana ァ..ヶ onto the matching hiragana; leave the rest as is."""
+        return "".join(
+            chr(ord(c) - 0x60) if 'ァ' <= c <= 'ヶ' else c
+            for c in text
+        )
+
+    @staticmethod
+    def _split_blocks(surface: str) -> list:
+        """Split surface into consecutive (is_kanji, text) runs."""
+        blocks = []
+        for ch in surface:
+            kind = Transliterator.is_kanji(ch)
+            if blocks and blocks[-1][0] == kind:
+                blocks[-1] = (kind, blocks[-1][1] + ch)
+            else:
+                blocks.append((kind, ch))
+        return blocks
+
+    @staticmethod
+    def _make_entry(orig: str, kana: str, use_macron: bool) -> dict:
+        """Build one result dict for a surface chunk and its katakana reading."""
+        return {
+            "orig": orig,
+            "kana": kana,
+            "hira": Transliterator.kata_to_hira(kana),
+            "hepburn": katakana_to_hepburn(kana, use_macron=use_macron)
+        }
+
+    @staticmethod
+    def split_kanji_okurigana(surface: str, reading_kana: str, use_macron: bool = True):
+        """Split one word into kanji / non-kanji chunks and distribute its reading.
+
+        Args:
+            surface: Surface form of a single word.
+            reading_kana: Katakana reading of the whole word.
+            use_macron: Forwarded to katakana_to_hepburn (default True, the
+                value this helper always used).
+
+        Returns:
+            [{"orig": ..., "kana": ..., "hira": ..., "hepburn": ...}, ...]
+        """
+        result = []
+        blocks = Transliterator._split_blocks(surface)
+
+        # Hand out the reading block by block, left to right.
+        kana_left = reading_kana
+        for i, (is_kan, part) in enumerate(blocks):
+            if is_kan:
+                if i == len(blocks) - 1:
+                    # Last (or only) block: it gets whatever reading is left.
+                    kana_for_part = kana_left
+                else:
+                    # Reserve one kana per non-kanji character that follows.
+                    remaining_non_kanji = sum(len(p) for is_k, p in blocks[i+1:] if not is_k)
+                    if remaining_non_kanji > 0 and len(kana_left) > remaining_non_kanji:
+                        kana_for_part = kana_left[:-remaining_non_kanji]
+                    else:
+                        # Give each kanji at least one kana of reading.
+                        min_kana = len(part)
+                        kana_for_part = kana_left[:max(min_kana, len(kana_left) - remaining_non_kanji)]
+
+                # Avoid an empty reading while some reading is still available.
+                if not kana_for_part and kana_left:
+                    kana_for_part = kana_left[:1]
+            else:
+                # Non-kanji part (okurigana etc.): one kana per character.
+                kana_for_part = kana_left[:len(part)]
+
+            result.append(Transliterator._make_entry(part, kana_for_part, use_macron))
+            kana_left = kana_left[len(kana_for_part):]
+
+        return result
+
+    def analyze(self, text: str, use_macron: bool = True):
+        """Tokenize text and return the reading-annotated chunks of every token.
+
+        Each token is split into kanji / non-kanji runs. A kanji run that is
+        followed by more characters takes the reading up to the point where
+        one kana per remaining surface character is left; a non-kanji run
+        takes one kana per character. The final run of a token always takes
+        the rest of the reading, so nothing is lost when surface and reading
+        lengths differ.
+        """
+        results = []
+        for token in self.tokenizer_obj.tokenize(text, self.mode):
+            surface = token.surface()
+            reading = token.reading_form()
+
+            blocks = self._split_blocks(surface)
+            reading_pos = 0
+            consumed = 0  # surface characters covered so far
+            for index, (is_kan, part) in enumerate(blocks):
+                consumed += len(part)
+                remaining_chars = len(surface) - consumed
+
+                if index == len(blocks) - 1:
+                    part_reading = reading[reading_pos:]
+                elif is_kan:
+                    part_reading = reading[reading_pos:-remaining_chars]
+                else:
+                    part_reading = reading[reading_pos:reading_pos + len(part)]
+
+                results.append(self._make_entry(part, part_reading, use_macron))
+                reading_pos += len(part_reading)
+
+        return results
+
+# --- Sample sentences: run this module directly to print their analysis ---
+if __name__ == "__main__":
+ test_cases = [
+ "美しい花を見る",
+ "東京に行く",
+ "漢字とカタカナの混在",
+ "パーティーに行く",
+ "コンピューターを使う",
+ "シェアハウスに住む",
+ "ヴァイオリンを弾く",
+ "ギュウニュウを飲む",
+ "ニューヨークに行く",
+ "ラーメンを食べる",
+ "チョコレートが好き",
+ "SessionIDを取得する",
+ "取り敢えず検索してみる",
+ "見知らぬ土地で冒険する",
+ "彼は優れたエンジニアです",
+ ]
+
+ transliterator = Transliterator()
+ for case in test_cases:
+ print(transliterator.analyze(case))
\ No newline at end of file
diff --git a/src-ui/app/config_page/setting_section/setting_box/others/Others.jsx b/src-ui/app/config_page/setting_section/setting_box/others/Others.jsx
index 65027ef3..5ef39ce0 100644
--- a/src-ui/app/config_page/setting_section/setting_box/others/Others.jsx
+++ b/src-ui/app/config_page/setting_section/setting_box/others/Others.jsx
@@ -45,6 +45,10 @@ export const Others = () => {
{messages.message}
- ) : is_translated_exist ? ( +{messages.original.message}
+ ) : is_translation_exist ? ({messages.original}
+{item.message}
; + } + + return ( ++ {item.transliteration.map((token, idx) => renderTokenNode(token, idx))} +
+ ); +}; + +const OriginalMessage = ({ messages }) => { return ( <> -{messages.original}
- {translated_data.map((message, index) => ( -{message}
+{messages.original.message}
+ {messages.translations.map((item, idx) => ( +