diff --git a/src-python/models/transliteration/transliteration_transliterator.py b/src-python/models/transliteration/transliteration_transliterator.py index f2a9780f..7c85ebee 100644 --- a/src-python/models/transliteration/transliteration_transliterator.py +++ b/src-python/models/transliteration/transliteration_transliterator.py @@ -8,7 +8,7 @@ except ImportError: class Transliterator: def __init__(self): self.tokenizer_obj = dictionary.Dictionary().create() - self.mode = tokenizer.Tokenizer.SplitMode.A + self.mode = tokenizer.Tokenizer.SplitMode.C @staticmethod def is_kanji(ch: str) -> bool: @@ -101,7 +101,20 @@ class Transliterator: for t in tokens: surface = t.surface() reading = t.reading_form() - + pos = t.part_of_speech() + + if pos and pos[0] in ["記号", "補助記号"]: + reading = surface + + if surface == reading: + results.append({ + "orig": surface, + "kana": reading, + "hira": surface, + "hepburn": surface, + }) + continue + # 単純に1文字ずつ処理 if len(surface) == 1: # 1文字の場合はそのまま @@ -134,32 +147,48 @@ class Transliterator: else: # 最後の漢字ブロックの場合 kanji_reading = reading[reading_pos:] - + + # 空の読みを避ける + if not kanji_reading and reading_pos < len(reading): + kanji_reading = reading[reading_pos:] + if not kanji_reading and kanji_block: + # 読みが空だが漢字ブロックがある場合、残りの読みを全て割り当てる + kanji_reading = reading[reading_pos:] + + # reading_posの更新を正確に行うために、割り当てられた読みの長さをチェック + len_allocated_reading = len(kanji_reading) + if reading_pos + len_allocated_reading > len(reading): + len_allocated_reading = len(reading) - reading_pos + results.append({ "orig": kanji_block, "kana": kanji_reading, "hira": self.kata_to_hira(kanji_reading), "hepburn": katakana_to_hepburn(kanji_reading, use_macron=use_macron) }) - reading_pos += len(kanji_reading) + reading_pos += len_allocated_reading else: # 非漢字の場合 non_kanji_block = "" while i < len(surface) and not self.is_kanji(surface[i]): non_kanji_block += surface[i] i += 1 - - # 非漢字部分の読み(通常は文字数分) - non_kanji_reading = reading[reading_pos:reading_pos + len(non_kanji_block)] - + + # 非漢字部分の読み(通常は文字数分、または残りの読みの分だけ) + len_block = len(non_kanji_block) + non_kanji_reading = reading[reading_pos:reading_pos + len_block] + + # 割り当てられた読みの長さ + len_allocated_reading = len(non_kanji_reading) + results.append({ "orig": non_kanji_block, "kana": non_kanji_reading, "hira": self.kata_to_hira(non_kanji_reading), "hepburn": katakana_to_hepburn(non_kanji_reading, use_macron=use_macron) }) - reading_pos += len(non_kanji_reading) - + reading_pos += len_allocated_reading + return results # --- テスト --- @@ -180,6 +209,10 @@ if __name__ == "__main__": "取り敢えず検索してみる", "見知らぬ土地で冒険する", "彼は優れたエンジニアです", + " ".join(list("[]<>!@#$%^&*()_+-={}|\;:'\",.<>/?`~")), + " ".join(list("「」<>!@#$%^&*()_+-={}|\;:'",./?`~")), + " ".join(list("♪♫♬♭♮♯°℃℉№Å®©™✓✔✕✖★☆○●◎◇◆□■△▲▽▼※→←↑↓↔︎↕︎⇄⇅∞∴∵∷≪≫≦≧±×÷≠≈≡⊂⊃⊆⊇⊄⊅∪∩∈∋∅∀∃∠⊥⌒∂∇√∫∬∮∑∏∧∨¬⇒⇔∀∃∠⊥⌒∂∇√∫∬∮∑∏")), + " ".join(list("😀😃😄😁😆😅😂🤣😊😇🙂")) ] transliterator = Transliterator() diff --git a/src-ui/app/main_page/main_section/message_container/log_box/message_container/MessageContainer.jsx b/src-ui/app/main_page/main_section/message_container/log_box/message_container/MessageContainer.jsx index d53ccaa6..cb38e838 100644 --- a/src-ui/app/main_page/main_section/message_container/log_box/message_container/MessageContainer.jsx +++ b/src-ui/app/main_page/main_section/message_container/log_box/message_container/MessageContainer.jsx @@ -94,6 +94,7 @@ const MessageWithTransliteration = ({ item }) => { const hira = token.hira ?? ""; const hepburn = token.hepburn ?? ""; + // Only hovered romaji if it exists. (No ruby cuz 'orig' and 'hira' are same.) if (hira && hira === orig && hepburn) { return ( @@ -102,7 +103,8 @@ const MessageWithTransliteration = ({ item }) => { ); } - if (hira && hira !== orig && hepburn) { + // Ruby hiragana and hovered romaji. + if (hira && hepburn) { return ( {orig} @@ -111,15 +113,20 @@ const MessageWithTransliteration = ({ item }) => { ); } - if (hepburn && hepburn !== orig) { - return ( - - {orig} - {hepburn} - - ); + // Ruby romaji or hiragana. + if (hepburn || hira) { + const ruby = hepburn ? hepburn : hira; + if (ruby !== orig) { + return ( + + {orig} + {ruby} + + ); + }; } + // Nothing. Original only. return ( {orig}