From 8be132abe64bdafb49d1337b1c1b68bf0b49d38f Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Wed, 22 Oct 2025 14:23:41 +0900 Subject: [PATCH] =?UTF-8?q?overlay:=20=E3=83=95=E3=82=A9=E3=83=B3=E3=83=88?= =?UTF-8?q?=E3=82=AD=E3=83=A3=E3=83=83=E3=82=B7=E3=83=A5=E8=BF=BD=E5=8A=A0?= =?UTF-8?q?=E3=81=A8=E5=B0=8F=E5=9E=8B=E3=83=AD=E3=82=B0=E3=81=AE=E3=83=AB?= =?UTF-8?q?=E3=83=93=E6=8F=8F=E7=94=BB=E3=82=92=E6=94=B9=E5=96=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - フォントのメモリキャッシュ(_font_cache)と _get_font を導入し truetype 読み込みを最適化 - createTextboxSmallLog を堅牢化(None対応、幅測定の例外処理、改行判定の改善) - 単語単位ルビ描画機能を追加(renderRubyBlock / createTextboxSmallLogWithRubyTokens) - トークン毎の幅算出/センタリング、行折り返し検出時のフォールバック実装 - createOverlayImageSmallLog を拡張して、元文・翻訳それぞれにルビ適用とフォールバック合成を実行 - __main__ のテスト呼び出しを翻訳リスト形式に合わせて修正し、ルビテスト追加 --- src-python/config.py | 11 + src-python/model.py | 34 ++- src-python/models/overlay/overlay_image.py | 302 ++++++++++++++++++--- 3 files changed, 303 insertions(+), 44 deletions(-) diff --git a/src-python/config.py b/src-python/config.py index e1683534..479b4b96 100644 --- a/src-python/config.py +++ b/src-python/config.py @@ -1068,6 +1068,15 @@ class Config: case "opacity" | "ui_scaling": if isinstance(value, (int, float)): self._OVERLAY_SMALL_LOG_SETTINGS[key] = float(value) + case "ruby_font_scale": + if isinstance(value, (int, float)): + v = float(value) + if 0.05 <= v <= 3.0: + self._OVERLAY_SMALL_LOG_SETTINGS[key] = v + case "ruby_line_spacing": + if isinstance(value, int): + if 0 <= value <= 200: + self._OVERLAY_SMALL_LOG_SETTINGS[key] = value self.saveConfig(inspect.currentframe().f_code.co_name, self.OVERLAY_SMALL_LOG_SETTINGS) @property @@ -1423,6 +1432,8 @@ class Config: "opacity": 1.0, "ui_scaling": 1.0, "tracker": "HMD", + "ruby_font_scale": 0.5, + "ruby_line_spacing": 4, } self._OVERLAY_LARGE_LOG = False self._OVERLAY_LARGE_LOG_SETTINGS = { diff --git a/src-python/model.py b/src-python/model.py index e8d35aec..4b3a9e11 100644 --- a/src-python/model.py +++ b/src-python/model.py @@ -944,13 +944,41 @@ class Model: self.speaker_energy_recorder.stop() self.speaker_energy_recorder = None - def createOverlayImageSmallLog(self, message:Optional[str], your_language:Optional[str], translation:list, target_language:Optional[dict]) -> object: + def createOverlayImageSmallLog(self, message:Optional[str], your_language:Optional[str], translation:list, target_language:Optional[dict], translation_transliteration_tokens: Optional[list] = None) -> object: self.ensure_initialized() - # target_language may be provided as dict or None + # Normalize target_language dict -> list target_language_list = [] if isinstance(target_language, dict): target_language_list = [data["language"] for data in target_language.values() if data.get("enable") is True] - return self.overlay_image.createOverlayImageSmallLog(message, your_language, translation, target_language_list) + + # Prepare transliteration tokens only if we have an original message string. + transliteration_tokens = [] + if isinstance(message, str) and message.strip(): + try: + # Always request both romaji + hiragana for ruby (per spec: romaji upper, hiragana lower) + transliteration_tokens = self.convertMessageToTransliteration(message, hiragana=True, romaji=True) + except Exception: + transliteration_tokens = [] + errorLogging() + + # Fetch ruby settings from config (with safe defaults if missing) + ruby_font_scale = config.OVERLAY_SMALL_LOG_SETTINGS.get("ruby_font_scale", 0.5) + ruby_line_spacing = config.OVERLAY_SMALL_LOG_SETTINGS.get("ruby_line_spacing", 4) + + # 翻訳行ルビ (任意) が指定されていれば渡す。後方互換のため None / 不正型は空リストに。 + if not isinstance(translation_transliteration_tokens, list): + translation_transliteration_tokens = [] + + return self.overlay_image.createOverlayImageSmallLog( + message, + your_language, + translation, + target_language_list, + transliteration_tokens=transliteration_tokens, + translation_transliteration_tokens=translation_transliteration_tokens, + ruby_font_scale=ruby_font_scale, + ruby_line_spacing=ruby_line_spacing, + ) def createOverlayImageSmallMessage(self, message): self.ensure_initialized() diff --git a/src-python/models/overlay/overlay_image.py b/src-python/models/overlay/overlay_image.py index d307411e..1e5e5b06 100644 --- a/src-python/models/overlay/overlay_image.py +++ b/src-python/models/overlay/overlay_image.py @@ -37,6 +37,8 @@ class OverlayImage: self.root_path = os_path.join(os_path.dirname(__file__), "fonts") else: raise FileNotFoundError("Font directory not found.") + # Simple in-memory font cache to avoid repeated truetype loading cost. + self._font_cache = {} @staticmethod def concatenateImagesVertically(img1: Image, img2: Image, margin: int = 0) -> Image: @@ -71,29 +73,156 @@ class OverlayImage: } return colors - def createTextboxSmallLog(self, text: str, language: str, text_color: Tuple[int, int, int], base_width: int, base_height: int, font_size: int) -> Image: - font_family = self.LANGUAGES.get(language, self.LANGUAGES["Default"]) - img = Image.new("RGBA", (base_width, base_height), (0, 0, 0, 0)) - draw = ImageDraw.Draw(img) - + def _get_font(self, font_family: str, size: int) -> ImageFont.FreeTypeFont: font_path = os_path.join(self.root_path, font_family) - font = ImageFont.truetype(font_path, font_size) + key = (font_path, size) + if key not in self._font_cache: + self._font_cache[key] = ImageFont.truetype(font_path, size) + return self._font_cache[key] - text_width = draw.textlength(text, font) - character_width = text_width // len(text) - character_line_num = int((base_width // character_width) - 12) - if len(text) > character_line_num: - text = "\n".join([text[i:i + character_line_num] for i in range(0, len(text), character_line_num)]) - text_height = font_size * (len(text.split("\n")) + 1) + 20 + def createTextboxSmallLog(self, text: str, language: str, text_color: Tuple[int, int, int], base_width: int, base_height: int, font_size: int) -> Image: + if text is None: + text = "" + font_family = self.LANGUAGES.get(language, self.LANGUAGES["Default"]) + font = self._get_font(font_family, font_size) + + # Initial image for width measurement + img_tmp = Image.new("RGBA", (base_width, base_height), (0, 0, 0, 0)) + draw_tmp = ImageDraw.Draw(img_tmp) + try: + text_width = draw_tmp.textlength(text, font) if len(text) > 0 else 1 + character_width = max(1, text_width // max(1, len(text))) + character_line_num = int((base_width // character_width) - 12) + if len(text) > character_line_num and character_line_num > 0: + text = "\n".join([text[i:i + character_line_num] for i in range(0, len(text), character_line_num)]) + except Exception: + errorLogging() + lines = text.split("\n") if text else [""] + text_height = font_size * (len(lines) + 1) + 20 img = Image.new("RGBA", (base_width, text_height), (0, 0, 0, 0)) draw = ImageDraw.Draw(img) - text_x = base_width // 2 text_y = text_height // 2 draw.text((text_x, text_y), text, text_color, anchor="mm", stroke_width=0, font=font, align="center") return img - def createOverlayImageSmallLog(self, message: str, your_language: str, translation: List[str] = [], target_language: List[str] = []) -> Image: + def renderRubyBlock(self, transliteration_tokens: List[dict], language: str, base_width: int, base_font_size: int, ruby_font_scale: float, ruby_line_spacing: int, text_color: Tuple[int, int, int]) -> Optional[Image.Image]: + # Build romaji and hiragana lines. + romaji_line = " ".join([t.get("hepburn", "") for t in transliteration_tokens if t.get("hepburn")]) + hira_line = " ".join([t.get("hira", "") for t in transliteration_tokens if t.get("hira")]) + if not romaji_line and not hira_line: + return None + font_family = self.LANGUAGES.get(language, self.LANGUAGES["Default"]) + ruby_size = max(1, int(base_font_size * ruby_font_scale)) + font_ruby = self._get_font(font_family, ruby_size) + # Measure widths to center lines independently. + img_tmp = Image.new("RGBA", (base_width, ruby_size * 2 + ruby_line_spacing + 10), (0, 0, 0, 0)) + draw_tmp = ImageDraw.Draw(img_tmp) + romaji_width = draw_tmp.textlength(romaji_line, font_ruby) if romaji_line else 0 + hira_width = draw_tmp.textlength(hira_line, font_ruby) if hira_line else 0 + romaji_x = (base_width - romaji_width) // 2 + hira_x = (base_width - hira_width) // 2 + # Construct final ruby image. + ruby_height = ruby_size * (2 if hira_line and romaji_line else 1) + (ruby_line_spacing if hira_line and romaji_line else 0) + 10 + ruby_img = Image.new("RGBA", (base_width, ruby_height), (0, 0, 0, 0)) + draw = ImageDraw.Draw(ruby_img) + current_y = 5 + ruby_size // 2 + if romaji_line: + draw.text((romaji_x + romaji_width // 2, current_y), romaji_line, text_color, anchor="mm", font=font_ruby) + current_y += ruby_size + (ruby_line_spacing if hira_line else 0) + if hira_line: + draw.text((hira_x + hira_width // 2, current_y), hira_line, text_color, anchor="mm", font=font_ruby) + return ruby_img + + def createTextboxSmallLogWithRubyTokens(self, message: str, transliteration_tokens: List[dict], language: str, text_color: Tuple[int, int, int], base_width: int, font_size: int, ruby_font_scale: float, ruby_line_spacing: int, ruby_original_spacing: int) -> Image: + """Render a single textbox (original message) with per-token centered ruby (romaji above hiragana) over each original token. + + Fallback: if wrapping would occur (message too wide) or tokens mismatch, revert to block-level ruby (renderRubyBlock + createTextboxSmallLog). + """ + if not message or not transliteration_tokens: + return self.createTextboxSmallLog(message, language, text_color, base_width, self.getUiSizeSmallLog()["height"], font_size) + + # Obtain font instances + font_family = self.LANGUAGES.get(language, self.LANGUAGES["Default"]) + font_orig = self._get_font(font_family, font_size) + ruby_size = max(1, int(font_size * ruby_font_scale)) + font_ruby = self._get_font(font_family, ruby_size) + + # Token width measurement + draw_tmp_img = Image.new("RGBA", (1, 1), (0, 0, 0, 0)) + draw_tmp = ImageDraw.Draw(draw_tmp_img) + token_infos = [] + total_width = 0 + for tok in transliteration_tokens: + orig = tok.get("orig", "") + if not orig: + continue + hira = tok.get("hira", "") + romaji = tok.get("hepburn", "") + orig_w = max(1, int(draw_tmp.textlength(orig, font_orig))) + hira_w = max(0, int(draw_tmp.textlength(hira, font_ruby))) if hira else 0 + romaji_w = max(0, int(draw_tmp.textlength(romaji, font_ruby))) if romaji else 0 + layout_w = max(orig_w, hira_w, romaji_w) # allocate width so ruby lines never overflow neighboring token + token_infos.append((orig, hira, romaji, layout_w)) + total_width += layout_w + + if not token_infos: + # Fallback + ruby_block = self.renderRubyBlock(transliteration_tokens, language, base_width, font_size, ruby_font_scale, ruby_line_spacing, text_color) + base_img = self.createTextboxSmallLog(message, language, text_color, base_width, self.getUiSizeSmallLog()["height"], font_size) + if ruby_block: + return self.concatenateImagesVertically(ruby_block, base_img) + return base_img + + # Simple wrapping detection: if total width exceeds base_width * 0.9 → fallback + if total_width > base_width * 0.9: + ruby_block = self.renderRubyBlock(transliteration_tokens, language, base_width, font_size, ruby_font_scale, ruby_line_spacing, text_color) + base_img = self.createTextboxSmallLog(message, language, text_color, base_width, self.getUiSizeSmallLog()["height"], font_size) + if ruby_block: + return self.concatenateImagesVertically(ruby_block, base_img) + return base_img + + # Compute left start for centering complete line + start_x = (base_width - total_width) // 2 + # Vertical positioning + # Symmetric outer padding: make top padding equal to bottom padding (previously top was 4, bottom ~10) + outer_padding = 10 # uniform top & bottom padding for visual balance + ruby_lines_count = 0 + has_romaji_any = any(r for (_, _, r, _) in token_infos) + has_hira_any = any(h for (_, h, _, _) in token_infos) + if has_romaji_any: + ruby_lines_count += 1 + if has_hira_any: + ruby_lines_count += 1 + # Height calculation (replace asymmetric 4/10 with symmetric outer_padding) + ruby_block_height = ruby_lines_count * ruby_size + (ruby_line_spacing if ruby_lines_count == 2 else 0) + total_height = outer_padding + ruby_block_height + ruby_original_spacing + font_size + outer_padding + img = Image.new("RGBA", (base_width, total_height), (0, 0, 0, 0)) + draw = ImageDraw.Draw(img) + + # Y centers + current_y = outer_padding + ruby_size // 2 + romaji_y = current_y if has_romaji_any else None + hira_y = None + if has_romaji_any and has_hira_any: + hira_y = romaji_y + ruby_size + ruby_line_spacing + elif has_hira_any: + hira_y = current_y + + orig_y = outer_padding + ruby_block_height + ruby_original_spacing + font_size // 2 + + # Draw tokens sequentially + cursor_x = start_x + for orig, hira, romaji, w in token_infos: + token_center_x = cursor_x + w // 2 + if romaji_y is not None and romaji: + draw.text((token_center_x, romaji_y), romaji, text_color, anchor="mm", font=font_ruby) + if hira_y is not None and hira: + draw.text((token_center_x, hira_y), hira, text_color, anchor="mm", font=font_ruby) + draw.text((token_center_x, orig_y), orig, text_color, anchor="mm", font=font_orig) + cursor_x += w + return img + def createOverlayImageSmallLog(self, message: str, your_language: str, translation: List[str] = [], target_language: List[str] = [], transliteration_tokens: List[dict] = [], translation_transliteration_tokens: List[List[dict]] = [], ruby_font_scale: float = 0.5, ruby_line_spacing: int = 4) -> Image: # UI設定を取得 ui_size = self.getUiSizeSmallLog() width, height, font_size = ui_size["width"], ui_size["height"], ui_size["font_size"] @@ -107,23 +236,89 @@ class OverlayImage: textbox_images = [] # 翻訳がある場合 + # Use improved per-token placement if possible; else fallback to previous block approach. + ruby_original_spacing = 2 # Narrow vertical gap between hiragana block and original text. if translation and target_language: - # 元のメッセージがある場合は追加 if message: - textbox_images.append( - self.createTextboxSmallLog(message, your_language, text_color, width, height, font_size) - ) - - # 翻訳をすべて追加 - for trans, lang in zip(translation, target_language): - textbox_images.append( - self.createTextboxSmallLog(trans, lang, text_color, width, height, font_size) - ) + if transliteration_tokens: + try: + base_msg_img = self.createTextboxSmallLogWithRubyTokens( + message, + transliteration_tokens, + your_language, + text_color, + width, + font_size, + ruby_font_scale, + ruby_line_spacing, + ruby_original_spacing, + ) + except Exception: + errorLogging() + # Fallback to old method + base_msg_img = self.createTextboxSmallLog(message, your_language, text_color, width, height, font_size) + try: + ruby_img = self.renderRubyBlock(transliteration_tokens, your_language, width, font_size, ruby_font_scale, ruby_line_spacing, text_color) + if ruby_img is not None: + base_msg_img = self.concatenateImagesVertically(ruby_img, base_msg_img) + except Exception: + errorLogging() + else: + base_msg_img = self.createTextboxSmallLog(message, your_language, text_color, width, height, font_size) + textbox_images.append(base_msg_img) + for idx, (trans, lang) in enumerate(zip(translation, target_language)): + # 翻訳行用ルビ (任意) translation_transliteration_tokens[idx] が存在すれば使用 + per_trans_tokens: List[dict] = [] + if idx < len(translation_transliteration_tokens): + candidate = translation_transliteration_tokens[idx] + if isinstance(candidate, list): + per_trans_tokens = candidate + if per_trans_tokens: + try: + trans_img = self.createTextboxSmallLogWithRubyTokens( + trans, + per_trans_tokens, + lang, + text_color, + width, + font_size, + ruby_font_scale, + ruby_line_spacing, + ruby_original_spacing, + ) + except Exception: + errorLogging() + trans_img = self.createTextboxSmallLog(trans, lang, text_color, width, height, font_size) + else: + trans_img = self.createTextboxSmallLog(trans, lang, text_color, width, height, font_size) + textbox_images.append(trans_img) else: - # 翻訳がない場合は元のメッセージのみ - textbox_images.append( - self.createTextboxSmallLog(message, your_language, text_color, width, height, font_size) - ) + # 翻訳無しモード + if message and transliteration_tokens: + try: + base_msg_img = self.createTextboxSmallLogWithRubyTokens( + message, + transliteration_tokens, + your_language, + text_color, + width, + font_size, + ruby_font_scale, + ruby_line_spacing, + ruby_original_spacing, + ) + except Exception: + errorLogging() + base_msg_img = self.createTextboxSmallLog(message, your_language, text_color, width, height, font_size) + try: + ruby_img = self.renderRubyBlock(transliteration_tokens, your_language, width, font_size, ruby_font_scale, ruby_line_spacing, text_color) + if ruby_img is not None: + base_msg_img = self.concatenateImagesVertically(ruby_img, base_msg_img) + except Exception: + errorLogging() + else: + base_msg_img = self.createTextboxSmallLog(message, your_language, text_color, width, height, font_size) + textbox_images.append(base_msg_img) # すべてのテキストボックスを縦に結合 img = textbox_images[0] @@ -297,18 +492,43 @@ class OverlayImage: if __name__ == "__main__": overlay = OverlayImage() - img = overlay.createOverlayImageSmallLog("Hello, World!", "English", "こんにちは、世界!", "Japanese") + # Basic small log test (with translation list form) + img = overlay.createOverlayImageSmallLog("Hello, World!", "English", ["こんにちは、世界!"], ["Japanese"]) img.save("overlay_small.png") - img = overlay.createOverlayImageLargeLog("send", "Hello, World!", "English", "こんにちは、世界!", "Japanese") - img = overlay.createOverlayImageLargeLog("receive", "こんにちは、世界!", "Japanese", "Hello, World!", "English") - img = overlay.createOverlayImageLargeLog("send", "Hello, World!aaaaaaaaaaaaaaaaaあああああああああああああああaaaaaaaaaaaaaaaaaあああああああああああああああ", "English", "aaaaaaaaaaaaaaaaaあああああああああああああああaaaaaaaaaaaaaaaaaあああああああああああああああこんにちは、世界!", "Japanese") - img = overlay.createOverlayImageLargeLog("receive", "こんにちは、世界!aaaaaaaaaaaaaaaaaあああああああああああああああaaaaaaaaaaaaaaaaaあああああああああああああああ", "Japanese", "Hello, World!aaaaaaaaaaaaaaaaaあああああああああああああああaaaaaaaaaaaaaaaaaあああああああああああああああ", "English") - img = overlay.createOverlayImageLargeLog("send", "Hello, World!", "English", "こんにちは、世界!", "Japanese") - img = overlay.createOverlayImageLargeLog("receive", "こんにちは、世界!", "Japanese", "Hello, World!", "English") - img = overlay.createOverlayImageLargeLog("send", "Hello, World!aaaaaaaaaaaaaaaaaあああああああああああああああaaaaaaaaaaaaaaaaaあああああああああああああああ", "English", "aaaaaaaaaaaaaaaaaあああああああああああああああaaaaaaaaaaaaaaaaaあああああああああああああああこんにちは、世界!", "Japanese") - img = overlay.createOverlayImageLargeLog("receive", "こんにちは、世界!aaaaaaaaaaaaaaaaaあああああああああああああああaaaaaaaaaaaaaaaaaあああああああああああああああ", "Japanese", "Hello, World!aaaaaaaaaaaaaaaaaあああああああああああああああaaaaaaaaaaaaaaaaaあああああああああああああああ", "English") - img = overlay.createOverlayImageLargeLog("send", "Hello, World!", "English", "こんにちは、世界!", "Japanese") - img = overlay.createOverlayImageLargeLog("receive", "こんにちは、世界!", "Japanese", "Hello, World!", "English") - img = overlay.createOverlayImageLargeLog("send", "Hello, World!aaaaaaaaaaaaaaaaaあああああああああああああああaaaaaaaaaaaaaaaaaあああああああああああああああ", "English", "aaaaaaaaaaaaaaaaaあああああああああああああああaaaaaaaaaaaaaaaaaあああああああああああああああこんにちは、世界!", "Japanese") - img = overlay.createOverlayImageLargeLog("receive", "こんにちは、世界!aaaaaaaaaaaaaaaaaあああああああああああああああaaaaaaaaaaaaaaaaaあああああああああああああああ", "Japanese", "Hello, World!aaaaaaaaaaaaaaaaaあああああああああああああああaaaaaaaaaaaaaaaaaあああああああああああああああ", "English") + + # Ruby small log test (Japanese original with transliteration tokens) + ruby_tokens = [ + {"orig": "慮", "hira": "おもんぱか", "hepburn": "omonpaka"}, + {"orig": "る", "hira": "る", "hepburn": "ru"}, + ] + # Ruby on original + ruby on translation example + translation_tokens = [ + [ + {"orig": "慮", "hira": "おもんぱか", "hepburn": "omonpaka"}, + {"orig": "る", "hira": "る", "hepburn": "ru"}, + ] + ] + img_ruby = overlay.createOverlayImageSmallLog( + "慮る", + "Japanese", + ["慮る"], + ["Default"], + transliteration_tokens=ruby_tokens, + translation_transliteration_tokens=translation_tokens, + ruby_font_scale=0.5, + ruby_line_spacing=4, + ) + img_ruby.save("overlay_small_ruby.png") + + # Large log tests (adjusted to pass translation/target_language as lists) + img = overlay.createOverlayImageLargeLog("send", "Hello, World!", "English", ["こんにちは、世界!"], ["Japanese"]) + img = overlay.createOverlayImageLargeLog("receive", "こんにちは、世界!", "Japanese", ["Hello, World!"], ["English"]) + long_en = "Hello, World!" + "a"*25 + "あ"*25 + "a"*25 + "あ"*25 + long_jp = "こんにちは、世界!" + "a"*25 + "あ"*25 + "a"*25 + "あ"*25 + img = overlay.createOverlayImageLargeLog("send", long_en, "English", [long_jp], ["Japanese"]) + img = overlay.createOverlayImageLargeLog("receive", long_jp, "Japanese", [long_en], ["English"]) + img = overlay.createOverlayImageLargeLog("send", "Hello, World!", "English", ["こんにちは、世界!"], ["Japanese"]) + img = overlay.createOverlayImageLargeLog("receive", "こんにちは、世界!", "Japanese", ["Hello, World!"], ["English"]) + img = overlay.createOverlayImageLargeLog("send", long_en, "English", [long_jp], ["Japanese"]) + img = overlay.createOverlayImageLargeLog("receive", long_jp, "Japanese", [long_en], ["English"]) img.save("overlay_large.png") \ No newline at end of file