overlay: フォントキャッシュ追加と小型ログのルビ描画を改善

- フォントのメモリキャッシュ(_font_cache)と _get_font を導入し truetype 読み込みを最適化
- createTextboxSmallLog を堅牢化(None対応、幅測定の例外処理、改行判定の改善)
- 単語単位ルビ描画機能を追加(renderRubyBlock / createTextboxSmallLogWithRubyTokens)
  - トークン毎の幅算出/センタリング、行折り返し検出時のフォールバック実装
- createOverlayImageSmallLog を拡張して、元文・翻訳それぞれにルビ適用とフォールバック合成を実行
- __main__ のテスト呼び出しを翻訳リスト形式に合わせて修正し、ルビテスト追加
This commit is contained in:
misyaguziya
2025-10-22 14:23:41 +09:00
parent caeb8b6888
commit 8be132abe6
3 changed files with 303 additions and 44 deletions

View File

@@ -1068,6 +1068,15 @@ class Config:
case "opacity" | "ui_scaling":
if isinstance(value, (int, float)):
self._OVERLAY_SMALL_LOG_SETTINGS[key] = float(value)
case "ruby_font_scale":
if isinstance(value, (int, float)):
v = float(value)
if 0.05 <= v <= 3.0:
self._OVERLAY_SMALL_LOG_SETTINGS[key] = v
case "ruby_line_spacing":
if isinstance(value, int):
if 0 <= value <= 200:
self._OVERLAY_SMALL_LOG_SETTINGS[key] = value
self.saveConfig(inspect.currentframe().f_code.co_name, self.OVERLAY_SMALL_LOG_SETTINGS)
@property
@@ -1423,6 +1432,8 @@ class Config:
"opacity": 1.0,
"ui_scaling": 1.0,
"tracker": "HMD",
"ruby_font_scale": 0.5,
"ruby_line_spacing": 4,
}
self._OVERLAY_LARGE_LOG = False
self._OVERLAY_LARGE_LOG_SETTINGS = {

View File

@@ -944,13 +944,41 @@ class Model:
self.speaker_energy_recorder.stop()
self.speaker_energy_recorder = None
def createOverlayImageSmallLog(self, message:Optional[str], your_language:Optional[str], translation:list, target_language:Optional[dict], translation_transliteration_tokens: Optional[list] = None) -> object:
    """Build the small-log overlay image, adding ruby (transliteration) data.

    Args:
        message: Original message text; may be None.
        your_language: Language name of the original message.
        translation: Translated strings, forwarded unchanged.
        target_language: Dict whose values carry "language" and "enable"
            entries, or None. Only entries whose "enable" is True are used.
        translation_transliteration_tokens: Optional ruby tokens for the
            translation lines. Anything that is not a list is replaced by
            an empty list for backward compatibility.

    Returns:
        Whatever ``self.overlay_image.createOverlayImageSmallLog`` returns.
    """
    self.ensure_initialized()

    # Normalize target_language (dict or None) into a list of enabled languages.
    target_language_list = []
    if isinstance(target_language, dict):
        target_language_list = [data["language"] for data in target_language.values() if data.get("enable") is True]

    # Prepare transliteration tokens only if we have a non-blank original message.
    transliteration_tokens = []
    if isinstance(message, str) and message.strip():
        try:
            # Always request both romaji + hiragana for ruby (romaji upper, hiragana lower).
            transliteration_tokens = self.convertMessageToTransliteration(message, hiragana=True, romaji=True)
        except Exception:
            # Best effort: a transliteration failure must not block the overlay itself.
            transliteration_tokens = []
            errorLogging()

    # Fetch ruby settings from config (with safe defaults if the keys are missing).
    ruby_font_scale = config.OVERLAY_SMALL_LOG_SETTINGS.get("ruby_font_scale", 0.5)
    ruby_line_spacing = config.OVERLAY_SMALL_LOG_SETTINGS.get("ruby_line_spacing", 4)

    # Optional ruby for translation lines; None or a wrong type becomes an empty list.
    if not isinstance(translation_transliteration_tokens, list):
        translation_transliteration_tokens = []

    return self.overlay_image.createOverlayImageSmallLog(
        message,
        your_language,
        translation,
        target_language_list,
        transliteration_tokens=transliteration_tokens,
        translation_transliteration_tokens=translation_transliteration_tokens,
        ruby_font_scale=ruby_font_scale,
        ruby_line_spacing=ruby_line_spacing,
    )
def createOverlayImageSmallMessage(self, message):
self.ensure_initialized()

View File

@@ -37,6 +37,8 @@ class OverlayImage:
self.root_path = os_path.join(os_path.dirname(__file__), "fonts")
else:
raise FileNotFoundError("Font directory not found.")
# Simple in-memory font cache to avoid repeated truetype loading cost.
self._font_cache = {}
@staticmethod
def concatenateImagesVertically(img1: Image, img2: Image, margin: int = 0) -> Image:
@@ -71,29 +73,156 @@ class OverlayImage:
}
return colors
def createTextboxSmallLog(self, text: str, language: str, text_color: Tuple[int, int, int], base_width: int, base_height: int, font_size: int) -> Image:
font_family = self.LANGUAGES.get(language, self.LANGUAGES["Default"])
img = Image.new("RGBA", (base_width, base_height), (0, 0, 0, 0))
draw = ImageDraw.Draw(img)
def _get_font(self, font_family: str, size: int) -> ImageFont.FreeTypeFont:
    """Return the font for ``font_family`` at ``size``, loading it at most once.

    Args:
        font_family: Font file name, resolved relative to ``self.root_path``.
        size: Font size passed to ``ImageFont.truetype``.

    Returns:
        The font object stored in ``self._font_cache`` for this
        (path, size) pair; it is loaded on the first request only.
    """
    font_path = os_path.join(self.root_path, font_family)
    # The same file at a different size is a distinct font object, so key on both.
    key = (font_path, size)
    if key not in self._font_cache:
        self._font_cache[key] = ImageFont.truetype(font_path, size)
    return self._font_cache[key]
text_width = draw.textlength(text, font)
character_width = text_width // len(text)
character_line_num = int((base_width // character_width) - 12)
if len(text) > character_line_num:
text = "\n".join([text[i:i + character_line_num] for i in range(0, len(text), character_line_num)])
text_height = font_size * (len(text.split("\n")) + 1) + 20
def createTextboxSmallLog(self, text: str, language: str, text_color: Tuple[int, int, int], base_width: int, base_height: int, font_size: int) -> Image:
    """Render ``text`` centered on a transparent RGBA strip ``base_width`` wide.

    ``None`` is treated as an empty string. Text longer than the estimated
    per-line capacity (average character width, minus a 12-character
    margin) is split into fixed-length chunks joined by newlines. A failure
    while measuring or wrapping is logged and the text is drawn as-is.
    The output height is derived from the line count, not ``base_height``.
    """
    text = "" if text is None else text
    family = self.LANGUAGES.get(language, self.LANGUAGES["Default"])
    font = self._get_font(family, font_size)

    # Scratch canvas that exists only to measure the rendered text width.
    scratch = Image.new("RGBA", (base_width, base_height), (0, 0, 0, 0))
    measurer = ImageDraw.Draw(scratch)
    try:
        char_count = len(text)
        rendered_width = measurer.textlength(text, font) if char_count > 0 else 1
        avg_char_width = max(1, rendered_width // max(1, char_count))
        chars_per_line = int((base_width // avg_char_width) - 12)
        # Wrap only when a positive capacity is exceeded.
        if 0 < chars_per_line < char_count:
            chunks = []
            for start in range(0, char_count, chars_per_line):
                chunks.append(text[start:start + chars_per_line])
            text = "\n".join(chunks)
    except Exception:
        errorLogging()

    line_count = len(text.split("\n")) if text else 1
    text_height = font_size * (line_count + 1) + 20

    canvas = Image.new("RGBA", (base_width, text_height), (0, 0, 0, 0))
    pen = ImageDraw.Draw(canvas)
    center = (base_width // 2, text_height // 2)
    pen.text(center, text, text_color, anchor="mm", stroke_width=0, font=font, align="center")
    return canvas
def createOverlayImageSmallLog(self, message: str, your_language: str, translation: List[str] = [], target_language: List[str] = []) -> Image:
def renderRubyBlock(self, transliteration_tokens: List[dict], language: str, base_width: int, base_font_size: int, ruby_font_scale: float, ruby_line_spacing: int, text_color: Tuple[int, int, int]) -> Optional[Image.Image]:
    """Render block-level ruby: one romaji line above one hiragana line.

    The non-empty "hepburn" values of all tokens are joined with spaces
    into the romaji line, and the non-empty "hira" values into the hiragana
    line. Each line is centered horizontally on a transparent RGBA image
    ``base_width`` wide. Returns None when both lines are empty.
    """
    def joined(key):
        # Space-separated values of ``key`` across tokens, skipping empty ones.
        return " ".join(tok.get(key, "") for tok in transliteration_tokens if tok.get(key))

    romaji_text = joined("hepburn")
    hira_text = joined("hira")
    if not (romaji_text or hira_text):
        return None

    family = self.LANGUAGES.get(language, self.LANGUAGES["Default"])
    ruby_size = max(1, int(base_font_size * ruby_font_scale))
    ruby_font = self._get_font(family, ruby_size)

    # Scratch canvas used only for width measurement.
    scratch = Image.new("RGBA", (base_width, ruby_size * 2 + ruby_line_spacing + 10), (0, 0, 0, 0))
    measurer = ImageDraw.Draw(scratch)
    romaji_width = measurer.textlength(romaji_text, ruby_font) if romaji_text else 0
    hira_width = measurer.textlength(hira_text, ruby_font) if hira_text else 0
    romaji_left = (base_width - romaji_width) // 2
    hira_left = (base_width - hira_width) // 2

    # Height: one or two ruby lines, inter-line spacing only for two, plus 10px padding.
    if hira_text and romaji_text:
        ruby_height = ruby_size * 2 + ruby_line_spacing + 10
    else:
        ruby_height = ruby_size * 1 + 0 + 10
    canvas = Image.new("RGBA", (base_width, ruby_height), (0, 0, 0, 0))
    pen = ImageDraw.Draw(canvas)

    # Lines are drawn with the "mm" anchor, so track the vertical center of each.
    line_y = 5 + ruby_size // 2
    if romaji_text:
        pen.text((romaji_left + romaji_width // 2, line_y), romaji_text, text_color, anchor="mm", font=ruby_font)
        line_y += ruby_size + (ruby_line_spacing if hira_text else 0)
    if hira_text:
        pen.text((hira_left + hira_width // 2, line_y), hira_text, text_color, anchor="mm", font=ruby_font)
    return canvas
def createTextboxSmallLogWithRubyTokens(self, message: str, transliteration_tokens: List[dict], language: str, text_color: Tuple[int, int, int], base_width: int, font_size: int, ruby_font_scale: float, ruby_line_spacing: int, ruby_original_spacing: int) -> Image:
    """Render one textbox with per-token ruby centered above each original token.

    Each token dict is read for "orig" (original text), "hira" and
    "hepburn". Romaji is drawn on the upper ruby line, hiragana on the lower
    one, and the original token below both. Every token gets a column as
    wide as its widest part so ruby never overlaps a neighboring token.

    The block-level layout (renderRubyBlock stacked above
    createTextboxSmallLog) is used instead when per-token placement cannot
    reproduce the message faithfully:
      * no token has a non-empty "orig";
      * a token carries ruby but no "orig" (its ruby could not be placed);
      * the concatenated "orig" values differ from ``message``
        (whitespace ignored), which would drop or alter message text;
      * the summed column width exceeds 90% of ``base_width`` (would wrap).

    Args:
        message: Original text. Empty/None yields a plain textbox.
        transliteration_tokens: Token dicts as described above. An empty
            list yields a plain textbox.
        language: Key into ``self.LANGUAGES`` used to pick the font.
        text_color: RGB color used for all text.
        base_width: Output image width in pixels.
        font_size: Font size of the original text.
        ruby_font_scale: Ruby font size as a fraction of ``font_size``.
        ruby_line_spacing: Gap between the romaji and hiragana lines.
        ruby_original_spacing: Gap between the ruby lines and the original text.

    Returns:
        A transparent RGBA image ``base_width`` wide.
    """
    if not message or not transliteration_tokens:
        return self.createTextboxSmallLog(message, language, text_color, base_width, self.getUiSizeSmallLog()["height"], font_size)

    def block_fallback():
        # Block-level layout: ruby lines stacked above the plain (possibly wrapped) textbox.
        ruby_block = self.renderRubyBlock(transliteration_tokens, language, base_width, font_size, ruby_font_scale, ruby_line_spacing, text_color)
        base_img = self.createTextboxSmallLog(message, language, text_color, base_width, self.getUiSizeSmallLog()["height"], font_size)
        if ruby_block is not None:
            return self.concatenateImagesVertically(ruby_block, base_img)
        return base_img

    # Obtain font instances
    font_family = self.LANGUAGES.get(language, self.LANGUAGES["Default"])
    font_orig = self._get_font(font_family, font_size)
    ruby_size = max(1, int(font_size * ruby_font_scale))
    font_ruby = self._get_font(font_family, ruby_size)

    # Token width measurement (a 1x1 scratch image is enough for textlength).
    draw_tmp = ImageDraw.Draw(Image.new("RGBA", (1, 1), (0, 0, 0, 0)))
    token_infos = []
    total_width = 0
    orphan_ruby = False  # True when a token has ruby but no original text to attach it to.
    for tok in transliteration_tokens:
        orig = tok.get("orig", "")
        hira = tok.get("hira", "")
        romaji = tok.get("hepburn", "")
        if not orig:
            if hira or romaji:
                orphan_ruby = True
            continue
        orig_w = max(1, int(draw_tmp.textlength(orig, font_orig)))
        hira_w = max(0, int(draw_tmp.textlength(hira, font_ruby))) if hira else 0
        romaji_w = max(0, int(draw_tmp.textlength(romaji, font_ruby))) if romaji else 0
        # Allocate width so ruby lines never overflow into a neighboring token.
        layout_w = max(orig_w, hira_w, romaji_w)
        token_infos.append((orig, hira, romaji, layout_w))
        total_width += layout_w

    if not token_infos or orphan_ruby:
        return block_fallback()

    # Token mismatch: per-token drawing renders only the "orig" values, so they
    # must reproduce the message (whitespace aside) or text would be lost.
    covered_text = "".join(orig for orig, _, _, _ in token_infos)
    if "".join(covered_text.split()) != "".join(message.split()):
        return block_fallback()

    # Simple wrapping detection: a line wider than 90% of the box would need wrapping.
    if total_width > base_width * 0.9:
        return block_fallback()

    # Left edge that centers the complete line.
    start_x = (base_width - total_width) // 2

    # Uniform top and bottom padding for visual balance.
    outer_padding = 10
    has_romaji_any = any(r for (_, _, r, _) in token_infos)
    has_hira_any = any(h for (_, h, _, _) in token_infos)
    ruby_lines_count = int(has_romaji_any) + int(has_hira_any)

    ruby_block_height = ruby_lines_count * ruby_size + (ruby_line_spacing if ruby_lines_count == 2 else 0)
    total_height = outer_padding + ruby_block_height + ruby_original_spacing + font_size + outer_padding
    img = Image.new("RGBA", (base_width, total_height), (0, 0, 0, 0))
    draw = ImageDraw.Draw(img)

    # Vertical centers of each line (text is drawn with the "mm" anchor).
    first_ruby_y = outer_padding + ruby_size // 2
    romaji_y = first_ruby_y if has_romaji_any else None
    hira_y = None
    if has_romaji_any and has_hira_any:
        hira_y = romaji_y + ruby_size + ruby_line_spacing
    elif has_hira_any:
        hira_y = first_ruby_y
    orig_y = outer_padding + ruby_block_height + ruby_original_spacing + font_size // 2

    # Draw tokens left to right, each centered within its own column.
    cursor_x = start_x
    for orig, hira, romaji, w in token_infos:
        token_center_x = cursor_x + w // 2
        if romaji_y is not None and romaji:
            draw.text((token_center_x, romaji_y), romaji, text_color, anchor="mm", font=font_ruby)
        if hira_y is not None and hira:
            draw.text((token_center_x, hira_y), hira, text_color, anchor="mm", font=font_ruby)
        draw.text((token_center_x, orig_y), orig, text_color, anchor="mm", font=font_orig)
        cursor_x += w
    return img
def createOverlayImageSmallLog(self, message: str, your_language: str, translation: List[str] = [], target_language: List[str] = [], transliteration_tokens: List[dict] = [], translation_transliteration_tokens: List[List[dict]] = [], ruby_font_scale: float = 0.5, ruby_line_spacing: int = 4) -> Image:
# UI設定を取得
ui_size = self.getUiSizeSmallLog()
width, height, font_size = ui_size["width"], ui_size["height"], ui_size["font_size"]
@@ -107,23 +236,89 @@ class OverlayImage:
textbox_images = []
# 翻訳がある場合
# Use improved per-token placement if possible; else fallback to previous block approach.
ruby_original_spacing = 2 # Narrow vertical gap between hiragana block and original text.
if translation and target_language:
# 元のメッセージがある場合は追加
if message:
textbox_images.append(
self.createTextboxSmallLog(message, your_language, text_color, width, height, font_size)
)
# 翻訳をすべて追加
for trans, lang in zip(translation, target_language):
textbox_images.append(
self.createTextboxSmallLog(trans, lang, text_color, width, height, font_size)
)
if transliteration_tokens:
try:
base_msg_img = self.createTextboxSmallLogWithRubyTokens(
message,
transliteration_tokens,
your_language,
text_color,
width,
font_size,
ruby_font_scale,
ruby_line_spacing,
ruby_original_spacing,
)
except Exception:
errorLogging()
# Fallback to old method
base_msg_img = self.createTextboxSmallLog(message, your_language, text_color, width, height, font_size)
try:
ruby_img = self.renderRubyBlock(transliteration_tokens, your_language, width, font_size, ruby_font_scale, ruby_line_spacing, text_color)
if ruby_img is not None:
base_msg_img = self.concatenateImagesVertically(ruby_img, base_msg_img)
except Exception:
errorLogging()
else:
base_msg_img = self.createTextboxSmallLog(message, your_language, text_color, width, height, font_size)
textbox_images.append(base_msg_img)
for idx, (trans, lang) in enumerate(zip(translation, target_language)):
# 翻訳行用ルビ (任意) translation_transliteration_tokens[idx] が存在すれば使用
per_trans_tokens: List[dict] = []
if idx < len(translation_transliteration_tokens):
candidate = translation_transliteration_tokens[idx]
if isinstance(candidate, list):
per_trans_tokens = candidate
if per_trans_tokens:
try:
trans_img = self.createTextboxSmallLogWithRubyTokens(
trans,
per_trans_tokens,
lang,
text_color,
width,
font_size,
ruby_font_scale,
ruby_line_spacing,
ruby_original_spacing,
)
except Exception:
errorLogging()
trans_img = self.createTextboxSmallLog(trans, lang, text_color, width, height, font_size)
else:
trans_img = self.createTextboxSmallLog(trans, lang, text_color, width, height, font_size)
textbox_images.append(trans_img)
else:
# 翻訳がない場合は元のメッセージのみ
textbox_images.append(
self.createTextboxSmallLog(message, your_language, text_color, width, height, font_size)
)
# 翻訳無しモード
if message and transliteration_tokens:
try:
base_msg_img = self.createTextboxSmallLogWithRubyTokens(
message,
transliteration_tokens,
your_language,
text_color,
width,
font_size,
ruby_font_scale,
ruby_line_spacing,
ruby_original_spacing,
)
except Exception:
errorLogging()
base_msg_img = self.createTextboxSmallLog(message, your_language, text_color, width, height, font_size)
try:
ruby_img = self.renderRubyBlock(transliteration_tokens, your_language, width, font_size, ruby_font_scale, ruby_line_spacing, text_color)
if ruby_img is not None:
base_msg_img = self.concatenateImagesVertically(ruby_img, base_msg_img)
except Exception:
errorLogging()
else:
base_msg_img = self.createTextboxSmallLog(message, your_language, text_color, width, height, font_size)
textbox_images.append(base_msg_img)
# すべてのテキストボックスを縦に結合
img = textbox_images[0]
@@ -297,18 +492,43 @@ class OverlayImage:
if __name__ == "__main__":
    # Manual smoke test: renders sample overlays and writes them as PNG files
    # into the current working directory.
    overlay = OverlayImage()

    # Basic small log test (with translation list form)
    img = overlay.createOverlayImageSmallLog("Hello, World!", "English", ["こんにちは、世界!"], ["Japanese"])
    img.save("overlay_small.png")

    # Ruby small log test (Japanese original with transliteration tokens)
    # NOTE(review): the empty "orig"/"hira" values below may be characters lost
    # in extraction - confirm against the repository.
    ruby_tokens = [
        {"orig": "", "hira": "おもんぱか", "hepburn": "omonpaka"},
        {"orig": "", "hira": "", "hepburn": "ru"},
    ]
    # Ruby on original + ruby on translation example
    translation_tokens = [
        [
            {"orig": "", "hira": "おもんぱか", "hepburn": "omonpaka"},
            {"orig": "", "hira": "", "hepburn": "ru"},
        ]
    ]
    img_ruby = overlay.createOverlayImageSmallLog(
        "慮る",
        "Japanese",
        ["慮る"],
        ["Default"],
        transliteration_tokens=ruby_tokens,
        translation_transliteration_tokens=translation_tokens,
        ruby_font_scale=0.5,
        ruby_line_spacing=4,
    )
    img_ruby.save("overlay_small_ruby.png")

    # Large log tests (adjusted to pass translation/target_language as lists)
    img = overlay.createOverlayImageLargeLog("send", "Hello, World!", "English", ["こんにちは、世界!"], ["Japanese"])
    img = overlay.createOverlayImageLargeLog("receive", "こんにちは、世界!", "Japanese", ["Hello, World!"], ["English"])
    # NOTE(review): ""*25 evaluates to an empty string; the repeated character
    # looks lost in extraction - confirm against the repository.
    long_en = "Hello, World!" + "a"*25 + ""*25 + "a"*25 + ""*25
    long_jp = "こんにちは、世界!" + "a"*25 + ""*25 + "a"*25 + ""*25
    img = overlay.createOverlayImageLargeLog("send", long_en, "English", [long_jp], ["Japanese"])
    img = overlay.createOverlayImageLargeLog("receive", long_jp, "Japanese", [long_en], ["English"])
    img = overlay.createOverlayImageLargeLog("send", "Hello, World!", "English", ["こんにちは、世界!"], ["Japanese"])
    img = overlay.createOverlayImageLargeLog("receive", "こんにちは、世界!", "Japanese", ["Hello, World!"], ["English"])
    img = overlay.createOverlayImageLargeLog("send", long_en, "English", [long_jp], ["Japanese"])
    img = overlay.createOverlayImageLargeLog("receive", long_jp, "Japanese", [long_en], ["English"])
    # Each call overwrites ``img``, so only the result of the last call is saved.
    img.save("overlay_large.png")