Files
VRCT/src-python/models/overlay/overlay_image.py
misyaguziya 8be132abe6 overlay: フォントキャッシュ追加と小型ログのルビ描画を改善
- フォントのメモリキャッシュ(_font_cache)と _get_font を導入し truetype 読み込みを最適化
- createTextboxSmallLog を堅牢化(None対応、幅測定の例外処理、改行判定の改善)
- 単語単位ルビ描画機能を追加(renderRubyBlock / createTextboxSmallLogWithRubyTokens)
  - トークン毎の幅算出/センタリング、行折り返し検出時のフォールバック実装
- createOverlayImageSmallLog を拡張して、元文・翻訳それぞれにルビ適用とフォールバック合成を実行
- __main__ のテスト呼び出しを翻訳リスト形式に合わせて修正し、ルビテスト追加
2025-10-22 14:23:41 +09:00

534 lines
26 KiB
Python

from os import path as os_path
from datetime import datetime
from typing import Tuple, List, Optional
from PIL import Image, ImageDraw, ImageFont
try:
from utils import errorLogging
except ImportError:
def errorLogging():
import traceback
print(traceback.format_exc())
class OverlayImage:
LANGUAGES = {
"Default": "NotoSansJP-Regular.ttf",
"Japanese": "NotoSansJP-Regular.ttf",
"Korean": "NotoSansKR-Regular.ttf",
"Chinese Simplified": "NotoSansSC-Regular.ttf",
"Chinese Traditional": "NotoSansTC-Regular.ttf",
}
def __init__(self, root_path: Optional[str] = None) -> None:
"""Overlay image helper.
Args:
root_path: optional project root to resolve bundled fonts. If omitted,
defaults to repository `fonts` directory.
"""
self.message_log: List[dict] = []
# PyInstallerでビルドされた場合のパス
if root_path and os_path.exists(os_path.join(root_path, "_internal", "fonts")):
self.root_path = os_path.join(root_path, "_internal", "fonts")
# src-pythonフォルダから直接実行している場合のパス
elif os_path.exists(os_path.join(os_path.dirname(__file__), "models", "overlay", "fonts")):
self.root_path = os_path.join(os_path.dirname(__file__), "models", "overlay", "fonts")
# overlayフォルダから直接実行している場合のパス
elif os_path.exists(os_path.join(os_path.dirname(__file__), "fonts")):
self.root_path = os_path.join(os_path.dirname(__file__), "fonts")
else:
raise FileNotFoundError("Font directory not found.")
# Simple in-memory font cache to avoid repeated truetype loading cost.
self._font_cache = {}
@staticmethod
def concatenateImagesVertically(img1: Image, img2: Image, margin: int = 0) -> Image:
total_height = img1.height + img2.height + margin
dst = Image.new("RGBA", (img1.width, total_height))
dst.paste(img1, (0, 0))
dst.paste(img2, (0, img1.height + margin))
return dst
@staticmethod
def addImageMargin(image: Image, top: int, right: int, bottom: int, left: int, color: Tuple[int, int, int, int]) -> Image:
new_width = image.width + right + left
new_height = image.height + top + bottom
result = Image.new(image.mode, (new_width, new_height), color)
result.paste(image, (left, top))
return result
@staticmethod
def getUiSizeSmallLog() -> dict:
return {
"width": 3840,
"height": 92,
"font_size": 92,
}
@staticmethod
def getUiColorSmallLog() -> dict:
colors = {
"background_color": (41, 42, 45),
"background_outline_color": (41, 42, 45),
"text_color": (223, 223, 223)
}
return colors
def _get_font(self, font_family: str, size: int) -> ImageFont.FreeTypeFont:
font_path = os_path.join(self.root_path, font_family)
key = (font_path, size)
if key not in self._font_cache:
self._font_cache[key] = ImageFont.truetype(font_path, size)
return self._font_cache[key]
def createTextboxSmallLog(self, text: str, language: str, text_color: Tuple[int, int, int], base_width: int, base_height: int, font_size: int) -> Image:
if text is None:
text = ""
font_family = self.LANGUAGES.get(language, self.LANGUAGES["Default"])
font = self._get_font(font_family, font_size)
# Initial image for width measurement
img_tmp = Image.new("RGBA", (base_width, base_height), (0, 0, 0, 0))
draw_tmp = ImageDraw.Draw(img_tmp)
try:
text_width = draw_tmp.textlength(text, font) if len(text) > 0 else 1
character_width = max(1, text_width // max(1, len(text)))
character_line_num = int((base_width // character_width) - 12)
if len(text) > character_line_num and character_line_num > 0:
text = "\n".join([text[i:i + character_line_num] for i in range(0, len(text), character_line_num)])
except Exception:
errorLogging()
lines = text.split("\n") if text else [""]
text_height = font_size * (len(lines) + 1) + 20
img = Image.new("RGBA", (base_width, text_height), (0, 0, 0, 0))
draw = ImageDraw.Draw(img)
text_x = base_width // 2
text_y = text_height // 2
draw.text((text_x, text_y), text, text_color, anchor="mm", stroke_width=0, font=font, align="center")
return img
def renderRubyBlock(self, transliteration_tokens: List[dict], language: str, base_width: int, base_font_size: int, ruby_font_scale: float, ruby_line_spacing: int, text_color: Tuple[int, int, int]) -> Optional[Image.Image]:
# Build romaji and hiragana lines.
romaji_line = " ".join([t.get("hepburn", "") for t in transliteration_tokens if t.get("hepburn")])
hira_line = " ".join([t.get("hira", "") for t in transliteration_tokens if t.get("hira")])
if not romaji_line and not hira_line:
return None
font_family = self.LANGUAGES.get(language, self.LANGUAGES["Default"])
ruby_size = max(1, int(base_font_size * ruby_font_scale))
font_ruby = self._get_font(font_family, ruby_size)
# Measure widths to center lines independently.
img_tmp = Image.new("RGBA", (base_width, ruby_size * 2 + ruby_line_spacing + 10), (0, 0, 0, 0))
draw_tmp = ImageDraw.Draw(img_tmp)
romaji_width = draw_tmp.textlength(romaji_line, font_ruby) if romaji_line else 0
hira_width = draw_tmp.textlength(hira_line, font_ruby) if hira_line else 0
romaji_x = (base_width - romaji_width) // 2
hira_x = (base_width - hira_width) // 2
# Construct final ruby image.
ruby_height = ruby_size * (2 if hira_line and romaji_line else 1) + (ruby_line_spacing if hira_line and romaji_line else 0) + 10
ruby_img = Image.new("RGBA", (base_width, ruby_height), (0, 0, 0, 0))
draw = ImageDraw.Draw(ruby_img)
current_y = 5 + ruby_size // 2
if romaji_line:
draw.text((romaji_x + romaji_width // 2, current_y), romaji_line, text_color, anchor="mm", font=font_ruby)
current_y += ruby_size + (ruby_line_spacing if hira_line else 0)
if hira_line:
draw.text((hira_x + hira_width // 2, current_y), hira_line, text_color, anchor="mm", font=font_ruby)
return ruby_img
def createTextboxSmallLogWithRubyTokens(self, message: str, transliteration_tokens: List[dict], language: str, text_color: Tuple[int, int, int], base_width: int, font_size: int, ruby_font_scale: float, ruby_line_spacing: int, ruby_original_spacing: int) -> Image:
"""Render a single textbox (original message) with per-token centered ruby (romaji above hiragana) over each original token.
Fallback: if wrapping would occur (message too wide) or tokens mismatch, revert to block-level ruby (renderRubyBlock + createTextboxSmallLog).
"""
if not message or not transliteration_tokens:
return self.createTextboxSmallLog(message, language, text_color, base_width, self.getUiSizeSmallLog()["height"], font_size)
# Obtain font instances
font_family = self.LANGUAGES.get(language, self.LANGUAGES["Default"])
font_orig = self._get_font(font_family, font_size)
ruby_size = max(1, int(font_size * ruby_font_scale))
font_ruby = self._get_font(font_family, ruby_size)
# Token width measurement
draw_tmp_img = Image.new("RGBA", (1, 1), (0, 0, 0, 0))
draw_tmp = ImageDraw.Draw(draw_tmp_img)
token_infos = []
total_width = 0
for tok in transliteration_tokens:
orig = tok.get("orig", "")
if not orig:
continue
hira = tok.get("hira", "")
romaji = tok.get("hepburn", "")
orig_w = max(1, int(draw_tmp.textlength(orig, font_orig)))
hira_w = max(0, int(draw_tmp.textlength(hira, font_ruby))) if hira else 0
romaji_w = max(0, int(draw_tmp.textlength(romaji, font_ruby))) if romaji else 0
layout_w = max(orig_w, hira_w, romaji_w) # allocate width so ruby lines never overflow neighboring token
token_infos.append((orig, hira, romaji, layout_w))
total_width += layout_w
if not token_infos:
# Fallback
ruby_block = self.renderRubyBlock(transliteration_tokens, language, base_width, font_size, ruby_font_scale, ruby_line_spacing, text_color)
base_img = self.createTextboxSmallLog(message, language, text_color, base_width, self.getUiSizeSmallLog()["height"], font_size)
if ruby_block:
return self.concatenateImagesVertically(ruby_block, base_img)
return base_img
# Simple wrapping detection: if total width exceeds base_width * 0.9 → fallback
if total_width > base_width * 0.9:
ruby_block = self.renderRubyBlock(transliteration_tokens, language, base_width, font_size, ruby_font_scale, ruby_line_spacing, text_color)
base_img = self.createTextboxSmallLog(message, language, text_color, base_width, self.getUiSizeSmallLog()["height"], font_size)
if ruby_block:
return self.concatenateImagesVertically(ruby_block, base_img)
return base_img
# Compute left start for centering complete line
start_x = (base_width - total_width) // 2
# Vertical positioning
# Symmetric outer padding: make top padding equal to bottom padding (previously top was 4, bottom ~10)
outer_padding = 10 # uniform top & bottom padding for visual balance
ruby_lines_count = 0
has_romaji_any = any(r for (_, _, r, _) in token_infos)
has_hira_any = any(h for (_, h, _, _) in token_infos)
if has_romaji_any:
ruby_lines_count += 1
if has_hira_any:
ruby_lines_count += 1
# Height calculation (replace asymmetric 4/10 with symmetric outer_padding)
ruby_block_height = ruby_lines_count * ruby_size + (ruby_line_spacing if ruby_lines_count == 2 else 0)
total_height = outer_padding + ruby_block_height + ruby_original_spacing + font_size + outer_padding
img = Image.new("RGBA", (base_width, total_height), (0, 0, 0, 0))
draw = ImageDraw.Draw(img)
# Y centers
current_y = outer_padding + ruby_size // 2
romaji_y = current_y if has_romaji_any else None
hira_y = None
if has_romaji_any and has_hira_any:
hira_y = romaji_y + ruby_size + ruby_line_spacing
elif has_hira_any:
hira_y = current_y
orig_y = outer_padding + ruby_block_height + ruby_original_spacing + font_size // 2
# Draw tokens sequentially
cursor_x = start_x
for orig, hira, romaji, w in token_infos:
token_center_x = cursor_x + w // 2
if romaji_y is not None and romaji:
draw.text((token_center_x, romaji_y), romaji, text_color, anchor="mm", font=font_ruby)
if hira_y is not None and hira:
draw.text((token_center_x, hira_y), hira, text_color, anchor="mm", font=font_ruby)
draw.text((token_center_x, orig_y), orig, text_color, anchor="mm", font=font_orig)
cursor_x += w
return img
def createOverlayImageSmallLog(self, message: str, your_language: str, translation: List[str] = [], target_language: List[str] = [], transliteration_tokens: List[dict] = [], translation_transliteration_tokens: List[List[dict]] = [], ruby_font_scale: float = 0.5, ruby_line_spacing: int = 4) -> Image:
# UI設定を取得
ui_size = self.getUiSizeSmallLog()
width, height, font_size = ui_size["width"], ui_size["height"], ui_size["font_size"]
ui_colors = self.getUiColorSmallLog()
text_color = ui_colors["text_color"]
background_color = ui_colors["background_color"]
background_outline_color = ui_colors["background_outline_color"]
# テキストボックス画像のリストを作成
textbox_images = []
# 翻訳がある場合
# Use improved per-token placement if possible; else fallback to previous block approach.
ruby_original_spacing = 2 # Narrow vertical gap between hiragana block and original text.
if translation and target_language:
if message:
if transliteration_tokens:
try:
base_msg_img = self.createTextboxSmallLogWithRubyTokens(
message,
transliteration_tokens,
your_language,
text_color,
width,
font_size,
ruby_font_scale,
ruby_line_spacing,
ruby_original_spacing,
)
except Exception:
errorLogging()
# Fallback to old method
base_msg_img = self.createTextboxSmallLog(message, your_language, text_color, width, height, font_size)
try:
ruby_img = self.renderRubyBlock(transliteration_tokens, your_language, width, font_size, ruby_font_scale, ruby_line_spacing, text_color)
if ruby_img is not None:
base_msg_img = self.concatenateImagesVertically(ruby_img, base_msg_img)
except Exception:
errorLogging()
else:
base_msg_img = self.createTextboxSmallLog(message, your_language, text_color, width, height, font_size)
textbox_images.append(base_msg_img)
for idx, (trans, lang) in enumerate(zip(translation, target_language)):
# 翻訳行用ルビ (任意) translation_transliteration_tokens[idx] が存在すれば使用
per_trans_tokens: List[dict] = []
if idx < len(translation_transliteration_tokens):
candidate = translation_transliteration_tokens[idx]
if isinstance(candidate, list):
per_trans_tokens = candidate
if per_trans_tokens:
try:
trans_img = self.createTextboxSmallLogWithRubyTokens(
trans,
per_trans_tokens,
lang,
text_color,
width,
font_size,
ruby_font_scale,
ruby_line_spacing,
ruby_original_spacing,
)
except Exception:
errorLogging()
trans_img = self.createTextboxSmallLog(trans, lang, text_color, width, height, font_size)
else:
trans_img = self.createTextboxSmallLog(trans, lang, text_color, width, height, font_size)
textbox_images.append(trans_img)
else:
# 翻訳無しモード
if message and transliteration_tokens:
try:
base_msg_img = self.createTextboxSmallLogWithRubyTokens(
message,
transliteration_tokens,
your_language,
text_color,
width,
font_size,
ruby_font_scale,
ruby_line_spacing,
ruby_original_spacing,
)
except Exception:
errorLogging()
base_msg_img = self.createTextboxSmallLog(message, your_language, text_color, width, height, font_size)
try:
ruby_img = self.renderRubyBlock(transliteration_tokens, your_language, width, font_size, ruby_font_scale, ruby_line_spacing, text_color)
if ruby_img is not None:
base_msg_img = self.concatenateImagesVertically(ruby_img, base_msg_img)
except Exception:
errorLogging()
else:
base_msg_img = self.createTextboxSmallLog(message, your_language, text_color, width, height, font_size)
textbox_images.append(base_msg_img)
# すべてのテキストボックスを縦に結合
img = textbox_images[0]
for textbox_img in textbox_images[1:]:
img = self.concatenateImagesVertically(img, textbox_img)
# 角丸背景を作成
background = Image.new("RGBA", img.size, (0, 0, 0, 0))
draw = ImageDraw.Draw(background)
draw.rounded_rectangle([(0, 0), img.size], radius=50, fill=background_color, outline=background_outline_color, width=5)
# 背景とテキストを合成
img = Image.alpha_composite(background, img)
return img
@staticmethod
def getUiSizeLargeLog() -> dict:
return {
"width": 960,
"font_size_large": 30,
"font_size_small": 20,
"margin": 25,
"radius": 25,
"padding": 10,
"clause_margin": 20,
}
@staticmethod
def getUiColorLargeLog() -> dict:
return {
"background_color": (41, 42, 45),
"background_outline_color": (41, 42, 45),
"text_color_large": (223, 223, 223),
"text_color_small": (190, 190, 190),
"text_color_send": (97, 151, 180),
"text_color_receive": (168, 97, 180),
"text_color_time": (120, 120, 120)
}
def createTextImageLargeLog(self, message_type: str, size: str, text: str, language: str) -> Image:
ui_size = self.getUiSizeLargeLog()
font_size = ui_size["font_size_large"] if size == "large" else ui_size["font_size_small"]
text_color = self.getUiColorLargeLog()[f"text_color_{size}"]
anchor = "lm" if message_type == "receive" else "rm"
text_x = 0 if message_type == "receive" else ui_size["width"]
align = "left" if message_type == "receive" else "right"
font_family = self.LANGUAGES.get(language, self.LANGUAGES["Default"])
img = Image.new("RGBA", (0, 0), (0, 0, 0, 0))
draw = ImageDraw.Draw(img)
font_path = os_path.join(self.root_path, font_family)
font = ImageFont.truetype(font_path, font_size)
# 改行を含んだtextの最大の文字数を計算する
text_width = max(draw.textlength(line, font) for line in text.split("\n"))
character_width = text_width // len(text)
character_line_num = int((ui_size["width"] // character_width) - 1)
if len(text) > character_line_num:
text = "\n".join([text[i:i + character_line_num] for i in range(0, len(text), character_line_num)])
text_height = font_size * len(text.split("\n")) + ui_size["padding"]
img = Image.new("RGBA", (ui_size["width"], text_height), (0, 0, 0, 0))
draw = ImageDraw.Draw(img)
text_y = text_height // 2
draw.multiline_text((text_x, text_y), text, text_color, anchor=anchor, stroke_width=0, font=font, align=align)
return img
def createTextImageMessageType(self, message_type: str, date_time: str) -> Image:
ui_size = self.getUiSizeLargeLog()
font_size = ui_size["font_size_small"]
ui_padding = ui_size["padding"]
ui_color = self.getUiColorLargeLog()
text_color = ui_color[f"text_color_{message_type}"]
text_color_time = ui_color["text_color_time"]
anchor = "lm" if message_type == "receive" else "rm"
text = "Receive" if message_type == "receive" else "Send"
img = Image.new("RGBA", (0, 0), (0, 0, 0, 0))
draw = ImageDraw.Draw(img)
font_path = os_path.join(self.root_path, self.LANGUAGES["Default"])
font = ImageFont.truetype(font_path, font_size)
text_height = font_size + ui_padding
text_width = draw.textlength(date_time, font)
character_width = text_width // len(date_time)
img = Image.new("RGBA", (ui_size["width"], text_height), (0, 0, 0, 0))
draw = ImageDraw.Draw(img)
text_y = text_height // 2
text_time_x = 0 if message_type == "receive" else ui_size["width"] - (text_width + character_width)
text_x = (text_width + character_width) if message_type == "receive" else ui_size["width"]
draw.text((text_time_x, text_y), date_time, text_color_time, anchor=anchor, stroke_width=0, font=font)
draw.text((text_x, text_y), text, text_color, anchor=anchor, stroke_width=0, font=font)
return img
def createTextboxLargeLog(self, message_type: str, message: Optional[str] = None, your_language: Optional[str] = None, translation: List[str] = [], target_language: List[str] = [], date_time: Optional[str] = None) -> Image:
# テキスト画像のリストを作成
images = [self.createTextImageMessageType(message_type, date_time)]
# 翻訳がある場合
if translation and target_language:
# 元のメッセージがある場合は小さいサイズで追加
if message is not None:
images.append(
self.createTextImageLargeLog(message_type, "small", message, your_language)
)
# 翻訳をすべて大きいサイズで追加
for trans, lang in zip(translation, target_language):
images.append(
self.createTextImageLargeLog(message_type, "large", trans, lang)
)
else:
# 翻訳がない場合は元のメッセージのみ
images.append(
self.createTextImageLargeLog(message_type, "large", message, your_language)
)
# すべてのテキスト画像を縦に結合
combined_img = images[0]
for img in images[1:]:
combined_img = self.concatenateImagesVertically(combined_img, img)
return combined_img
def createOverlayImageLargeLog(self, message_type: str, message: Optional[str] = None, your_language: Optional[str] = None, translation: List[str] = [], target_language: List[str] = []) -> Image:
ui_color = self.getUiColorLargeLog()
background_color = ui_color["background_color"]
background_outline_color = ui_color["background_outline_color"]
ui_size = self.getUiSizeLargeLog()
ui_margin = ui_size["margin"]
ui_radius = ui_size["radius"]
ui_clause_margin = ui_size["clause_margin"]
self.message_log.append({
"message_type": message_type,
"message": message,
"your_language": your_language,
"translation": translation,
"target_language": target_language,
"datetime": datetime.now().strftime("%H:%M")
})
if len(self.message_log) > 5:
self.message_log = self.message_log[-5:]
imgs = [
self.createTextboxLargeLog(
log["message_type"],
log["message"],
log["your_language"],
log["translation"],
log["target_language"],
log["datetime"]) for log in self.message_log
]
img = imgs[0]
for i in imgs[1:]:
img = self.concatenateImagesVertically(img, i, ui_clause_margin)
img = self.addImageMargin(img, ui_margin, ui_margin, ui_margin, ui_margin, (0, 0, 0, 0))
width, height = img.size
background = Image.new("RGBA", (width, height), (0, 0, 0, 0))
draw = ImageDraw.Draw(background)
draw.rounded_rectangle([(0, 0), (width, height)], radius=ui_radius, fill=background_color, outline=background_outline_color, width=5)
img = Image.alpha_composite(background, img)
return img
if __name__ == "__main__":
overlay = OverlayImage()
# Basic small log test (with translation list form)
img = overlay.createOverlayImageSmallLog("Hello, World!", "English", ["こんにちは、世界!"], ["Japanese"])
img.save("overlay_small.png")
# Ruby small log test (Japanese original with transliteration tokens)
ruby_tokens = [
{"orig": "", "hira": "おもんぱか", "hepburn": "omonpaka"},
{"orig": "", "hira": "", "hepburn": "ru"},
]
# Ruby on original + ruby on translation example
translation_tokens = [
[
{"orig": "", "hira": "おもんぱか", "hepburn": "omonpaka"},
{"orig": "", "hira": "", "hepburn": "ru"},
]
]
img_ruby = overlay.createOverlayImageSmallLog(
"慮る",
"Japanese",
["慮る"],
["Default"],
transliteration_tokens=ruby_tokens,
translation_transliteration_tokens=translation_tokens,
ruby_font_scale=0.5,
ruby_line_spacing=4,
)
img_ruby.save("overlay_small_ruby.png")
# Large log tests (adjusted to pass translation/target_language as lists)
img = overlay.createOverlayImageLargeLog("send", "Hello, World!", "English", ["こんにちは、世界!"], ["Japanese"])
img = overlay.createOverlayImageLargeLog("receive", "こんにちは、世界!", "Japanese", ["Hello, World!"], ["English"])
long_en = "Hello, World!" + "a"*25 + ""*25 + "a"*25 + ""*25
long_jp = "こんにちは、世界!" + "a"*25 + ""*25 + "a"*25 + ""*25
img = overlay.createOverlayImageLargeLog("send", long_en, "English", [long_jp], ["Japanese"])
img = overlay.createOverlayImageLargeLog("receive", long_jp, "Japanese", [long_en], ["English"])
img = overlay.createOverlayImageLargeLog("send", "Hello, World!", "English", ["こんにちは、世界!"], ["Japanese"])
img = overlay.createOverlayImageLargeLog("receive", "こんにちは、世界!", "Japanese", ["Hello, World!"], ["English"])
img = overlay.createOverlayImageLargeLog("send", long_en, "English", [long_jp], ["Japanese"])
img = overlay.createOverlayImageLargeLog("receive", long_jp, "Japanese", [long_en], ["English"])
img.save("overlay_large.png")