From 35e8d7dda92001191cd18e557af395651fc8a2e3 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Thu, 9 Oct 2025 18:43:12 +0900 Subject: [PATCH] =?UTF-8?q?=E3=82=B9=E3=83=AC=E3=83=83=E3=83=89=E3=82=BB?= =?UTF-8?q?=E3=83=BC=E3=83=95=E3=81=AA=E3=83=88=E3=83=BC=E3=82=AF=E3=83=8A?= =?UTF-8?q?=E3=82=A4=E3=82=B6=E3=83=BC=E3=82=A2=E3=82=AF=E3=82=BB=E3=82=B9?= =?UTF-8?q?=E3=81=AE=E3=81=9F=E3=82=81=E3=81=AB=E3=83=AD=E3=83=83=E3=82=AF?= =?UTF-8?q?=E3=82=92=E8=BF=BD=E5=8A=A0=E3=80=82=E3=83=88=E3=83=BC=E3=82=AF?= =?UTF-8?q?=E3=83=8A=E3=82=A4=E3=82=B6=E3=83=BC=E3=81=AE=E5=91=BC=E3=81=B3?= =?UTF-8?q?=E5=87=BA=E3=81=97=E6=99=82=E3=81=AB=E7=99=BA=E7=94=9F=E3=81=99?= =?UTF-8?q?=E3=82=8B=E5=8F=AF=E8=83=BD=E6=80=A7=E3=81=AE=E3=81=82=E3=82=8B?= =?UTF-8?q?RuntimeError=E3=82=92=E9=98=B2=E3=81=90=E3=81=9F=E3=82=81?= =?UTF-8?q?=E3=81=AB=E3=80=81=E3=82=A2=E3=82=AF=E3=82=BB=E3=82=B9=E3=82=92?= =?UTF-8?q?=E7=9B=B4=E5=88=97=E5=8C=96=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../transliteration/transliteration_transliterator.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src-python/models/transliteration/transliteration_transliterator.py b/src-python/models/transliteration/transliteration_transliterator.py index 8aff912e..44464348 100644 --- a/src-python/models/transliteration/transliteration_transliterator.py +++ b/src-python/models/transliteration/transliteration_transliterator.py @@ -1,6 +1,7 @@ from sudachipy import tokenizer from sudachipy import dictionary from typing import List, Dict, Any +import threading try: from .transliteration_kana_to_hepburn import katakana_to_hepburn except ImportError: @@ -14,6 +15,9 @@ class Transliterator: def __init__(self) -> None: self.tokenizer_obj = dictionary.Dictionary(dict_type="full").create() self.mode = tokenizer.Tokenizer.SplitMode.C + # Lock to prevent concurrent access to sudachipy tokenizer which may + # internally use Rust/PyO3 borrow semantics and raise "Already borrowed". + self._tokenizer_lock = threading.Lock() @staticmethod def is_kanji(ch: str) -> bool: @@ -132,7 +136,10 @@ class Transliterator: results. """ - tokens = self.tokenizer_obj.tokenize(text, self.mode) + # Tokenizer may raise RuntimeError: Already borrowed when called + # concurrently. Protect the call with a lock to serialize access. + with self._tokenizer_lock: + tokens = self.tokenizer_obj.tokenize(text, self.mode) results: List[Dict[str, Any]] = [] for t in tokens: