From 73abc9b5e9e5e57e1a7b499a7fbebaf3cfabf084 Mon Sep 17 00:00:00 2001 From: misyaguziya Date: Fri, 6 Sep 2024 23:13:47 +0900 Subject: [PATCH 1/4] =?UTF-8?q?=F0=9F=9A=A7=20[WIP/TEST]=20=E3=82=A8?= =?UTF-8?q?=E3=82=B9=E3=82=B1=E3=83=BC=E3=83=97=E6=96=87=E5=AD=97=E3=81=AB?= =?UTF-8?q?=E3=82=88=E3=82=8B=E7=BF=BB=E8=A8=B3=E3=81=97=E3=81=AA=E3=81=84?= =?UTF-8?q?=E6=96=87=E7=AB=A0=E3=82=92=E8=A8=AD=E5=AE=9A=E3=81=A7=E3=81=8D?= =?UTF-8?q?=E3=82=8B=E3=82=88=E3=81=86=E3=81=AB=E5=AE=9F=E8=A3=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ![...]で囲んだ範囲(...は文章)翻訳ONの状態でもその部分は翻訳しない。 この機能はチャットのみで有効 --- src-python/config.py | 12 +++++++++ src-python/webui_controller.py | 45 +++++++++++++++++++++++++++++++++- 2 files changed, 56 insertions(+), 1 deletion(-) diff --git a/src-python/config.py b/src-python/config.py index 23091218..59d3e71d 100644 --- a/src-python/config.py +++ b/src-python/config.py @@ -691,6 +691,17 @@ class Config: self._AUTH_KEYS[key] = value saveJson(self.PATH_CONFIG, inspect.currentframe().f_code.co_name, self.AUTH_KEYS) + @property + @json_serializable('USE_EXCLUDE_WORDS') + def USE_EXCLUDE_WORDS(self): + return self._USE_EXCLUDE_WORDS + + @USE_EXCLUDE_WORDS.setter + def USE_EXCLUDE_WORDS(self, value): + if isinstance(value, bool): + self._USE_EXCLUDE_WORDS = value + saveJson(self.PATH_CONFIG, inspect.currentframe().f_code.co_name, value) + @property @json_serializable('USE_TRANSLATION_FEATURE') def USE_TRANSLATION_FEATURE(self): @@ -1094,6 +1105,7 @@ class Config: self._AUTH_KEYS = { "DeepL_API": None, } + self._USE_EXCLUDE_WORDS = True self._USE_TRANSLATION_FEATURE = True self._CTRANSLATE2_WEIGHT_TYPE = "Small" self._USE_WHISPER_FEATURE = False diff --git a/src-python/webui_controller.py b/src-python/webui_controller.py index a5d257ef..3d137d50 100644 --- a/src-python/webui_controller.py +++ b/src-python/webui_controller.py @@ -2,6 +2,9 @@ from typing import Callable, Union from time import sleep from subprocess import Popen from threading import Thread +import re +import random +import string from config import config from model import model from utils import getKeyByValue, isUniqueStrings, printLog, printResponse @@ -282,6 +285,39 @@ def stopThreadingTranscriptionReceiveMessageOnOpenConfigWindow(): th_stopTranscriptionReceiveMessage.start() # func message box +def replaceExclamationsWithRandom(text): + characters = string.ascii_letters + string.digits + # ![...] にマッチする正規表現 + pattern = r'!\[(.*?)\]' + + # 乱数と置換部分を保存する辞書 + replacement_dict = {} + + # マッチした部分をランダムな整数に置換し、その対応を辞書に保存 + def replace(match): + original = match.group(1) # ![]内のテキストを取得 + rand_value = ''.join(random.choices(characters, k=8)) # 8文字のランダムな文字列を生成 + replacement_dict[rand_value] = original # 辞書に保存 + return str(rand_value) # 置換する値 + + # 文章内の ![] の部分を置換 + replaced_text = re.sub(pattern, replace, text) + + return replaced_text, replacement_dict + +def restoreText(escaped_text, escape_dict): + # 辞書のキーに対応する値でテキスト内を置換する + for escape_seq, char in escape_dict.items(): + escaped_text = escaped_text.replace(escape_seq, char) + return escaped_text + +def removeExclamations(text): + # ![...] を [...] に置換する正規表現 + pattern = r'!\[(.*?)\]' + # ![...] の部分を [] 内のテキストに置換 + cleaned_text = re.sub(pattern, r'\1', text) + return cleaned_text + class ChatMessage: def __init__(self, action:Callable[[dict], None]) -> None: self.action = action @@ -295,7 +331,14 @@ class ChatMessage: if config.ENABLE_TRANSLATION is False: pass else: - translation, success = model.getInputTranslate(message) + if config.USE_EXCLUDE_WORDS is True: + replacement_message, replacement_dict = replaceExclamationsWithRandom(message) + translation, success = model.getInputTranslate(replacement_message) + message = removeExclamations(message) + translation = restoreText(translation, replacement_dict) + else: + translation, success = model.getInputTranslate(message) + if success is False: changeToCTranslate2Process() self.action("error_translation_engine", { From a2b52658a80256d5feff3a0cb07e46a0bd415544 Mon Sep 17 00:00:00 2001 From: misyaguziya Date: Sat, 7 Sep 2024 01:13:13 +0900 Subject: [PATCH 2/4] =?UTF-8?q?=F0=9F=9A=A7=20[WIP/TEST]=20=E3=82=A8?= =?UTF-8?q?=E3=82=B9=E3=82=B1=E3=83=BC=E3=83=97=E6=96=87=E5=AD=97=E3=81=AB?= =?UTF-8?q?=E3=82=88=E3=82=8B=E7=BF=BB=E8=A8=B3=E3=81=97=E3=81=AA=E3=81=84?= =?UTF-8?q?=E6=96=87=E7=AB=A0=E3=82=92=E8=A8=AD=E5=AE=9A=E3=81=A7=E3=81=8D?= =?UTF-8?q?=E3=82=8B=E3=82=88=E3=81=86=E3=81=AB=E5=AE=9F=E8=A3=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit エスケープ時の置換文字を0x1000から順番に規定 --- src-python/webui_controller.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src-python/webui_controller.py b/src-python/webui_controller.py index 3d137d50..619a24c2 100644 --- a/src-python/webui_controller.py +++ b/src-python/webui_controller.py @@ -286,19 +286,21 @@ def stopThreadingTranscriptionReceiveMessageOnOpenConfigWindow(): # func message box def replaceExclamationsWithRandom(text): - characters = string.ascii_letters + string.digits # ![...] にマッチする正規表現 pattern = r'!\[(.*?)\]' # 乱数と置換部分を保存する辞書 replacement_dict = {} - # マッチした部分をランダムな整数に置換し、その対応を辞書に保存 + num = 4096 + # マッチした部分を4096から始まる整数に置換する。置換毎に4097, 4098, ... と増える def replace(match): - original = match.group(1) # ![]内のテキストを取得 - rand_value = ''.join(random.choices(characters, k=8)) # 8文字のランダムな文字列を生成 - replacement_dict[rand_value] = original # 辞書に保存 - return str(rand_value) # 置換する値 + original = match.group(1) + nonlocal num + rand_value = hex(num) + replacement_dict[rand_value] = original + num += 1 + return str(rand_value) # 文章内の ![] の部分を置換 replaced_text = re.sub(pattern, replace, text) @@ -306,9 +308,10 @@ def replaceExclamationsWithRandom(text): return replaced_text, replacement_dict def restoreText(escaped_text, escape_dict): - # 辞書のキーに対応する値でテキスト内を置換する + # 大文字小文字を無視して置換するために、正規表現を使う for escape_seq, char in escape_dict.items(): - escaped_text = escaped_text.replace(escape_seq, char) + # escape_seq の部分を case-insensitive で置換 + escaped_text = re.sub(re.escape(escape_seq), char, escaped_text, flags=re.IGNORECASE) return escaped_text def removeExclamations(text): @@ -334,6 +337,7 @@ class ChatMessage: if config.USE_EXCLUDE_WORDS is True: replacement_message, replacement_dict = replaceExclamationsWithRandom(message) translation, success = model.getInputTranslate(replacement_message) + message = removeExclamations(message) translation = restoreText(translation, replacement_dict) else: From 619532f03bfee67021e85c6d35b614381366de37 Mon Sep 17 00:00:00 2001 From: misyaguziya Date: Sat, 7 Sep 2024 17:59:15 +0900 Subject: [PATCH 3/4] =?UTF-8?q?=F0=9F=9A=A7=20[WIP/TEST]=20=E3=82=A8?= =?UTF-8?q?=E3=82=B9=E3=82=B1=E3=83=BC=E3=83=97=E6=96=87=E5=AD=97=E3=81=AB?= =?UTF-8?q?=E3=82=88=E3=82=8B=E7=BF=BB=E8=A8=B3=E3=81=97=E3=81=AA=E3=81=84?= =?UTF-8?q?=E6=96=87=E7=AB=A0=E3=82=92=E8=A8=AD=E5=AE=9A=E3=81=A7=E3=81=8D?= =?UTF-8?q?=E3=82=8B=E3=82=88=E3=81=86=E3=81=AB=E5=AE=9F=E8=A3=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit プレースホルダーの形式を修正 ex) "0x1000" -> " <$ 0x1000> " --- src-python/webui_controller.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src-python/webui_controller.py b/src-python/webui_controller.py index 619a24c2..61db6aec 100644 --- a/src-python/webui_controller.py +++ b/src-python/webui_controller.py @@ -297,7 +297,7 @@ def replaceExclamationsWithRandom(text): def replace(match): original = match.group(1) nonlocal num - rand_value = hex(num) + rand_value = f" <$ {hex(num)}> " replacement_dict[rand_value] = original num += 1 return str(rand_value) @@ -311,7 +311,7 @@ def restoreText(escaped_text, escape_dict): # 大文字小文字を無視して置換するために、正規表現を使う for escape_seq, char in escape_dict.items(): # escape_seq の部分を case-insensitive で置換 - escaped_text = re.sub(re.escape(escape_seq), char, escaped_text, flags=re.IGNORECASE) + escaped_text = re.sub(re.escape(escape_seq[1:-1]), char, escaped_text, flags=re.IGNORECASE) return escaped_text def removeExclamations(text): From 5c6f51bdaa75f1834e49b50c2fc03172ec8c3a28 Mon Sep 17 00:00:00 2001 From: misyaguziya Date: Sun, 8 Sep 2024 05:48:46 +0900 Subject: [PATCH 4/4] =?UTF-8?q?=F0=9F=9A=A7=20[WIP/TEST]=20=E3=83=97?= =?UTF-8?q?=E3=83=AC=E3=83=BC=E3=82=B9=E3=83=9B=E3=83=AB=E3=83=80=E3=83=BC?= =?UTF-8?q?=E3=81=AE=E5=BD=A2=E5=BC=8F=E3=82=92=E4=BF=AE=E6=AD=A3=20ex)=20?= =?UTF-8?q?"=20<$=200x1000>=20"=20->=20"=20$=200x1000=20"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit $と0x1000の間にスペースが入る事があるため、それを考慮して正規表現を修正 --- src-python/webui_controller.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src-python/webui_controller.py b/src-python/webui_controller.py index 61db6aec..90fa9526 100644 --- a/src-python/webui_controller.py +++ b/src-python/webui_controller.py @@ -3,8 +3,6 @@ from time import sleep from subprocess import Popen from threading import Thread import re -import random -import string from config import config from model import model from utils import getKeyByValue, isUniqueStrings, printLog, printResponse @@ -297,10 +295,10 @@ def replaceExclamationsWithRandom(text): def replace(match): original = match.group(1) nonlocal num - rand_value = f" <$ {hex(num)}> " + rand_value = hex(num) replacement_dict[rand_value] = original num += 1 - return str(rand_value) + return f" ${rand_value} " # 文章内の ![] の部分を置換 replaced_text = re.sub(pattern, replace, text) @@ -310,8 +308,9 @@ def replaceExclamationsWithRandom(text): def restoreText(escaped_text, escape_dict): # 大文字小文字を無視して置換するために、正規表現を使う for escape_seq, char in escape_dict.items(): - # escape_seq の部分を case-insensitive で置換 - escaped_text = re.sub(re.escape(escape_seq[1:-1]), char, escaped_text, flags=re.IGNORECASE) + # escaped_text の部分を pattern で置換 + pattern = re.escape(f"${escape_seq}") + r"|\$\s+" + re.escape(escape_seq) + escaped_text = re.sub(pattern, char, escaped_text, flags=re.IGNORECASE) return escaped_text def removeExclamations(text):