From e67242a0c4aef079350f4fc649cb27ec5b3fd773 Mon Sep 17 00:00:00 2001 From: misyaguziya <53165965+misyaguziya@users.noreply.github.com> Date: Thu, 9 Oct 2025 13:15:01 +0900 Subject: [PATCH] =?UTF-8?q?[=E6=96=B0=E8=A6=8F=E8=BF=BD=E5=8A=A0]=20?= =?UTF-8?q?=E3=83=89=E3=82=AD=E3=83=A5=E3=83=A1=E3=83=B3=E3=83=88=E3=81=AE?= =?UTF-8?q?=E3=82=A8=E3=83=B3=E3=83=89=E3=83=9D=E3=82=A4=E3=83=B3=E3=83=88?= =?UTF-8?q?=E3=81=A8=E3=83=9E=E3=83=83=E3=83=94=E3=83=B3=E3=82=B0=E3=81=AE?= =?UTF-8?q?=E6=A4=9C=E8=A8=BC=E3=82=B9=E3=82=AF=E3=83=AA=E3=83=97=E3=83=88?= =?UTF-8?q?=E3=82=92=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../scripts/cleanup_docs_placeholders.py | 16 ++ src-python/scripts/find_doc_tokens.py | 22 +++ src-python/scripts/print_mapping.py | 28 +++ src-python/scripts/verify_docs_vs_code.py | 161 ++++++++++++++++++ .../scripts/verify_docs_vs_code_runtime.py | 126 ++++++++++++++ 5 files changed, 353 insertions(+) create mode 100644 src-python/scripts/cleanup_docs_placeholders.py create mode 100644 src-python/scripts/find_doc_tokens.py create mode 100644 src-python/scripts/print_mapping.py create mode 100644 src-python/scripts/verify_docs_vs_code.py create mode 100644 src-python/scripts/verify_docs_vs_code_runtime.py diff --git a/src-python/scripts/cleanup_docs_placeholders.py b/src-python/scripts/cleanup_docs_placeholders.py new file mode 100644 index 00000000..9da108fd --- /dev/null +++ b/src-python/scripts/cleanup_docs_placeholders.py @@ -0,0 +1,16 @@ +from pathlib import Path +p=Path(__file__).resolve().parents[1]/'docs'/'api.md' +text=p.read_text(encoding='utf-8') +lines=[] +for line in text.splitlines(): + stripped=line.strip() + # Remove exact umbrella placeholder tokens or standalone list entries + if stripped in ('- /set/enable', '- /set/disable', '- /get/data/', '/set/enable', '/set/disable', '/get/data/'): + continue + # Remove lines that are just '/get/data' or '/set/data' or '/run/' etc + if stripped in ('/get/data', '/set/data', '/run/', '/get', '/set', '/run'): + continue + lines.append(line) +new='\n'.join(lines) +p.write_text(new,encoding='utf-8') +print('cleaned') diff --git a/src-python/scripts/find_doc_tokens.py b/src-python/scripts/find_doc_tokens.py new file mode 100644 index 00000000..b9e1c95f --- /dev/null +++ b/src-python/scripts/find_doc_tokens.py @@ -0,0 +1,22 @@ +from pathlib import Path + +ROOT = Path(__file__).resolve().parents[1] +DOC_DIR = ROOT / 'docs' + +tokens = [ + 'transcription_mic', + 'transcription_speaker', + 'selected_translation_compute_device', + '/run/selected_translation_compute_device', + '/run/transcription_mic', + '/run/transcription_speaker', +] + +for p in DOC_DIR.rglob('*.md'): + text = p.read_text(encoding='utf-8') + for i, line in enumerate(text.splitlines(), start=1): + for t in tokens: + if t in line: + print(f"{p}:{i}:{line.strip()}") + +print('done') diff --git a/src-python/scripts/print_mapping.py b/src-python/scripts/print_mapping.py new file mode 100644 index 00000000..8b66e177 --- /dev/null +++ b/src-python/scripts/print_mapping.py @@ -0,0 +1,28 @@ +from pathlib import Path +import re +ROOT = Path(__file__).resolve().parents[1] +MAINLOOP = ROOT / 'mainloop.py' +text = MAINLOOP.read_text(encoding='utf-8') +run_mapping = {} +mapping = {} +for mm in re.finditer(r"[\'\"]([^\'\"]+)[\'\"]\s*:\s*[\'\"](/run/[a-zA-Z0-9_\-\\/]+)[\'\"]", text): + run_mapping[mm.group(1)] = mm.group(2) +for mm in re.finditer(r"[\'\"](/(?:get|set)/[a-zA-Z0-9_\-\\/]+)[\'\"]", text): + mapping[mm.group(1)] = True +print('run_mapping entries:', len(run_mapping)) +print('sample run_mapping keys:', sorted(run_mapping.items())[:10]) +print('\nmapping endpoints count:', len(mapping)) +# show any endpoints that are exactly '/get/data/' +print('\ncontains /get/data/?', '/get/data/' in mapping) +if '/get/data/' in mapping: + print('Found /get/data/ literal in mainloop.py text') +# show ones containing '/get/data' +has_get_data = [k for k in mapping.keys() if '/get/data' in k] +print('\nendpoints containing /get/data:', len(has_get_data)) +if has_get_data: + for k in sorted(has_get_data)[:30]: + print(' -', k) +# print first 20 mapping endpoints +print('\nFirst 40 endpoints:') +for k in sorted(mapping.keys())[:40]: + print(' -', k) diff --git a/src-python/scripts/verify_docs_vs_code.py b/src-python/scripts/verify_docs_vs_code.py new file mode 100644 index 00000000..7aa47c40 --- /dev/null +++ b/src-python/scripts/verify_docs_vs_code.py @@ -0,0 +1,161 @@ +import importlib.util +import re +from pathlib import Path + +ROOT = Path(__file__).resolve().parents[1] +MAINLOOP = ROOT / 'mainloop.py' +CONTROLLER = ROOT / 'controller.py' +DOC_API = ROOT / 'docs' / 'api.md' +DOC_RUN = ROOT / 'docs' / 'run_events_payloads.md' + + +def extract_mapping_from_mainloop(): + """ + Import mainloop.py and read 'mapping' and 'run_mapping' objects directly. + This executes the module in an isolated module object; mainloop has some + initialization but exposing these dicts is acceptable for verification. + """ + run_mapping = {} + mapping = {} + try: + spec = importlib.util.spec_from_file_location('project_mainloop', str(MAINLOOP)) + module = importlib.util.module_from_spec(spec) + loader = spec.loader + if loader is None: + raise RuntimeError('Could not load mainloop module') + loader.exec_module(module) + mapping = getattr(module, 'mapping', {}) or {} + run_mapping = getattr(module, 'run_mapping', {}) or {} + return mapping, run_mapping + except Exception as e: + print('Error importing mainloop.py', e) + + # Fallback: simple regex-based extraction from mainloop.py text + try: + text = MAINLOOP.read_text(encoding='utf-8') + # run_mapping entries like: "transcription_mic": "/run/transcription_send_mic_message", + for mm in re.finditer(r"[\'\"]([^\'\"]+)[\'\"]\s*:\s*[\'\"](/run/[a-zA-Z0-9_\-\/]+)[\'\"]", text): + run_mapping[mm.group(1)] = mm.group(2) + # mapping endpoints: any '/get/...' or '/set/...' literal in file + for mm in re.finditer(r"[\'\"](/(?:get|set)/[a-zA-Z0-9_\-\/]+)[\'\"]", text): + mapping[mm.group(1)] = True + except Exception as e: + print('Error parsing mainloop.py via fallback', e) + + return mapping, run_mapping + + +def extract_run_events_from_controller(): + code = CONTROLLER.read_text(encoding='utf-8') + # find self.run( ... , self.run_mapping["key"], ... ) and direct self.run(..., + run_keys = set() + # pattern for self.run(..., self.run_mapping["xxx"], ...) + pattern = re.compile(r"self\.run\([^\)]*self\.run_mapping\[\s*[\'\"]([^\'\"]+)[\'\"]\s*\]", re.M) + for m in pattern.finditer(code): + run_keys.add(m.group(1)) + # also find self.run(..., "/run/xxx", ...) + pattern2 = re.compile(r"self\.run\([^\)]*\"(/run/[^\'\"]+)\"", re.M) + for m in pattern2.finditer(code): + run_keys.add(m.group(1)) + return run_keys + + +def extract_endpoints_from_docs(): + api = DOC_API.read_text(encoding='utf-8') + run = DOC_RUN.read_text(encoding='utf-8') if DOC_RUN.exists() else '' + endpoints = set() + run_events = set() + # conservative extraction: match endpoints that start with /get/ /set/ /run/ + pattern = re.compile(r"(/(?:get|set|run)(?:/[a-zA-Z0-9_\-]+)+)") + for m in pattern.finditer(api): + token = m.group(1) + # drop umbrella placeholders and tokens that end with '/' + if token in ('/get', '/set', '/run', '/get/data', '/set/data'): + continue + if token.endswith('/'): + continue + if token.startswith('/run/'): + run_events.add(token) + else: + endpoints.add(token) + for m in pattern.finditer(run): + token = m.group(1) + if token in ('/get', '/set', '/run', '/get/data', '/set/data'): + continue + if token.endswith('/'): + continue + if token.startswith('/run/'): + run_events.add(token) + else: + endpoints.add(token) + return endpoints, run_events + + +def main(): + mapping, run_mapping = extract_mapping_from_mainloop() + code_endpoints = set(mapping.keys()) + code_run_events = set(run_mapping.values()) + # normalize run events: run_mapping values likely like '/run/…' + controller_run_keys = extract_run_events_from_controller() + + doc_endpoints, doc_run_events = extract_endpoints_from_docs() + + report = [] + report.append('=== Summary ===') + report.append(f'Code endpoints (/get,/set,/run): {len(code_endpoints)}') + report.append(f'Code run_mapping entries: {len(code_run_events)}') + report.append(f'Controller-run keys found by scan: {len(controller_run_keys)}') + report.append(f'Documented endpoints found in docs/api.md: {len(doc_endpoints)}') + report.append(f'Documented run events found in docs: {len(doc_run_events)}') + + # endpoints present in code but not in docs + missing_in_docs = code_endpoints - doc_endpoints + extra_in_docs = doc_endpoints - code_endpoints + + report.append('\n=== Endpoints present in code but NOT documented ===') + if missing_in_docs: + for e in sorted(missing_in_docs): + report.append(' - ' + e) + else: + report.append(' - None') + + report.append('\n=== Endpoints documented but NOT in code ===') + if extra_in_docs: + for e in sorted(extra_in_docs): + report.append(' - ' + e) + else: + report.append(' - None') + + report.append('\n=== Run events present in code (run_mapping) but NOT documented ===') + missing_run_in_docs = code_run_events - doc_run_events + if missing_run_in_docs: + for e in sorted(missing_run_in_docs): + report.append(' - ' + e) + else: + report.append(' - None') + + report.append('\n=== Run keys emitted in controller (self.run mapping keys) but NOT in run_mapping values ===') + # controller_run_keys are keys like 'connected_network' or '/run/connected_network' + # normalize controller keys to values: if key starts with '/run/' keep, else map via run_mapping if possible + normalized = set() + for k in controller_run_keys: + if k.startswith('/run/'): + normalized.add(k) + else: + if k in run_mapping: + normalized.add(run_mapping[k]) + else: + normalized.add(k) + # compare normalized with code_run_events + extra_controller_keys = normalized - code_run_events + if extra_controller_keys: + for e in sorted(extra_controller_keys): + report.append(' - ' + e) + else: + report.append(' - None') + + out = '\n'.join(report) + print(out) + +if __name__ == '__main__': + main() diff --git a/src-python/scripts/verify_docs_vs_code_runtime.py b/src-python/scripts/verify_docs_vs_code_runtime.py new file mode 100644 index 00000000..187575f3 --- /dev/null +++ b/src-python/scripts/verify_docs_vs_code_runtime.py @@ -0,0 +1,126 @@ +import re +import json +import sys +from pathlib import Path + +ROOT = Path(__file__).resolve().parents[1] +DOC_API = ROOT / 'docs' / 'api.md' +DOC_RUN = ROOT / 'docs' / 'run_events_payloads.md' + +# Ensure project root is importable so `import mainloop` works when this script is +# executed from the scripts/ folder. +sys.path.insert(0, str(ROOT)) + + +def main(): + # Delayed imports to avoid module-level import ordering issues (E402 in linters) + import mainloop + import controller as controller_module + + mapping_keys = set(mainloop.mapping.keys()) + run_mapping_values = set(mainloop.run_mapping.values()) + + # extract controller emitted run keys by source scan + controller_src = Path(controller_module.__file__).read_text(encoding='utf-8') + controller_run_keys = set() + for m in re.finditer(r"self\.run\([^\)]*self\.run_mapping\[\s*[\'\"]([^\'\"]+)[\'\"]\s*\]", controller_src): + controller_run_keys.add(m.group(1)) + for m in re.finditer(r"self\.run\([^\)]*\"(/run/[a-zA-Z0-9_\-/]+)\"", controller_src): + controller_run_keys.add(m.group(1)) + # read docs and extract endpoints conservatively (only full endpoints starting with /get/ /set/ /run/) + api_text = DOC_API.read_text(encoding='utf-8') + run_text = DOC_RUN.read_text(encoding='utf-8') if DOC_RUN.exists() else '' + + # include delete endpoints as well (e.g. /delete/data/deepl_auth_key) + endpoint_pattern = re.compile(r"(/(?:get|set|run|delete)[A-Za-z0-9_\-/]*)") + + doc_endpoints = set(m.group(1) for m in endpoint_pattern.finditer(api_text + '\n' + run_text)) + + # Remove umbrella placeholder artifacts that sometimes appear due to + # comma-separated lists or pattern fragments in the markdown. These are + # not concrete endpoints and should not be treated as documented endpoints + # for parity checking. + umbrella_tokens = { + '/get', '/set', '/run', '/get/data', '/set/data', '/set/enable', '/set/disable' + } + # Remove exact umbrella tokens and any accidental entries that end with a + # trailing slash (these are artifacts of pattern matching in markdown). + doc_endpoints = {e for e in doc_endpoints if e not in umbrella_tokens and not e.endswith('/')} + + # Compare + missing_in_docs = mapping_keys - doc_endpoints + # A documented endpoint is valid if it corresponds to either an incoming mapping (mapping_keys) + # or an outgoing run event (run_mapping_values). Treat extra_in_docs as anything documented + # that is neither in mapping_keys nor in run_mapping_values. + extra_in_docs = doc_endpoints - (mapping_keys | run_mapping_values) + + missing_run_in_docs = run_mapping_values - doc_endpoints + + # Normalize controller keys to run_mapping values + normalized = set() + for k in controller_run_keys: + if k.startswith('/run/'): + normalized.add(k) + else: + if k in mainloop.run_mapping: + normalized.add(mainloop.run_mapping[k]) + else: + normalized.add(k) + + extra_controller_keys = normalized - run_mapping_values + + report = [] + report.append('=== Runtime verification report ===') + report.append(f'Code mapping endpoints: {len(mapping_keys)}') + report.append(f'Code run_mapping entries: {len(run_mapping_values)}') + report.append(f'Controller emitted run keys: {len(controller_run_keys)}') + report.append(f'Documented endpoints (docs): {len(doc_endpoints)}') + + report.append('\n--- Endpoints present in code but NOT documented ---') + if missing_in_docs: + for e in sorted(missing_in_docs): + report.append(' - ' + e) + else: + report.append(' - None') + + report.append('\n--- Endpoints documented but NOT in code ---') + if extra_in_docs: + for e in sorted(extra_in_docs): + report.append(' - ' + e) + else: + report.append(' - None') + + report.append('\n--- Run events present in code (run_mapping) but NOT documented ---') + if missing_run_in_docs: + for e in sorted(missing_run_in_docs): + report.append(' - ' + e) + else: + report.append(' - None') + + report.append('\n--- Run keys emitted in controller (normalized) but NOT in run_mapping values ---') + if extra_controller_keys: + for e in sorted(extra_controller_keys): + report.append(' - ' + e) + else: + report.append(' - None') + + print('\n'.join(report)) + + # Also output JSON for downstream processing + out = { + 'mapping_keys': sorted(mapping_keys), + 'run_mapping_values': sorted(run_mapping_values), + 'controller_run_keys': sorted(controller_run_keys), + 'doc_endpoints': sorted(doc_endpoints), + 'missing_in_docs': sorted(missing_in_docs), + 'extra_in_docs': sorted(extra_in_docs), + 'missing_run_in_docs': sorted(missing_run_in_docs), + 'extra_controller_keys': sorted(extra_controller_keys), + } + print('\nJSON_OUTPUT_START') + print(json.dumps(out)) + print('JSON_OUTPUT_END') + + +if __name__ == '__main__': + main()