[新規追加] ドキュメントのエンドポイントとマッピングの検証スクリプトを追加

This commit is contained in:
misyaguziya
2025-10-09 13:15:01 +09:00
parent 5efa9c37d6
commit e67242a0c4
5 changed files with 353 additions and 0 deletions

View File

@@ -0,0 +1,16 @@
from pathlib import Path
# One-off cleanup: delete umbrella placeholder lines from docs/api.md in place.
p = Path(__file__).resolve().parents[1] / 'docs' / 'api.md'
text = p.read_text(encoding='utf-8')

# A line is dropped when, after stripping surrounding whitespace, it is exactly
# one of these tokens: umbrella placeholders (optionally written as a "- " list
# entry) or a bare verb / verb-prefix fragment.
_PLACEHOLDER_LINES = frozenset({
    '- /set/enable', '- /set/disable', '- /get/data/',
    '/set/enable', '/set/disable', '/get/data/',
    '/get/data', '/set/data', '/run/', '/get', '/set', '/run',
})

# keepends=True keeps every surviving line's own terminator, so the file's
# trailing newline is preserved instead of being lost by a '\n'.join().
lines = [
    line
    for line in text.splitlines(keepends=True)
    if line.strip() not in _PLACEHOLDER_LINES
]
new = ''.join(lines)

# Only touch the file when something was actually removed.
if new != text:
    p.write_text(new, encoding='utf-8')
print('cleaned')

View File

@@ -0,0 +1,22 @@
from pathlib import Path
# Grep-like helper: list every line in docs/**/*.md that mentions one of the
# tokens below, formatted as "path:line_number:stripped line".
ROOT = Path(__file__).resolve().parents[1]
DOC_DIR = ROOT / 'docs'
tokens = [
    'transcription_mic',
    'transcription_speaker',
    'selected_translation_compute_device',
    '/run/selected_translation_compute_device',
    '/run/transcription_mic',
    '/run/transcription_speaker',
]

# sorted() makes the output order deterministic across runs and platforms.
for p in sorted(DOC_DIR.rglob('*.md')):
    text = p.read_text(encoding='utf-8')
    for i, line in enumerate(text.splitlines(), start=1):
        # Report each line once: the '/run/...' tokens contain the bare tokens,
        # so testing token-by-token would print the same line several times.
        if any(t in line for t in tokens):
            print(f"{p}:{i}:{line.strip()}")
print('done')

View File

@@ -0,0 +1,28 @@
from pathlib import Path
import re
# Debug helper: scan mainloop.py as plain text and print the endpoint literals
# found in it (the module is never imported or executed here).
ROOT = Path(__file__).resolve().parents[1]
MAINLOOP = ROOT / 'mainloop.py'
text = MAINLOOP.read_text(encoding='utf-8')

# "key": "/run/..." pairs; when a key appears more than once the last one wins.
run_mapping = {
    found.group(1): found.group(2)
    for found in re.finditer(
        r"[\'\"]([^\'\"]+)[\'\"]\s*:\s*[\'\"](/run/[a-zA-Z0-9_\-\\/]+)[\'\"]", text
    )
}
# Every quoted '/get/...' or '/set/...' literal, used as a set-like dict.
mapping = dict.fromkeys(
    (
        found.group(1)
        for found in re.finditer(r"[\'\"](/(?:get|set)/[a-zA-Z0-9_\-\\/]+)[\'\"]", text)
    ),
    True,
)

print('run_mapping entries:', len(run_mapping))
print('sample run_mapping keys:', sorted(run_mapping.items())[:10])
print('\nmapping endpoints count:', len(mapping))

# Is the bare umbrella literal '/get/data/' present?
literal_present = '/get/data/' in mapping
print('\ncontains /get/data/?', literal_present)
if literal_present:
    print('Found /get/data/ literal in mainloop.py text')

# Endpoints that contain '/get/data' anywhere (at most 30 are listed).
has_get_data = sorted(name for name in mapping if '/get/data' in name)
print('\nendpoints containing /get/data:', len(has_get_data))
for name in has_get_data[:30]:
    print(' -', name)

# First 40 endpoints in sorted order.
print('\nFirst 40 endpoints:')
for name in sorted(mapping)[:40]:
    print(' -', name)

View File

@@ -0,0 +1,161 @@
import importlib.util
import re
from pathlib import Path
# Project root: the directory one level above the folder holding this script.
ROOT = Path(__file__).resolve().parents[1]
# Source files that are scanned for endpoint and run-event definitions.
MAINLOOP = ROOT.joinpath('mainloop.py')
CONTROLLER = ROOT.joinpath('controller.py')
# Markdown documents the code is compared against.
DOC_API = ROOT.joinpath('docs', 'api.md')
DOC_RUN = ROOT.joinpath('docs', 'run_events_payloads.md')
def extract_mapping_from_mainloop(mainloop_path=None):
    """Return ``(mapping, run_mapping)`` as defined by mainloop.py.

    The file is first executed as an isolated module object (it is not
    registered in ``sys.modules``) and its ``mapping`` / ``run_mapping``
    attributes are read directly. NOTE: this runs the module's top-level
    code, so only point it at trusted project sources.

    If the import fails for any reason, a regex scan of the file's text is
    used instead. In that case ``run_mapping`` holds ``"key": "/run/..."``
    pairs and ``mapping`` maps every quoted ``/get/...`` or ``/set/...``
    literal to ``True``.

    Args:
        mainloop_path: Optional path to the file to inspect. Defaults to the
            module-level ``MAINLOOP``.

    Returns:
        A ``(mapping, run_mapping)`` tuple; both are empty dicts when neither
        strategy succeeds.
    """
    import sys  # local import: only needed for the temporary sys.path entry

    path = MAINLOOP if mainloop_path is None else Path(mainloop_path)
    run_mapping = {}
    mapping = {}
    module_dir = str(path.resolve().parent)
    try:
        spec = importlib.util.spec_from_file_location('project_mainloop', str(path))
        # Check before use: spec_from_file_location may return None.
        if spec is None or spec.loader is None:
            raise RuntimeError('Could not load mainloop module')
        module = importlib.util.module_from_spec(spec)
        # Make the module's own directory importable while it executes so
        # that its sibling imports resolve even when this script is started
        # from another folder; the entry is removed again afterwards.
        sys.path.insert(0, module_dir)
        try:
            spec.loader.exec_module(module)
        finally:
            try:
                sys.path.remove(module_dir)
            except ValueError:
                pass  # the executed module already removed it
        mapping = getattr(module, 'mapping', {}) or {}
        run_mapping = getattr(module, 'run_mapping', {}) or {}
        return mapping, run_mapping
    except Exception as e:
        # Best effort by design: report and fall through to the text scan.
        print('Error importing mainloop.py', e)

    # Fallback: simple regex-based extraction from the file's text.
    try:
        text = path.read_text(encoding='utf-8')
        # run_mapping entries like: "transcription_mic": "/run/transcription_send_mic_message",
        for mm in re.finditer(r"[\'\"]([^\'\"]+)[\'\"]\s*:\s*[\'\"](/run/[a-zA-Z0-9_\-\/]+)[\'\"]", text):
            run_mapping[mm.group(1)] = mm.group(2)
        # mapping endpoints: any '/get/...' or '/set/...' literal in the file
        for mm in re.finditer(r"[\'\"](/(?:get|set)/[a-zA-Z0-9_\-\/]+)[\'\"]", text):
            mapping[mm.group(1)] = True
    except Exception as e:
        print('Error parsing mainloop.py via fallback', e)
    return mapping, run_mapping
def extract_run_events_from_controller(controller_path=None):
    """Collect the run keys/events that controller code passes to ``self.run``.

    Two call shapes are recognised by a plain text scan:

    * ``self.run(..., self.run_mapping["key"], ...)`` yields ``key``
    * ``self.run(..., "/run/xxx", ...)`` yields the ``/run/xxx`` literal

    Both single- and double-quoted literals are accepted. Because the scan
    stops at the first ``)`` after ``self.run(``, an argument list that
    contains a nested call before the key/literal is not detected.

    Args:
        controller_path: Optional path to the source file to scan. Defaults
            to the module-level ``CONTROLLER``.

    Returns:
        A set mixing run_mapping keys and literal ``/run/...`` events.
    """
    path = CONTROLLER if controller_path is None else Path(controller_path)
    code = path.read_text(encoding='utf-8')

    # self.run(..., self.run_mapping["xxx"], ...)
    via_mapping = re.compile(
        r"self\.run\([^\)]*self\.run_mapping\[\s*[\'\"]([^\'\"]+)[\'\"]\s*\]"
    )
    # self.run(..., "/run/xxx", ...) with either quote style
    direct_literal = re.compile(r"self\.run\([^\)]*[\'\"](/run/[^\'\"]+)[\'\"]")

    run_keys = {m.group(1) for m in via_mapping.finditer(code)}
    run_keys.update(m.group(1) for m in direct_literal.finditer(code))
    return run_keys
def extract_endpoints_from_docs(api_path=None, run_path=None):
    """Extract documented endpoints and run events from the markdown docs.

    Both documents are scanned for paths that start with ``/get``, ``/set``
    or ``/run`` followed by at least one ``/segment``. Umbrella placeholders
    (for example ``/get/data`` or ``/set/enable``) are ignored because they
    are not concrete endpoints.

    Args:
        api_path: Optional path to the API document. Defaults to the
            module-level ``DOC_API``.
        run_path: Optional path to the run-events document. Defaults to the
            module-level ``DOC_RUN``. A missing file is treated as empty.

    Returns:
        ``(endpoints, run_events)``: two sets, where ``run_events`` holds the
        tokens that start with ``/run/`` and ``endpoints`` holds the rest.
    """
    api_file = DOC_API if api_path is None else Path(api_path)
    run_file = DOC_RUN if run_path is None else Path(run_path)
    api = api_file.read_text(encoding='utf-8')
    run = run_file.read_text(encoding='utf-8') if run_file.exists() else ''

    # Conservative extraction. Every segment must end in a word character, so
    # a match can never end with '/' and no trailing-slash check is needed.
    pattern = re.compile(r"(/(?:get|set|run)(?:/[a-zA-Z0-9_\-]+)+)")
    umbrella_tokens = frozenset((
        '/get', '/set', '/run', '/get/data', '/set/data',
        '/set/enable', '/set/disable',
    ))

    endpoints = set()
    run_events = set()
    for text in (api, run):
        for m in pattern.finditer(text):
            token = m.group(1)
            if token in umbrella_tokens:
                continue
            if token.startswith('/run/'):
                run_events.add(token)
            else:
                endpoints.add(token)
    return endpoints, run_events
def main():
    """Print a plain-text parity report between code and documentation.

    Gathers endpoints and run events from mainloop.py, run keys from
    controller.py and documented endpoints/run events from the docs, then
    prints a summary followed by four difference sections.
    """
    mapping, run_mapping = extract_mapping_from_mainloop()
    code_endpoints = set(mapping.keys())
    code_run_events = set(run_mapping.values())
    controller_run_keys = extract_run_events_from_controller()
    doc_endpoints, doc_run_events = extract_endpoints_from_docs()

    # Controller keys are either literal '/run/...' events or run_mapping
    # keys; translate the latter to their '/run/...' value when known and
    # keep unknown keys unchanged so they show up in the report.
    normalized = {
        key if key.startswith('/run/')
        else (run_mapping[key] if key in run_mapping else key)
        for key in controller_run_keys
    }

    report = [
        '=== Summary ===',
        f'Code endpoints (/get,/set,/run): {len(code_endpoints)}',
        f'Code run_mapping entries: {len(code_run_events)}',
        f'Controller-run keys found by scan: {len(controller_run_keys)}',
        f'Documented endpoints found in docs/api.md: {len(doc_endpoints)}',
        f'Documented run events found in docs: {len(doc_run_events)}',
    ]

    # (heading, offending items) pairs, rendered in this order.
    sections = (
        ('\n=== Endpoints present in code but NOT documented ===',
         code_endpoints - doc_endpoints),
        ('\n=== Endpoints documented but NOT in code ===',
         doc_endpoints - code_endpoints),
        ('\n=== Run events present in code (run_mapping) but NOT documented ===',
         code_run_events - doc_run_events),
        ('\n=== Run keys emitted in controller (self.run mapping keys) but NOT in run_mapping values ===',
         normalized - code_run_events),
    )
    for heading, items in sections:
        report.append(heading)
        if not items:
            report.append(' - None')
            continue
        report.extend(' - ' + entry for entry in sorted(items))

    print('\n'.join(report))


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,126 @@
import re
import json
import sys
from pathlib import Path
# Project root: the directory one level above the folder holding this script.
ROOT = Path(__file__).resolve().parents[1]
# Markdown documents that are compared against the code.
DOC_API = ROOT.joinpath('docs', 'api.md')
DOC_RUN = ROOT.joinpath('docs', 'run_events_payloads.md')
# Put the project root first on sys.path so that `import mainloop` resolves
# when this script is launched from the scripts/ folder.
sys.path.insert(0, str(ROOT))
def _scan_controller_run_keys(controller_src):
    """Return the run keys/events that controller source passes to ``self.run``."""
    keys = set()
    # self.run(..., self.run_mapping["key"], ...) -> key
    for m in re.finditer(r"self\.run\([^\)]*self\.run_mapping\[\s*[\'\"]([^\'\"]+)[\'\"]\s*\]", controller_src):
        keys.add(m.group(1))
    # self.run(..., "/run/xxx", ...) -> literal event, with either quote style
    for m in re.finditer(r"self\.run\([^\)]*[\'\"](/run/[a-zA-Z0-9_\-/]+)[\'\"]", controller_src):
        keys.add(m.group(1))
    return keys


def _extract_doc_endpoints(text):
    """Return the concrete /get, /set, /run and /delete endpoints found in *text*."""
    # The negative lookahead requires the verb to be a complete path segment,
    # so words such as '/settings' or '/runtime' are not mistaken for endpoints.
    endpoint_pattern = re.compile(r"(/(?:get|set|run|delete)(?![A-Za-z0-9_\-])[A-Za-z0-9_\-/]*)")
    # Umbrella placeholders come from comma-separated lists or pattern
    # fragments in the markdown; they are not concrete endpoints.
    umbrella_tokens = {
        '/get', '/set', '/run', '/get/data', '/set/data', '/set/enable', '/set/disable'
    }
    # Entries ending in '/' are likewise artifacts of pattern matching.
    return {
        m.group(1)
        for m in endpoint_pattern.finditer(text)
        if m.group(1) not in umbrella_tokens and not m.group(1).endswith('/')
    }


def _normalize_run_keys(controller_run_keys, run_mapping):
    """Translate run_mapping keys to their values; keep literals and unknown keys."""
    normalized = set()
    for k in controller_run_keys:
        if not k.startswith('/run/') and k in run_mapping:
            normalized.add(run_mapping[k])
        else:
            normalized.add(k)
    return normalized


def _append_section(report, heading, items):
    """Append *heading* and one ' - item' line per sorted item (or ' - None')."""
    report.append(heading)
    if items:
        report.extend(' - ' + e for e in sorted(items))
    else:
        report.append(' - None')


def main():
    """Compare the live mapping/run_mapping objects with the documentation.

    Imports the project's ``mainloop`` and ``controller`` modules, scans the
    controller source and the markdown docs, then prints a human-readable
    report followed by the same data as one JSON line placed between
    ``JSON_OUTPUT_START`` and ``JSON_OUTPUT_END`` markers.
    """
    # Delayed imports to avoid module-level import ordering issues (E402 in linters)
    import mainloop
    import controller as controller_module

    mapping_keys = set(mainloop.mapping.keys())
    run_mapping_values = set(mainloop.run_mapping.values())

    # Extract controller-emitted run keys by source scan.
    controller_src = Path(controller_module.__file__).read_text(encoding='utf-8')
    controller_run_keys = _scan_controller_run_keys(controller_src)

    # Read the docs; the run-events document is optional.
    api_text = DOC_API.read_text(encoding='utf-8')
    run_text = DOC_RUN.read_text(encoding='utf-8') if DOC_RUN.exists() else ''
    doc_endpoints = _extract_doc_endpoints(api_text + '\n' + run_text)

    # Compare
    missing_in_docs = mapping_keys - doc_endpoints
    # A documented endpoint is valid if it is either an incoming mapping key
    # or an outgoing run event; anything else counts as extra.
    extra_in_docs = doc_endpoints - (mapping_keys | run_mapping_values)
    missing_run_in_docs = run_mapping_values - doc_endpoints
    normalized = _normalize_run_keys(controller_run_keys, mainloop.run_mapping)
    extra_controller_keys = normalized - run_mapping_values

    report = [
        '=== Runtime verification report ===',
        f'Code mapping endpoints: {len(mapping_keys)}',
        f'Code run_mapping entries: {len(run_mapping_values)}',
        f'Controller emitted run keys: {len(controller_run_keys)}',
        f'Documented endpoints (docs): {len(doc_endpoints)}',
    ]
    _append_section(report, '\n--- Endpoints present in code but NOT documented ---', missing_in_docs)
    _append_section(report, '\n--- Endpoints documented but NOT in code ---', extra_in_docs)
    _append_section(report, '\n--- Run events present in code (run_mapping) but NOT documented ---', missing_run_in_docs)
    _append_section(report, '\n--- Run keys emitted in controller (normalized) but NOT in run_mapping values ---', extra_controller_keys)
    print('\n'.join(report))

    # Also output JSON for downstream processing
    out = {
        'mapping_keys': sorted(mapping_keys),
        'run_mapping_values': sorted(run_mapping_values),
        'controller_run_keys': sorted(controller_run_keys),
        'doc_endpoints': sorted(doc_endpoints),
        'missing_in_docs': sorted(missing_in_docs),
        'extra_in_docs': sorted(extra_in_docs),
        'missing_run_in_docs': sorted(missing_run_in_docs),
        'extra_controller_keys': sorted(extra_controller_keys),
    }
    print('\nJSON_OUTPUT_START')
    print(json.dumps(out))
    print('JSON_OUTPUT_END')


if __name__ == '__main__':
    main()