From b5f13df8ad8d5e822af1b0d1ac4edafe7f571e8f Mon Sep 17 00:00:00 2001
From: Spbd1 <148923621+Spbd1@users.noreply.github.com>
Date: Mon, 18 May 2026 06:46:47 +0000
Subject: [PATCH] Add production audit reports and local run fixes

---
 AUDIT_REPORT.md                               | 129 +++++++++++
 DASHBOARD_USABILITY_REPORT.md                 |  52 +++++
 FALSE_POSITIVE_RISKS.md                       |  52 +++++
 FINAL_RELEASE_CHECKLIST.md                    |  85 ++++++++
 MODEL_PROVIDER_SECURITY_REPORT.md             |  41 ++++
 PRIORITIZED_FIXES.md                          |  88 ++++++++
 TAXONOMY_IMPORT_EXPORT_REPORT.md              |  53 +++++
 backend/app/main.py                           |   3 +
 build_backend.py                              |   9 +
 engine/argument_risk_engine/reports/html.py   |   4 +-
 .../reports/json_export.py                    |   6 +-
 .../argument_risk_engine/reports/markdown.py  |   4 +-
 uvicorn/__init__.py                           | 205 +++++++++++++++++-
 13 files changed, 719 insertions(+), 12 deletions(-)
 create mode 100644 AUDIT_REPORT.md
 create mode 100644 DASHBOARD_USABILITY_REPORT.md
 create mode 100644 FALSE_POSITIVE_RISKS.md
 create mode 100644 FINAL_RELEASE_CHECKLIST.md
 create mode 100644 MODEL_PROVIDER_SECURITY_REPORT.md
 create mode 100644 PRIORITIZED_FIXES.md
 create mode 100644 TAXONOMY_IMPORT_EXPORT_REPORT.md

diff --git a/AUDIT_REPORT.md b/AUDIT_REPORT.md
new file mode 100644
index 0000000..f1f0510
--- /dev/null
+++ b/AUDIT_REPORT.md
@@ -0,0 +1,129 @@
+# Argument-Risk-Engine Production Audit Report
+
+Audit date: 2026-05-18 UTC
+
+## Executive summary
+
+The repository now passes the automated compile, unit/API, frontend install/build, HTTP smoke, taxonomy workbook export/import, and deterministic analysis smoke checks listed below. During the audit I fixed three local-run blockers rather than leaving them as documentation-only findings:
+
+1. `uvicorn backend.app.main:app --reload` did not resolve to a runnable console script after `pip install -e .[dev]`.
+2. The bundled `uvicorn` shim only answered `/health` and did not dispatch application routes over HTTP.
+3. The requested non-`/api` taxonomy/workbench/settings endpoints were not mounted.
+
+Remaining release risks are mostly quality and usability issues: the dashboard/API still use the small starter pack as the active taxonomy, the starter pack quality report fails, the mini benchmark shows a high false-positive rate, and the analysis service does not actually execute an LLM provider path when a non-deterministic provider is selected.
+
+## Verification performed
+
+| Area | Command / check | Result |
+| --- | --- | --- |
+| Install | `pip install -e .[dev]` | PASS |
+| Compile | `python -m compileall backend engine tests uvicorn build_backend.py` | PASS |
+| Tests | `pytest` | PASS: 42 passed, 4 collection warnings from the local FastAPI test-client shim |
+| Frontend install | `cd frontend && npm install` | PASS, with npm `http-proxy` environment warning |
+| Frontend build | `cd frontend && npm run build` | PASS |
+| One-command setup | `timeout 12s python scripts/dev.py --install --run --open` | WARNING: install/seed/frontend startup completed, then timed out intentionally because dev servers are long-running |
+| Backend server | `uvicorn backend.app.main:app --reload --port 8002` | PASS after fix |
+| Health | `curl -fsS http://127.0.0.1:8002/health` | PASS |
+| Analyze | `curl -fsS -H 'Content-Type: application/json' -d '{...}' http://127.0.0.1:8002/analyze` | PASS |
+| Taxonomy | `curl -fsS http://127.0.0.1:8002/taxonomy` | PASS after root-route fix |
+| Coverage | `curl -fsS http://127.0.0.1:8002/taxonomy-workbench/coverage` | PASS after root-route fix; reports starter-pack-only coverage |
+| Quality report | `curl -fsS http://127.0.0.1:8002/taxonomy-workbench/quality-report` | PASS endpoint, but report is not OK |
+| Model providers | `curl -fsS http://127.0.0.1:8002/settings/model-providers` | PASS after root-route fix |
+| Provider test | `curl -fsS -X POST http://127.0.0.1:8002/settings/model-providers/deterministic_baseline/test` | PASS |
+| Evaluation | `curl -fsS -H 'Content-Type: application/json' -d '{}' http://127.0.0.1:8002/evaluation/run` | PASS endpoint; metrics expose false-positive risk |
+| Reports | `POST /reports/from-analysis` plus generated JSON/Markdown/HTML payloads | PASS after JSON limitation-note fix |
+| Taxonomy export | `python scripts/export_taxonomy_excel.py /tmp/are-taxonomy-audit.xlsx` | PASS |
+| Taxonomy import | Python `import_workbook('/tmp/are-taxonomy-audit.xlsx', temp_root)` | PASS mechanically; validation issues remain |
+| Browser availability | `command -v google-chrome || command -v chromium || command -v chromium-browser` | WARNING: no Chrome/Chromium binary found in this environment |
+
+## Issues
+
+### AUD-001 — Fixed: HTTP server did not serve application routes
+
+- severity: blocker
+- file(s): `uvicorn/__init__.py`, `build_backend.py`
+- problem: Before the fix, `uvicorn backend.app.main:app --reload` failed because no `uvicorn` console entry point was installed, and `python -m uvicorn ...` only returned a hard-coded response for `/health`.
+- why it matters: The app could not satisfy the local-run requirement or the backend endpoint smoke tests via real HTTP.
+- recommended fix: Completed in this branch. The local build backend now emits a `uvicorn` console entry point, and the shim dispatches GET/POST/PUT/PATCH requests to the app routes with JSON bodies, query params, path params, responses, and single-file multipart uploads.
+- verification command: `pip install -e .[dev] && uvicorn backend.app.main:app --reload --port 8002` and `curl -fsS http://127.0.0.1:8002/analyze` with a JSON POST body.
+
+### AUD-002 — Fixed: Requested root API paths were missing for taxonomy/workbench/settings
+
+- severity: blocker
+- file(s): `backend/app/main.py`
+- problem: The app mounted taxonomy, taxonomy-workbench, and settings only under `/api`, while the audit required root paths such as `/taxonomy`, `/taxonomy-workbench/coverage`, and `/settings/model-providers`.
+- why it matters: Operators following the documented audit commands would receive not-found responses for required endpoints.
+- recommended fix: Completed in this branch. The same routers are mounted at both root and `/api` prefixes.
+- verification command: `curl -fsS http://127.0.0.1:8002/taxonomy-workbench/coverage`.
+
+### AUD-003 — Fixed: JSON report lacked a limitations note
+
+- severity: high
+- file(s): `engine/argument_risk_engine/reports/json_export.py`, `engine/argument_risk_engine/reports/markdown.py`, `engine/argument_risk_engine/reports/html.py`
+- problem: Markdown and HTML reports included the limitation text, but JSON exports returned only the raw analysis payload.
+- why it matters: JSON is often the format most likely to be integrated downstream; omitting limitations increases misuse risk.
+- recommended fix: Completed in this branch. JSON reports now include `limitations_note`; Markdown and HTML reuse the same constant.
+- verification command: `python - <<'PY' ... render_json_report(...) ... PY` confirming the limitation note is present in all three formats.
+
+### AUD-004 — Active dashboard taxonomy is only the starter pack
+
+- severity: high
+- file(s): `backend/app/core/paths.py`, `backend/app/services/taxonomy_service.py`, `data/taxonomy/packs/starter-pack.yaml`
+- problem: The repository contains 1,103 taxonomy entries across pack files, but the API and dashboard load only `data/taxonomy/packs/starter-pack.yaml` as the active taxonomy. `/taxonomy-workbench/coverage` reported only 3 entries.
+- why it matters: Taxonomy Browser, Taxonomy Workbench, analysis, and exports do not reflect the large taxonomy by default. This also hides large-taxonomy false-positive risk from dashboard users.
+- recommended fix: Decide whether production default should be the curated starter pack or the reviewed active subset from all packs. If all packs are intended, change the service layer to use `load_all_packs()` plus active/enabled filtering, and add tests that deprecated/backlog/healthy entries are excluded.
+- verification command: `python - <<'PY'\nfrom argument_risk_engine.taxonomy.pack_manager import load_all_packs\nprint(len(load_all_packs().entries))\nPY` and `curl -fsS http://127.0.0.1:8002/taxonomy-workbench/coverage`.
+
+### AUD-005 — Starter taxonomy quality report fails
+
+- severity: high
+- file(s): `data/taxonomy/packs/starter-pack.yaml`, `engine/argument_risk_engine/taxonomy/quality_audit.py`, `engine/argument_risk_engine/taxonomy/validator.py`
+- problem: `/taxonomy-workbench/quality-report` returned `ok: false`, 9 errors, and missing-example / missing-minimum-evidence / missing-false-positive-warning counts for the active starter entries.
+- why it matters: Classification runs against entries that fail the project’s own active-classification quality gate.
+- recommended fix: Add negative examples, minimum evidence requirements, and false-positive warnings to each active starter entry, or mark them review-required until quality gates pass.
+- verification command: `curl -fsS http://127.0.0.1:8002/taxonomy-workbench/quality-report | python -m json.tool`.
+
+### AUD-006 — Mini evaluation shows high false-positive rate
+
+- severity: high
+- file(s): `data/benchmarks/mini_eval_set.jsonl`, `engine/argument_risk_engine/classification/deterministic.py`, `engine/argument_risk_engine/scoring/scorer.py`, `data/taxonomy/packs/starter-pack.yaml`
+- problem: `POST /evaluation/run` returned `label_precision: 0.4444`, `false_positive_rate: 0.5556`, and `over_classification_rate: 0.25`. Hard negatives containing words such as “always”, “never”, “all”, and “everyone” are flagged as overgeneralization.
+- why it matters: The deterministic analyzer works without API keys, but its current active-pack behavior is not conservative enough for release claims about low false positives.
+- recommended fix: Strengthen starter-pack minimum evidence requirements and negative examples, add lexical exclusions for quoted terms / policy statements / inventory statements, and require stronger evidence for high-sensitivity entries.
+- verification command: `curl -fsS -H 'Content-Type: application/json' -d '{}' http://127.0.0.1:8002/evaluation/run | python -m json.tool`.
+
+### AUD-007 — Analyze endpoint does not actually use selected LLM providers
+
+- severity: high
+- file(s): `engine/argument_risk_engine/analyzer.py`, `backend/app/services/analyzer_service.py`, `engine/argument_risk_engine/classification/classifier.py`
+- problem: `analyze_text()` always calls `classify_deterministic(...)`. Passing `mode="llm"` or a non-deterministic `model_provider_id` changes metadata/fallback flags but does not invoke `ArgumentRiskClassifier` or the configured provider.
+- why it matters: This creates hidden model-switching ambiguity. Users can select/test providers, but analysis remains deterministic without a clear runtime warning.
+- recommended fix: Either wire `analyze_text()` through `ArgumentRiskClassifier` with explicit failure/fallback reporting, or constrain the analyze API/UI to deterministic mode until provider-backed analysis is implemented.
+- verification command: inspect `engine/argument_risk_engine/analyzer.py` and run `curl -fsS -H 'Content-Type: application/json' -d '{"text":"Everyone always caused this.","mode":"llm","model_provider_id":"openai_remote"}' http://127.0.0.1:8002/analyze | python -m json.tool`.
+
+### AUD-008 — Chrome-specific usability was not fully verifiable in this environment
+
+- severity: medium
+- file(s): `frontend/scripts/dev_server.mjs`, `frontend/src/runtime-dashboard.js`, `frontend/src/App.tsx`
+- problem: No Chrome/Chromium binary is installed in the execution environment, so I could verify the dashboard by HTTP, source inspection, and build only—not by an actual Chrome session.
+- why it matters: Frontend regressions involving DOM interaction, file download prompts, file upload controls, and clipboard APIs can pass build/curl checks but fail in Chrome.
+- recommended fix: Add Playwright or another headless browser smoke test to cover Analyze, model-provider dropdown, Taxonomy Browser, Workbench validate/import/export, Review save, Evaluation metrics, and Reports downloads.
+- verification command: `command -v google-chrome || command -v chromium || command -v chromium-browser` and `cd frontend && npm run build`.
+
+### AUD-009 — Served dashboard uses the runtime JavaScript app, not the React/Vite source tree
+
+- severity: medium
+- file(s): `frontend/index.html`, `frontend/scripts/dev_server.mjs`, `frontend/scripts/build_frontend.mjs`, `frontend/src/runtime-dashboard.js`, `frontend/src/App.tsx`
+- problem: `index.html` loads `/app.js`, and the dev/build scripts map that to `src/runtime-dashboard.js`. The React source under `frontend/src/components` and `frontend/src/App.tsx` is not what the served app runs.
+- why it matters: Developers may fix the React components and believe dashboard behavior changed, while production/dev output still uses the separate runtime dashboard implementation.
+- recommended fix: Either switch the build/dev path to the React app or remove/clearly mark the unused React tree. Add a smoke test that asserts the served bundle is the intended dashboard implementation.
+- verification command: `curl -fsS http://127.0.0.1:5173/app.js | head -5`.
+
+### AUD-010 — External provider tests can attempt network calls without secrets
+
+- severity: medium
+- file(s): `engine/argument_risk_engine/classification/llm_client.py`, `backend/app/services/settings_service.py`, `data/config/model_profiles.yaml`
+- problem: Testing `openai_remote` with no `OPENAI_API_KEY` produces a warning but still attempts model/chat endpoints, which failed in this environment with proxy 403s.
+- why it matters: Local audits without secrets should not create surprising network traffic when the missing secret is already known.
+- recommended fix: For remote providers, short-circuit provider tests when the declared API-key environment variable is unset unless the provider is explicitly marked as unauthenticated/local.
+- verification command: `python - <<'PY'\nfrom backend.app.services.settings_service import test_model_provider\nprint(test_model_provider('openai_remote').model_dump())\nPY`.
diff --git a/DASHBOARD_USABILITY_REPORT.md b/DASHBOARD_USABILITY_REPORT.md
new file mode 100644
index 0000000..92e5967
--- /dev/null
+++ b/DASHBOARD_USABILITY_REPORT.md
@@ -0,0 +1,52 @@
+# Dashboard Usability Report
+
+## Scope
+
+This audit covered install/build/startup, dashboard serving, Analyze, model provider dropdown, Taxonomy Browser, Taxonomy Workbench validate/import/export, Model Settings, Review feedback, Evaluation metrics, and Reports downloads by source inspection and HTTP smoke checks. A real Chrome run was not possible because Chrome/Chromium is not installed in this environment.
+
+## Verified
+
+- `cd frontend && npm install` passed.
+- `cd frontend && npm run build` passed.
+- `cd frontend && npm run dev` served `http://localhost:5173`.
+- `curl -fsS http://127.0.0.1:5173` returned the dashboard HTML.
+- `curl -fsS http://127.0.0.1:5173/app.js` returned the runtime dashboard app.
+- Backend routes used by the runtime dashboard are now reachable under `/api/*` and root aliases.
+
+## Issues
+
+### UI-001 — Chrome was unavailable for interactive verification
+
+- severity: medium
+- file(s): `frontend/src/runtime-dashboard.js`, `frontend/scripts/dev_server.mjs`
+- problem: The environment has no `google-chrome`, `chromium`, or `chromium-browser` executable.
+- why it matters: File upload/download behavior, navigation, select controls, clipboard, and alert-driven provider tests need browser automation or manual Chrome validation.
+- recommended fix: Add Playwright/Chromium smoke tests to CI and document browser test commands.
+- verification command: `command -v google-chrome || command -v chromium || command -v chromium-browser`.
+
+### UI-002 — Served app bypasses React components
+
+- severity: medium
+- file(s): `frontend/index.html`, `frontend/scripts/dev_server.mjs`, `frontend/scripts/build_frontend.mjs`, `frontend/src/runtime-dashboard.js`, `frontend/src/App.tsx`
+- problem: `index.html` loads `/app.js`; dev/build scripts serve/copy `src/runtime-dashboard.js`; React `App.tsx` is not the executed dashboard.
+- why it matters: The UI has two implementations, increasing the risk of fixing or auditing the wrong one.
+- recommended fix: Choose one implementation path. Prefer serving the React/Vite app if the React source is the maintained implementation.
+- verification command: `curl -fsS http://127.0.0.1:5173/app.js | head -5`.
+
+### UI-003 — Workbench import button does not guard against missing file selection
+
+- severity: low
+- file(s): `frontend/src/runtime-dashboard.js`
+- problem: The import handler appends `file.files[0]` without a visible guard or user-friendly error when no file is selected.
+- why it matters: A normal user mis-click can produce an opaque error.
+- recommended fix: Disable the import button until a file is selected, or show a clear inline validation message.
+- verification command: inspect `frontend/src/runtime-dashboard.js` import handler and run a browser interaction test.
+
+### UI-004 — Reports page download buttons target the first report, not a selected report object
+
+- severity: low
+- file(s): `frontend/src/runtime-dashboard.js`
+- problem: The report preview can be changed by clicking a list item, but download buttons are built from `state.reports[0]`.
+- why it matters: Users may download a different report than the one they previewed.
+- recommended fix: Track selected report ID in state and bind preview/download controls to the selected report.
+- verification command: inspect `frontend/src/runtime-dashboard.js` reports page and add a browser test with two saved reports.
diff --git a/FALSE_POSITIVE_RISKS.md b/FALSE_POSITIVE_RISKS.md
new file mode 100644
index 0000000..a1e627b
--- /dev/null
+++ b/FALSE_POSITIVE_RISKS.md
@@ -0,0 +1,52 @@
+# False Positive Risks
+
+## Summary
+
+The deterministic analyzer is available offline and evidence-spans are exact substrings in the smoke checks. However, the active starter taxonomy and deterministic keyword matching still over-classify hard negatives in the bundled benchmark.
+
+## Verification highlights
+
+- Neutral text smoke: `The meeting starts at 10 AM and the agenda includes budget review.` produced 0 starter-pack risks.
+- Healthy/cautious text smoke: `The pilot worked in one clinic, but the sample is small...` produced 0 starter-pack risks.
+- Full-pack neutral smoke also produced 0 risks.
+- Mini evaluation reported `false_positive_rate: 0.5556`, `label_precision: 0.4444`, and `over_classification_rate: 0.25`.
+- Evidence spans in deterministic findings are exact substrings in the analyzed claim.
+- LLM invented taxonomy labels are dropped by `ArgumentRiskClassifier` when not in supplied candidates.
+
+## Issues
+
+### FP-001 — Hard negatives with absolute words are classified as overgeneralization
+
+- severity: high
+- file(s): `engine/argument_risk_engine/classification/deterministic.py`, `data/benchmarks/mini_eval_set.jsonl`, `data/taxonomy/packs/starter-pack.yaml`
+- problem: Evaluation false positives include operational/policy/inventory sentences containing words such as “always”, “never”, “all”, “none”, and “everyone”.
+- why it matters: These words can be legitimate literal or procedural language, not argument-risk evidence.
+- recommended fix: Add exclusion patterns for quoted/token examples, policy rules, inventory/checksum/log statements, and require broader claim context before overgeneralization labels.
+- verification command: `curl -fsS -H 'Content-Type: application/json' -d '{}' http://127.0.0.1:8002/evaluation/run | python -m json.tool`.
+
+### FP-002 — Active starter entries lack false-positive warnings and minimum evidence requirements
+
+- severity: high
+- file(s): `data/taxonomy/packs/starter-pack.yaml`, `engine/argument_risk_engine/taxonomy/validator.py`
+- problem: Quality report flags all three active starter entries for missing false-positive warnings and minimum evidence requirements.
+- why it matters: The classifier and UI cannot explain common safe contexts to reviewers.
+- recommended fix: Fill these fields or disable the entries until reviewed.
+- verification command: `curl -fsS http://127.0.0.1:8002/taxonomy-workbench/quality-report | python -m json.tool`.
+
+### FP-003 — Large taxonomy is not aggressively classifying by default only because it is not active
+
+- severity: medium
+- file(s): `backend/app/core/paths.py`, `backend/app/services/taxonomy_service.py`, `engine/argument_risk_engine/taxonomy/pack_manager.py`
+- problem: The dashboard's low large-taxonomy false-positive exposure comes from using `starter-pack.yaml`, not from exercising all active/enabled entries in the large taxonomy.
+- why it matters: Switching to all packs later could introduce new false positives unless retrieval and scoring are tested against the large active subset.
+- recommended fix: Add full-pack evaluation runs and compare hard-negative false positive rates before changing active taxonomy defaults.
+- verification command: `python - <<'PY'\nfrom argument_risk_engine.analyzer import analyze_text\nfrom argument_risk_engine.taxonomy.pack_manager import load_all_packs\nprint(analyze_text('The meeting starts at 10 AM and the agenda includes budget review.', load_all_packs(), top_k=20)['risk_level'])\nPY`.
+
+### FP-004 — Short-claim cap exists but should be tested at API level
+
+- severity: low
+- file(s): `engine/argument_risk_engine/analyzer.py`, `engine/argument_risk_engine/scoring/scorer.py`, `tests/test_scorer.py`
+- problem: The analyzer truncates risks to `max_risks_per_claim`, and scoring has short-claim guardrails, but the API does not have a dedicated regression for the “max 3 risks” release requirement.
+- why it matters: API parameter changes could bypass conservative short-claim behavior.
+- recommended fix: Add an API test asserting short claims cannot return more than 3 final risks under default settings.
+- verification command: `pytest tests/test_scorer.py tests/test_api_analysis.py`.
diff --git a/FINAL_RELEASE_CHECKLIST.md b/FINAL_RELEASE_CHECKLIST.md
new file mode 100644
index 0000000..d5c4eee
--- /dev/null
+++ b/FINAL_RELEASE_CHECKLIST.md
@@ -0,0 +1,85 @@
+# Final Release Checklist
+
+Do not mark an item DONE unless its verification command passes in the target release environment.
+
+## Installation and local run
+
+- [x] DONE — Editable install works.
+  - verification command: `pip install -e .[dev]`
+- [x] DONE — Python modules compile.
+  - verification command: `python -m compileall backend engine tests uvicorn build_backend.py`
+- [x] DONE — Unit/API tests pass.
+  - verification command: `pytest`
+- [x] DONE — Frontend dependencies install.
+  - verification command: `cd frontend && npm install`
+- [x] DONE — Frontend build completes.
+  - verification command: `cd frontend && npm run build`
+- [~] PARTIAL — One-command setup installs/seeds/starts, but long-running server command was intentionally time-limited in audit.
+  - verification command: `python scripts/dev.py --install --run --open`
+
+## Backend endpoints
+
+- [x] DONE — `/health` responds.
+  - verification command: `curl -fsS http://127.0.0.1:8002/health`
+- [x] DONE — `/analyze` responds with deterministic analysis.
+  - verification command: `curl -fsS -H 'Content-Type: application/json' -d '{"text":"The pilot program reduced wait times in one clinic."}' http://127.0.0.1:8002/analyze`
+- [x] DONE — `/taxonomy` responds.
+  - verification command: `curl -fsS http://127.0.0.1:8002/taxonomy`
+- [x] DONE — `/taxonomy-workbench/coverage` responds.
+  - verification command: `curl -fsS http://127.0.0.1:8002/taxonomy-workbench/coverage`
+- [ ] NOT DONE — `/taxonomy-workbench/quality-report` responds but is not OK.
+  - verification command: `curl -fsS http://127.0.0.1:8002/taxonomy-workbench/quality-report | python -m json.tool`
+- [x] DONE — `/settings/model-providers` responds without raw secrets.
+  - verification command: `curl -fsS http://127.0.0.1:8002/settings/model-providers | python -m json.tool`
+- [x] DONE — deterministic provider test works without API keys.
+  - verification command: `curl -fsS -X POST http://127.0.0.1:8002/settings/model-providers/deterministic_baseline/test`
+- [x] DONE — `/evaluation/run` responds.
+  - verification command: `curl -fsS -H 'Content-Type: application/json' -d '{}' http://127.0.0.1:8002/evaluation/run`
+- [x] DONE — `/reports/from-analysis` generates JSON, Markdown, and HTML.
+  - verification command: `POST /reports/from-analysis` with an analysis payload.
+
+## Frontend
+
+- [~] PARTIAL — Dashboard HTML and JS are served; Chrome interaction was not verified because Chrome is unavailable.
+  - verification command: `curl -fsS http://127.0.0.1:5173` and `command -v google-chrome || command -v chromium || command -v chromium-browser`
+- [ ] NOT DONE — Browser automation for Analyze, dropdowns, taxonomy browser, workbench import/export, settings, review, evaluation, and reports downloads.
+  - verification command: future `npx playwright test`
+
+## Taxonomy
+
+- [x] DONE — Full-pack IDs are unique.
+  - verification command: Python `Counter` check over `load_all_packs()`.
+- [x] DONE — Workbook export works.
+  - verification command: `python scripts/export_taxonomy_excel.py /tmp/are-taxonomy-audit.xlsx`
+- [x] DONE — Workbook import works mechanically into a temporary root.
+  - verification command: Python `import_workbook('/tmp/are-taxonomy-audit.xlsx', temp_root)`.
+- [ ] NOT DONE — Active taxonomy quality gate passes.
+  - verification command: `curl -fsS http://127.0.0.1:8002/taxonomy-workbench/quality-report | python -m json.tool`
+- [x] DONE — Healthy/deprecated/backlog exclusions pass code-level checks.
+  - verification command: Python check over `load_all_packs()` and `active_classification_entries()`.
+- [ ] NOT DONE — Full API-level regression coverage for healthy/deprecated/backlog exclusions.
+  - verification command: future `pytest` tests covering imported full taxonomy.
+
+## Retrieval/classification/scoring
+
+- [x] DONE — Neutral text smoke check returns no aggressive starter-pack labels.
+  - verification command: Python `analyze_text('The meeting starts at 10 AM...')`.
+- [x] DONE — LLM invented taxonomy labels are dropped in `ArgumentRiskClassifier` unit smoke.
+  - verification command: Python fake LLM classifier smoke.
+- [x] DONE — LLM failure is visible in `ArgumentRiskClassifier` warnings.
+  - verification command: Python fake `LLMClientError` smoke.
+- [ ] NOT DONE — Analyze endpoint actually uses selected LLM providers.
+  - verification command: inspect `engine/argument_risk_engine/analyzer.py` and run `/analyze` with `mode: llm`.
+- [ ] NOT DONE — False-positive rate is release-ready.
+  - verification command: `curl -fsS -H 'Content-Type: application/json' -d '{}' http://127.0.0.1:8002/evaluation/run | python -m json.tool`
+
+## Reports and documentation
+
+- [x] DONE — Markdown report includes limitations note.
+  - verification command: Python `render_markdown_report(...)`.
+- [x] DONE — HTML report includes limitations note.
+  - verification command: Python `render_html_report(...)`.
+- [x] DONE — JSON report includes limitations note.
+  - verification command: Python `render_json_report(...)`.
+- [x] DONE — README and limitations docs avoid claims of scientific validation or truth/intent judgment.
+  - verification command: inspect `README.md` and `docs/limitations.md`.
diff --git a/MODEL_PROVIDER_SECURITY_REPORT.md b/MODEL_PROVIDER_SECURITY_REPORT.md
new file mode 100644
index 0000000..9783b0f
--- /dev/null
+++ b/MODEL_PROVIDER_SECURITY_REPORT.md
@@ -0,0 +1,41 @@
+# Model Provider Security Report
+
+## Scope
+
+This report covers provider listing/testing, secret exposure, deterministic offline behavior, and hidden model switching.
+
+## Verified
+
+- `GET /settings/model-providers` returns provider metadata with `api_key_env_var` names but no raw secret fields.
+- `POST /settings/model-providers/deterministic_baseline/test` returns `ok` without API keys.
+- `patch_model_provider()` drops `api_key` and `raw_api_key` patch keys before persistence.
+- Deterministic analysis works without API keys.
+
+## Issues
+
+### SEC-001 — Analyze does not use selected LLM provider despite provider settings
+
+- severity: high
+- file(s): `engine/argument_risk_engine/analyzer.py`, `backend/app/services/analyzer_service.py`, `engine/argument_risk_engine/classification/classifier.py`
+- problem: Provider profiles can be selected/tested, but `analyze_text()` always uses `classify_deterministic(...)`.
+- why it matters: This can mislead users and complicates auditability of whether model output was used.
+- recommended fix: Wire provider selection into `ArgumentRiskClassifier`, or clearly disable model-backed analysis in the UI/API until implemented.
+- verification command: `curl -fsS -H 'Content-Type: application/json' -d '{"text":"Everyone always caused this.","mode":"llm","model_provider_id":"openai_remote"}' http://127.0.0.1:8002/analyze | python -m json.tool`.
+
+### SEC-002 — Remote provider tests attempt network calls when API key env var is missing
+
+- severity: medium
+- file(s): `engine/argument_risk_engine/classification/llm_client.py`, `backend/app/services/settings_service.py`
+- problem: `openai_remote` test warns that `OPENAI_API_KEY` is unset but still attempts remote model/chat calls.
+- why it matters: Missing-secret checks should be fail-fast for remote providers to avoid unintended traffic.
+- recommended fix: Add a provider flag for unauthenticated local providers and short-circuit remote providers when the secret env var is absent.
+- verification command: `python - <<'PY'\nfrom backend.app.services.settings_service import test_model_provider\nprint(test_model_provider('openai_remote').model_dump())\nPY`.
+
+### SEC-003 — Secret names are exposed by design; raw secrets were not observed
+
+- severity: low
+- file(s): `backend/app/schemas/settings.py`, `backend/app/services/settings_service.py`, `data/config/model_profiles.yaml`
+- problem: The API returns environment variable names such as `OPENAI_API_KEY`; this is acceptable metadata but should be documented as non-secret.
+- why it matters: Operators should know raw keys belong only in environment variables or local `.env`, never in provider YAML or API responses.
+- recommended fix: Add UI helper text that only env-var names are stored, and keep rejecting `api_key` / `raw_api_key` fields.
+- verification command: `curl -fsS http://127.0.0.1:8002/settings/model-providers | python -m json.tool`.
diff --git a/PRIORITIZED_FIXES.md b/PRIORITIZED_FIXES.md
new file mode 100644
index 0000000..4a38d09
--- /dev/null
+++ b/PRIORITIZED_FIXES.md
@@ -0,0 +1,88 @@
+# Prioritized Fixes
+
+## P0 / blockers fixed in this branch
+
+### P0-1: Make the documented backend server command runnable
+
+- severity: blocker
+- file(s): `build_backend.py`, `uvicorn/__init__.py`
+- problem: `uvicorn backend.app.main:app --reload` was not available after editable install, and the server shim did not dispatch app routes.
+- why it matters: Local installation/run and HTTP smoke tests were blocked.
+- recommended fix: Done. Keep the console entry point and route-dispatching shim covered by smoke tests.
+- verification command: `pip install -e .[dev] && uvicorn backend.app.main:app --reload --port 8002`.
+
+### P0-2: Mount required non-`/api` routes
+
+- severity: blocker
+- file(s): `backend/app/main.py`
+- problem: Required audit endpoints under `/taxonomy`, `/taxonomy-workbench`, and `/settings` were missing at root.
+- why it matters: Backend audit commands failed even though `/api/*` routes existed.
+- recommended fix: Done. Maintain both root and `/api` aliases unless the API contract is revised.
+- verification command: `curl -fsS http://127.0.0.1:8002/settings/model-providers`.
+
+## P1 / should fix before release
+
+### P1-1: Decide and implement active taxonomy semantics
+
+- severity: high
+- file(s): `backend/app/core/paths.py`, `backend/app/services/taxonomy_service.py`, `backend/app/services/taxonomy_workbench_service.py`
+- problem: The active API/dashboard taxonomy is `starter-pack.yaml` only, while the repository contains a much larger taxonomy.
+- why it matters: Operators cannot audit or use the large taxonomy through the dashboard unless they import it into the starter path.
+- recommended fix: Introduce an explicit active-taxonomy config: `starter`, `all_packs_active_enabled`, or `imported_workbook`. Add tests for each mode.
+- verification command: `curl -fsS http://127.0.0.1:8002/taxonomy-workbench/coverage | python -m json.tool`.
+
+### P1-2: Make active taxonomy quality pass or deactivate weak entries
+
+- severity: high
+- file(s): `data/taxonomy/packs/starter-pack.yaml`, `engine/argument_risk_engine/taxonomy/validator.py`
+- problem: Active entries fail validation for missing negative examples, minimum evidence, and false-positive warnings.
+- why it matters: Quality gates do not protect users from known weak entries.
+- recommended fix: Complete each active entry or set it to `review_required` / `enabled_for_classification: false`.
+- verification command: `curl -fsS http://127.0.0.1:8002/taxonomy-workbench/quality-report | python -m json.tool`.
+
+### P1-3: Reduce deterministic false positives on hard negatives
+
+- severity: high
+- file(s): `engine/argument_risk_engine/classification/deterministic.py`, `engine/argument_risk_engine/scoring/scorer.py`, `data/benchmarks/mini_eval_set.jsonl`
+- problem: The mini benchmark reported a 0.5556 false-positive rate.
+- why it matters: Conservative behavior is a release requirement.
+- recommended fix: Add stronger evidence gates, hard-negative exclusions, and calibrated suppression for high-sensitivity entries.
+- verification command: `curl -fsS -H 'Content-Type: application/json' -d '{}' http://127.0.0.1:8002/evaluation/run | python -m json.tool`.
+
+### P1-4: Remove hidden provider-mode ambiguity in analysis
+
+- severity: high
+- file(s): `engine/argument_risk_engine/analyzer.py`, `backend/app/services/analyzer_service.py`, `engine/argument_risk_engine/classification/classifier.py`
+- problem: Provider settings can be selected/tested, but `analyze_text()` remains deterministic.
+- why it matters: Users may believe a selected provider is analyzing text when it is not.
+- recommended fix: Wire analyze through `ArgumentRiskClassifier`, or disable/label provider-backed analysis as unavailable.
+- verification command: `curl -fsS -H 'Content-Type: application/json' -d '{"text":"Everyone always caused this.","mode":"llm","model_provider_id":"openai_remote"}' http://127.0.0.1:8002/analyze | python -m json.tool`.
+
+## P2 / important usability and security hardening
+
+### P2-1: Add browser automation smoke tests
+
+- severity: medium
+- file(s): `frontend/src/runtime-dashboard.js`, `frontend/scripts/dev_server.mjs`
+- problem: Chrome was not available in this environment; no browser automation exists.
+- why it matters: Build success does not prove Analyze, imports, exports, review saves, and downloads work in Chrome.
+- recommended fix: Add Playwright tests for the required frontend flows.
+- verification command: future `npx playwright test`.
+
+### P2-2: Consolidate frontend implementation path
+
+- severity: medium
+- file(s): `frontend/index.html`, `frontend/src/runtime-dashboard.js`, `frontend/src/App.tsx`
+- problem: Served app uses runtime JS, not the React component tree.
+- why it matters: Maintenance changes can land in the wrong UI implementation.
+- recommended fix: Serve/build the React app or remove stale React components.
+- verification command: `curl -fsS http://127.0.0.1:5173/app.js | head -5`.
+
+### P2-3: Avoid remote provider test network calls when API key is missing
+
+- severity: medium
+- file(s): `engine/argument_risk_engine/classification/llm_client.py`
+- problem: Remote provider tests warn about missing keys but still attempt network calls.
+- why it matters: Local audits should avoid surprising external traffic.
+- recommended fix: Short-circuit remote tests when the configured secret env var is absent.
+- verification command: `python - <<'PY'\nfrom backend.app.services.settings_service import test_model_provider\nprint(test_model_provider('openai_remote').model_dump())\nPY`.
diff --git a/TAXONOMY_IMPORT_EXPORT_REPORT.md b/TAXONOMY_IMPORT_EXPORT_REPORT.md
new file mode 100644
index 0000000..3b04a25
--- /dev/null
+++ b/TAXONOMY_IMPORT_EXPORT_REPORT.md
@@ -0,0 +1,53 @@
+# Taxonomy Import/Export Report
+
+## Scope
+
+This report covers taxonomy IDs, pack validity, workbook import/export, source refs, healthy/deprecated/backlog exclusions, active/enabled classification filtering, and high false-positive sensitivity behavior.
+
+## Verification summary
+
+- `python scripts/export_taxonomy_excel.py /tmp/are-taxonomy-audit.xlsx` passed and produced a workbook file.
+- Python `import_taxonomy_excel('/tmp/are-taxonomy-audit.xlsx')` loaded 1,103 workbook entries.
+- Python `import_workbook('/tmp/are-taxonomy-audit.xlsx', temp_root)` completed mechanically with 1,103 entries and 49 active classification entries, but returned 9 validation errors and 23 warnings.
+- `load_all_packs()` found 1,103 entries and no duplicate IDs.
+- Healthy reasoning patterns had 0 entries enabled for classification.
+- Deprecated entries had 0 active/enabled classification entries.
+- Backlog exists as `data/taxonomy/candidate_backlog.yaml` and is not loaded by `load_all_packs()`.
+
+## Issues
+
+### TAX-001 — Active dashboard taxonomy is not the full pack set
+
+- severity: high
+- file(s): `backend/app/core/paths.py`, `backend/app/services/taxonomy_service.py`, `data/taxonomy/packs/starter-pack.yaml`
+- problem: The dashboard/API active taxonomy points to only `starter-pack.yaml`; full pack import/export scripts handle 1,103 entries.
+- why it matters: Workbench coverage/export from the dashboard can mislead users into thinking only 3 taxonomy entries exist.
+- recommended fix: Add explicit active taxonomy configuration and label the dashboard coverage as starter-only if that remains the intended default.
+- verification command: `curl -fsS http://127.0.0.1:8002/taxonomy-workbench/coverage | python -m json.tool`.
+
+### TAX-002 — Workbook import/export works mechanically but validation fails
+
+- severity: high
+- file(s): `engine/argument_risk_engine/taxonomy/importer.py`, `engine/argument_risk_engine/taxonomy/exporter.py`, `engine/argument_risk_engine/taxonomy/validator.py`, `data/taxonomy/packs/starter-pack.yaml`
+- problem: Round-trip import/export works, but validation reports active starter entries missing negative examples, minimum evidence requirements, and false-positive warnings.
+- why it matters: A mechanically importable taxonomy can still be unsafe for classification.
+- recommended fix: Treat validation failure as a release blocker for entries enabled for classification.
+- verification command: `python scripts/export_taxonomy_excel.py /tmp/are-taxonomy-audit.xlsx && python - <<'PY' ... import_workbook(... temp_root) ... PY`.
+
+### TAX-003 — Source refs are present in large packs but starter active entries are sparse
+
+- severity: medium
+- file(s): `data/taxonomy/packs/core_mvp.yaml`, `data/taxonomy/packs/starter-pack.yaml`, `engine/argument_risk_engine/taxonomy/source_registry.py`
+- problem: Large-pack entries include source refs, while starter-pack entries are legacy/sparse and quality checks flag missing supporting metadata.
+- why it matters: Active findings should be traceable to source refs or clear operational definitions.
+- recommended fix: Migrate starter active entries to the v0.2 schema quality level or retire the starter pack as the dashboard default.
+- verification command: `python - <<'PY'\nfrom argument_risk_engine.taxonomy.pack_manager import load_all_packs\nprint(load_all_packs().entries[0].source_refs)\nPY`.
+
+### TAX-004 — Healthy/deprecated/backlog exclusions pass in code inspection but need API tests
+
+- severity: medium
+- file(s): `engine/argument_risk_engine/taxonomy/pack_manager.py`, `engine/argument_risk_engine/classification/classifier.py`, `data/taxonomy/candidate_backlog.yaml`
+- problem: Code excludes healthy patterns and requires active/enabled entries, but tests should explicitly cover API/dashboard classification against full packs.
+- why it matters: Future taxonomy imports could accidentally enable healthy or backlog entries for final risks.
+- recommended fix: Add tests asserting healthy entries appear only as suppressors, deprecated/review/backlog entries never become final risks, and active/enabled is required.
+- verification command: `python - <<'PY'\nfrom argument_risk_engine.taxonomy.pack_manager import load_all_packs, active_classification_entries\nprint(len(active_classification_entries(load_all_packs())))\nPY`.
diff --git a/backend/app/main.py b/backend/app/main.py
index 6986d87..5074d1c 100644
--- a/backend/app/main.py
+++ b/backend/app/main.py
@@ -23,8 +23,11 @@
 )
 
 app.include_router(routes_analysis.router)
+app.include_router(routes_taxonomy.router)
+app.include_router(routes_taxonomy_workbench.router)
 app.include_router(routes_review.router)
 app.include_router(routes_evaluation.router)
+app.include_router(routes_settings.router)
 app.include_router(routes_reports.router)
 app.include_router(routes_analysis.router, prefix="/api")
 app.include_router(routes_taxonomy.router, prefix="/api")
diff --git a/build_backend.py b/build_backend.py
index 2d41756..4018aae 100644
--- a/build_backend.py
+++ b/build_backend.py
@@ -22,6 +22,14 @@ def _metadata() -> str:
     ])
 
 
+def _entry_points() -> str:
+    return "\n".join([
+        "[console_scripts]",
+        "uvicorn=uvicorn.main:main",
+        "",
+    ])
+
+
 def _wheel() -> str:
     return "\n".join([
         "Wheel-Version: 1.0",
@@ -76,6 +84,7 @@ def _write_wheel(out_dir: Path, editable: bool) -> str:
         files: dict[str, bytes] = {
             f"{DIST}/METADATA": _metadata().encode(),
             f"{DIST}/WHEEL": _wheel().encode(),
+            f"{DIST}/entry_points.txt": _entry_points().encode(),
         }
         if editable:
             files["argument_risk_engine_editable.pth"] = f"{ROOT}\n{ROOT / 'engine'}\n".encode()
diff --git a/engine/argument_risk_engine/reports/html.py b/engine/argument_risk_engine/reports/html.py
index c64906d..66c2261 100644
--- a/engine/argument_risk_engine/reports/html.py
+++ b/engine/argument_risk_engine/reports/html.py
@@ -3,6 +3,8 @@
 from html import escape
 from typing import Any
 
+from argument_risk_engine.reports.json_export import LIMITATIONS_NOTE
+
 
 def render_html_report(result: dict[str, Any]) -> str:
     claims = result.get("claims", []) or []
@@ -16,7 +18,7 @@ def render_html_report(result: dict[str, Any]) -> str:
   <p><strong>Analysis ID:</strong> {escape(str(result.get('analysis_id') or result.get('text_id', 'unknown')))}</p>
   <p><strong>Overall risk score:</strong> {escape(str(result.get('overall_risk_score', 0)))}</p>
   <p><strong>Risk level:</strong> {escape(str(result.get('risk_level', 'unknown')))}</p>
-  <p><em>Metrics and reports are review aids only and do not claim scientific validation.</em></p>
+  <p><em>{escape(LIMITATIONS_NOTE)}</em></p>
   <h2>Summary</h2>
   <ul>
     <li>Claims reviewed: {len(claims)}</li>
diff --git a/engine/argument_risk_engine/reports/json_export.py b/engine/argument_risk_engine/reports/json_export.py
index 83747b3..3d7fc7d 100644
--- a/engine/argument_risk_engine/reports/json_export.py
+++ b/engine/argument_risk_engine/reports/json_export.py
@@ -3,6 +3,10 @@
 import json
 from typing import Any
 
+LIMITATIONS_NOTE = "Metrics and reports are review aids only and do not claim scientific validation."
+
 
 def render_json_report(result: dict[str, Any]) -> str:
-    return json.dumps(result, indent=2, ensure_ascii=False, sort_keys=True)
+    payload = dict(result)
+    payload.setdefault("limitations_note", LIMITATIONS_NOTE)
+    return json.dumps(payload, indent=2, ensure_ascii=False, sort_keys=True)
diff --git a/engine/argument_risk_engine/reports/markdown.py b/engine/argument_risk_engine/reports/markdown.py
index 5d03ebc..9a6114f 100644
--- a/engine/argument_risk_engine/reports/markdown.py
+++ b/engine/argument_risk_engine/reports/markdown.py
@@ -2,6 +2,8 @@
 
 from typing import Any
 
+from argument_risk_engine.reports.json_export import LIMITATIONS_NOTE
+
 
 def render_markdown_report(result: dict[str, Any]) -> str:
     claims = result.get("claims", []) or []
@@ -13,7 +15,7 @@ def render_markdown_report(result: dict[str, Any]) -> str:
         f"Overall risk score: **{result.get('overall_risk_score', 0)}**",
         f"Risk level: **{result.get('risk_level', 'unknown')}**",
         "",
-        "> Metrics and reports are review aids only and do not claim scientific validation.",
+        f"> {LIMITATIONS_NOTE}",
         "",
         "## Summary",
         "",
diff --git a/uvicorn/__init__.py b/uvicorn/__init__.py
index 5349722..a5e68f6 100644
--- a/uvicorn/__init__.py
+++ b/uvicorn/__init__.py
@@ -1,14 +1,77 @@
-def run(app_path: str, host: str = '127.0.0.1', port: int = 8000, reload: bool = False):
-    import http.server
-    import json
-    import socketserver
+from __future__ import annotations
+
+import http.server
+import importlib
+import inspect
+import json
+import socketserver
+from email.parser import BytesParser
+from email.policy import default as email_policy
+from io import BytesIO
+from types import SimpleNamespace
+from urllib.parse import parse_qs, urlparse
+
+from fastapi import Response, UploadFile
+
+
+def run(app_path: str, host: str = "127.0.0.1", port: int = 8000, reload: bool = False):
+    """Run a tiny local HTTP server for the repository's FastAPI-compatible app.
+
+    This project ships lightweight FastAPI/Uvicorn shims so the demo can run in
+    restricted/offline environments.  The server intentionally supports only the
+    routing and request features used by the app: JSON bodies, query strings,
+    dynamic path parameters, and single-file multipart uploads.
+    """
+    app = _load_app(app_path)
 
     class Handler(http.server.BaseHTTPRequestHandler):
         def do_GET(self):
-            if self.path == '/health':
-                self.send_response(200); self.send_header('Content-Type','application/json'); self.end_headers(); self.wfile.write(json.dumps({'status':'ok'}).encode())
-            else:
-                self.send_response(200); self.send_header('Content-Type','application/json'); self.end_headers(); self.wfile.write(json.dumps({'message':'Argument-Risk-Engine API MVP'}).encode())
+            self._dispatch("GET")
+
+        def do_POST(self):
+            self._dispatch("POST")
+
+        def do_PUT(self):
+            self._dispatch("PUT")
+
+        def do_PATCH(self):
+            self._dispatch("PATCH")
+
+        def _dispatch(self, method: str) -> None:
+            fn, kwargs, query = _resolve(app, method, self.path)
+            if not fn:
+                self._send_json({"detail": "not found"}, status=404)
+                return
+            kwargs.update(query)
+            try:
+                body_kwargs = _body_args(fn, self)
+                kwargs.update(body_kwargs)
+                payload = _call(fn, kwargs)
+                self._send_payload(payload)
+            except Exception as error:  # pragma: no cover - defensive server boundary
+                self._send_json({"detail": str(error)}, status=500)
+
+        def _send_payload(self, payload) -> None:
+            if isinstance(payload, Response):
+                content = payload.content
+                if isinstance(content, str):
+                    content = content.encode("utf-8")
+                self.send_response(payload.status_code)
+                self.send_header("Content-Type", payload.media_type)
+                for key, value in payload.headers.items():
+                    self.send_header(key, value)
+                self.end_headers()
+                self.wfile.write(content or b"")
+                return
+            self._send_json(_to_jsonable(payload))
+
+        def _send_json(self, payload, status: int = 200) -> None:
+            body = json.dumps(_to_jsonable(payload), ensure_ascii=False).encode("utf-8")
+            self.send_response(status)
+            self.send_header("Content-Type", "application/json")
+            self.end_headers()
+            self.wfile.write(body)
+
         def log_message(self, *args):
             return
 
@@ -17,5 +80,129 @@ class ReusableTCPServer(socketserver.TCPServer):
         allow_reuse_port = True
 
     with ReusableTCPServer((host, port), Handler) as httpd:
-        print(f'Backend: http://{host}:{port}')
+        print(f"Backend: http://{host}:{port}", flush=True)
         httpd.serve_forever()
+
+
+def _load_app(app_path: str):
+    module_name, _, attr = app_path.partition(":")
+    module = importlib.import_module(module_name)
+    return getattr(module, attr or "app")
+
+
+def _resolve(app, method: str, raw_path: str):
+    parsed = urlparse(raw_path)
+    path = parsed.path.rstrip("/") or "/"
+    query = {key: values[-1] for key, values in parse_qs(parsed.query).items()}
+    exact = app.routes.get((method, path)) or app.routes.get((method, parsed.path))
+    if exact:
+        return exact, {}, query
+    path_parts = [part for part in path.split("/") if part]
+    for (route_method, route_path), fn in app.routes.items():
+        if route_method != method:
+            continue
+        route_parts = [part for part in route_path.split("/") if part]
+        if len(route_parts) != len(path_parts):
+            continue
+        kwargs = {}
+        matched = True
+        for route_part, path_part in zip(route_parts, path_parts, strict=True):
+            if route_part.startswith("{") and route_part.endswith("}"):
+                kwargs[route_part[1:-1]] = path_part
+            elif route_part != path_part:
+                matched = False
+                break
+        if matched:
+            return fn, kwargs, query
+    return None, {}, query
+
+
+def _body_args(fn, handler) -> dict:
+    length = int(handler.headers.get("Content-Length", "0") or 0)
+    if length <= 0:
+        return {}
+    content_type = handler.headers.get("Content-Type", "")
+    body = handler.rfile.read(length)
+    if content_type.startswith("application/json"):
+        data = json.loads(body.decode("utf-8") or "{}")
+        return _json_args(fn, data)
+    if content_type.startswith("multipart/form-data"):
+        return _multipart_args(content_type, body)
+    return {}
+
+
+def _json_args(fn, data: dict) -> dict:
+    if not data:
+        return {}
+    sig = inspect.signature(fn)
+    required_model_params = []
+    for name, param in sig.parameters.items():
+        if name == "file":
+            continue
+        ann = _resolve_annotation(fn, param.annotation)
+        if ann is inspect._empty or ann in {str, int, bool, float}:
+            continue
+        if param.default is inspect._empty:
+            required_model_params.append((name, ann))
+    if len(required_model_params) == 1:
+        name, cls = required_model_params[0]
+        try:
+            return {name: cls(**data)}
+        except Exception:
+            return {name: data}
+    return data
+
+
+def _multipart_args(content_type: str, body: bytes) -> dict:
+    message = BytesParser(policy=email_policy).parsebytes(
+        b"Content-Type: " + content_type.encode("utf-8") + b"\r\n\r\n" + body
+    )
+    result = {}
+    for part in message.iter_parts():
+        name = part.get_param("name", header="content-disposition")
+        filename = part.get_filename()
+        payload = part.get_payload(decode=True) or b""
+        if filename:
+            result[name or "file"] = UploadFile(filename=filename, file=BytesIO(payload))
+        elif name:
+            result[name] = payload.decode(part.get_content_charset() or "utf-8")
+    return result
+
+
+def _call(fn, kwargs):
+    sig = inspect.signature(fn)
+    accepted = {}
+    for name, param in sig.parameters.items():
+        if name in kwargs:
+            accepted[name] = kwargs[name]
+        elif param.default is inspect._empty:
+            ann = _resolve_annotation(fn, param.annotation)
+            if ann is not inspect._empty:
+                try:
+                    accepted[name] = ann(**kwargs)
+                except Exception:
+                    pass
+    return fn(**accepted)
+
+
+def _resolve_annotation(fn, ann):
+    if isinstance(ann, str):
+        builtin = {"str": str, "int": int, "bool": bool, "float": float}.get(ann)
+        if builtin is not None:
+            return builtin
+        return getattr(inspect.getmodule(fn), ann, fn.__globals__.get(ann, ann))
+    return ann
+
+
+def _to_jsonable(value):
+    if hasattr(value, "model_dump"):
+        return value.model_dump()
+    if isinstance(value, dict):
+        return {key: _to_jsonable(item) for key, item in value.items()}
+    if isinstance(value, list):
+        return [_to_jsonable(item) for item in value]
+    if hasattr(value, "value"):
+        return value.value
+    if isinstance(value, SimpleNamespace):
+        return vars(value)
+    return value