diff --git a/decnet/web/api.py b/decnet/web/api.py index 1cb63f4c..31bb23cd 100644 --- a/decnet/web/api.py +++ b/decnet/web/api.py @@ -1,5 +1,7 @@ import asyncio import os +import traceback +import uuid from contextlib import asynccontextmanager from typing import Any, AsyncGenerator, Optional @@ -297,3 +299,29 @@ async def pydantic_validation_exception_handler(request: Request, exc: Validatio "type": "internal_validation_error" }, ) + + +@app.exception_handler(Exception) +async def unhandled_exception_handler(request: Request, exc: Exception) -> ORJSONResponse: + """Catch-all for uncaught exceptions in route handlers and dependencies. + + Prod: opaque 500 with an ``error_id``; full traceback goes ONLY to server + logs. Dev (``DECNET_DEVELOPER=True``): same response plus ``exception_type`` + and ``traceback`` fields so failures are debuggable without tailing logs. + + The ``error_id`` lets operators correlate a user's 500 report with the full + traceback in server logs (``grep /var/log/decnet.log``). + + FastAPI's own ``HTTPException`` routing still takes precedence — this + handler only fires on genuinely-uncaught exceptions. + """ + error_id = uuid.uuid4().hex + log.exception( + "unhandled exception on %s %s [error_id=%s]", + request.method, request.url.path, error_id, + ) + body: dict[str, Any] = {"detail": "Internal Server Error", "error_id": error_id} + if DECNET_DEVELOPER: + body["exception_type"] = type(exc).__name__ + body["traceback"] = traceback.format_exc() + return ORJSONResponse(status_code=500, content=body) diff --git a/development/THREAT_MODEL.md b/development/THREAT_MODEL.md index 5b5f4603..c3a85602 100644 --- a/development/THREAT_MODEL.md +++ b/development/THREAT_MODEL.md @@ -185,7 +185,7 @@ Each sub-flow below gets its own table. Status codes: | T | Password hash tampering in DB | T | DB integrity is OS/filesystem scope. See boundary #2 for syslog-path tampering. | | R | User denies having performed an action | M | Every mutation logged with actor UUID; audit trail lives in `logs` table. | | I | Password reflected in login response on failure | M | Single uniform 401 for user-not-found and bad-password at `api_login.py`. No user-existence oracle. | -| I | JWT secret leaked via error message / stack trace | **?** | Verify: production error handler doesn't return tracebacks. | +| I | JWT secret leaked via error message / stack trace | M | Generic `@app.exception_handler(Exception)` at `decnet/web/api.py` returns opaque `{detail, error_id}` on uncaught exceptions; traceback is logged server-side only. Dev-mode (`DECNET_DEVELOPER=True`) includes traceback in body for debugging. | | D | Bcrypt-cost DoS via long password submission | M | Pydantic `max_length=72` on all password fields in `decnet/web/db/models/auth.py` (matches bcrypt's internal truncation limit). | | E | `role=None` bypass (historical bug) | M | See memory `project_rbac_null_role.md`; fixed via centralized RBAC that treats `None` as unauthenticated. | @@ -289,7 +289,8 @@ code" or "accepted, add to table above." - [x] ~~Per-IP / per-user rate limit on `/auth/login`.~~ Shipped — see F1/S row. - [x] ~~Uniform "invalid credentials" on login failure (no user-existence oracle).~~ Verified — see F1/I row. -- [ ] Production error handler suppresses tracebacks and internal details. +- [x] ~~Production error handler suppresses tracebacks and internal details.~~ Shipped — generic `@app.exception_handler(Exception)` in `decnet/web/api.py`; opaque `{detail, error_id}` in prod, traceback only under `DECNET_DEVELOPER=True`. +- [ ] `detail=str(e)` / `detail=f"…{e}"` sites in `decnet/web/router/fleet/api_deploy_deckies.py:41,67,83,155` — admin-gated, arguably intentional INI-parser UX, but worth auditing: preserve deliberate messages, sanitize any bubble-up from lower layers. - [x] ~~Password length clamp before bcrypt.~~ Verified — Pydantic `max_length=72`. - [ ] Contract test asserting every protected route returns 401 unauthenticated and 403 for under-roled. - [ ] Field allow-list on viewer responses for attacker / user / bounty serializers. @@ -357,3 +358,4 @@ In priority order: |------|--------|--------| | 2026-04-23 | Initial scaffold. System context + Dashboard↔API as first worked component. | ANTI | | 2026-04-23 | F1 Authn: 3 threats moved from **?** to **M** (rate limit shipped; uniform 401 verified; bcrypt length clamp verified). Added DA-08 accepted risk: reverse-proxy per-IP bucket collapse. | ANTI | +| 2026-04-23 | F1/I "traceback / stack trace leakage" moved from **?** to **M** via generic Exception handler with `error_id` correlation. Added follow-up checklist entry for `detail=str(e)` sites in fleet deploy router. | ANTI | diff --git a/tests/api/test_error_handler.py b/tests/api/test_error_handler.py new file mode 100644 index 00000000..9bf47fa6 --- /dev/null +++ b/tests/api/test_error_handler.py @@ -0,0 +1,129 @@ +"""Tests for the generic Exception handler at decnet/web/api.py. + +Mitigation target: threat model F1/I — "Production error handler suppresses +tracebacks and internal details." Verifies that uncaught exceptions return +an opaque 500 with a correlation id in prod, and include debug detail only +when DECNET_DEVELOPER is on. +""" +from __future__ import annotations + +import logging +import re +from typing import AsyncGenerator + +import httpx +import pytest + +from decnet.env import DECNET_ADMIN_USER, DECNET_ADMIN_PASSWORD +from decnet.web.api import app +from decnet.web.dependencies import repo + + +@pytest.fixture +async def client() -> AsyncGenerator[httpx.AsyncClient, None]: + """Override the shared client fixture to NOT re-raise app exceptions. + + The default `httpx.ASGITransport` re-raises any uncaught exception + from the app — which defeats the whole point of testing our + generic exception handler. With `raise_app_exceptions=False`, the + transport instead returns the HTTP response our handler built. + """ + transport = httpx.ASGITransport(app=app, raise_app_exceptions=False) + async with httpx.AsyncClient(transport=transport, base_url="http://test") as ac: + yield ac + + +async def _admin_headers(client: httpx.AsyncClient) -> dict[str, str]: + resp = await client.post( + "/api/v1/auth/login", + json={"username": DECNET_ADMIN_USER, "password": DECNET_ADMIN_PASSWORD}, + ) + token = resp.json()["access_token"] + # Clear must_change_password so the token passes mutation-gated endpoints. + await client.post( + "/api/v1/auth/change-password", + json={"old_password": DECNET_ADMIN_PASSWORD, "new_password": DECNET_ADMIN_PASSWORD}, + headers={"Authorization": f"Bearer {token}"}, + ) + resp2 = await client.post( + "/api/v1/auth/login", + json={"username": DECNET_ADMIN_USER, "password": DECNET_ADMIN_PASSWORD}, + ) + return {"Authorization": f"Bearer {resp2.json()['access_token']}"} + + +def _raise_boom(*_a, **_kw): + raise RuntimeError("boom") + + +@pytest.mark.anyio +async def test_unhandled_exception_prod_shape_is_opaque( + client: httpx.AsyncClient, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Prod mode (DECNET_DEVELOPER=False): 500 with opaque body + error_id. + Must NOT include traceback or exception_type.""" + import decnet.web.api as _api + monkeypatch.setattr(_api, "DECNET_DEVELOPER", False) + + headers = await _admin_headers(client) + monkeypatch.setattr(repo, "get_attacker_by_uuid", _raise_boom) + + r = await client.get("/api/v1/attackers/any-uuid", headers=headers) + + assert r.status_code == 500 + body = r.json() + assert body.get("detail") == "Internal Server Error" + assert "error_id" in body + assert re.fullmatch(r"[0-9a-f]{32}", body["error_id"]), body["error_id"] + assert "traceback" not in body + assert "exception_type" not in body + + +@pytest.mark.anyio +async def test_unhandled_exception_dev_shape_includes_traceback( + client: httpx.AsyncClient, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Dev mode (DECNET_DEVELOPER=True): body includes exception_type and + traceback so failures are debuggable without tailing server logs.""" + import decnet.web.api as _api + monkeypatch.setattr(_api, "DECNET_DEVELOPER", True) + + headers = await _admin_headers(client) + monkeypatch.setattr(repo, "get_attacker_by_uuid", _raise_boom) + + r = await client.get("/api/v1/attackers/any-uuid", headers=headers) + + assert r.status_code == 500 + body = r.json() + assert body["detail"] == "Internal Server Error" + assert "error_id" in body + assert body["exception_type"] == "RuntimeError" + assert "RuntimeError" in body["traceback"] + assert "boom" in body["traceback"] + + +@pytest.mark.anyio +async def test_unhandled_exception_logs_error_id( + client: httpx.AsyncClient, + monkeypatch: pytest.MonkeyPatch, + caplog: pytest.LogCaptureFixture, +) -> None: + """The same error_id returned to the client must appear in server logs, + so operators can correlate a user's 500 report with the full traceback.""" + import decnet.web.api as _api + monkeypatch.setattr(_api, "DECNET_DEVELOPER", False) + + headers = await _admin_headers(client) + monkeypatch.setattr(repo, "get_attacker_by_uuid", _raise_boom) + + with caplog.at_level(logging.ERROR, logger="api"): + r = await client.get("/api/v1/attackers/any-uuid", headers=headers) + + assert r.status_code == 500 + error_id = r.json()["error_id"] + assert any(error_id in rec.getMessage() for rec in caplog.records), ( + f"error_id {error_id} not found in captured logs: " + f"{[rec.getMessage() for rec in caplog.records]}" + )