feat(api): opaque 500 handler + error_id correlation for unhandled exceptions
Registers a generic @app.exception_handler(Exception) that catches anything
uncaught in route handlers / dependencies. Prod response is opaque:
{detail: 'Internal Server Error', error_id: <uuid4 hex>}. Dev mode
(DECNET_DEVELOPER=True) adds exception_type and traceback fields so
failures are debuggable without tailing server logs.
The error_id is logged alongside the full traceback server-side, letting
operators correlate a user's 500 report with the exact exception via
`grep <error_id> /var/log/decnet.log`.
FastAPI's own HTTPException routing and the existing
RequestValidationError / ValidationError / RateLimitExceeded handlers
still take precedence — this handler only fires on genuinely-uncaught
exceptions.
Flips threat model F1/I 'traceback / stack trace leakage' from ? to M
and logs a follow-up checklist entry for 4 detail=str(e) sites in the
fleet deploy router (admin-gated, different threat class, separate
audit).
This commit is contained in:
@@ -1,5 +1,7 @@
|
||||
import asyncio
|
||||
import os
|
||||
import traceback
|
||||
import uuid
|
||||
from contextlib import asynccontextmanager
|
||||
from typing import Any, AsyncGenerator, Optional
|
||||
|
||||
@@ -297,3 +299,29 @@ async def pydantic_validation_exception_handler(request: Request, exc: Validatio
|
||||
"type": "internal_validation_error"
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
@app.exception_handler(Exception)
|
||||
async def unhandled_exception_handler(request: Request, exc: Exception) -> ORJSONResponse:
|
||||
"""Catch-all for uncaught exceptions in route handlers and dependencies.
|
||||
|
||||
Prod: opaque 500 with an ``error_id``; full traceback goes ONLY to server
|
||||
logs. Dev (``DECNET_DEVELOPER=True``): same response plus ``exception_type``
|
||||
and ``traceback`` fields so failures are debuggable without tailing logs.
|
||||
|
||||
The ``error_id`` lets operators correlate a user's 500 report with the full
|
||||
traceback in server logs (``grep <error_id> /var/log/decnet.log``).
|
||||
|
||||
FastAPI's own ``HTTPException`` routing still takes precedence — this
|
||||
handler only fires on genuinely-uncaught exceptions.
|
||||
"""
|
||||
error_id = uuid.uuid4().hex
|
||||
log.exception(
|
||||
"unhandled exception on %s %s [error_id=%s]",
|
||||
request.method, request.url.path, error_id,
|
||||
)
|
||||
body: dict[str, Any] = {"detail": "Internal Server Error", "error_id": error_id}
|
||||
if DECNET_DEVELOPER:
|
||||
body["exception_type"] = type(exc).__name__
|
||||
body["traceback"] = traceback.format_exc()
|
||||
return ORJSONResponse(status_code=500, content=body)
|
||||
|
||||
@@ -185,7 +185,7 @@ Each sub-flow below gets its own table. Status codes:
|
||||
| T | Password hash tampering in DB | T | DB integrity is OS/filesystem scope. See boundary #2 for syslog-path tampering. |
|
||||
| R | User denies having performed an action | M | Every mutation logged with actor UUID; audit trail lives in `logs` table. |
|
||||
| I | Password reflected in login response on failure | M | Single uniform 401 for user-not-found and bad-password at `api_login.py`. No user-existence oracle. |
|
||||
| I | JWT secret leaked via error message / stack trace | **?** | Verify: production error handler doesn't return tracebacks. |
|
||||
| I | JWT secret leaked via error message / stack trace | M | Generic `@app.exception_handler(Exception)` at `decnet/web/api.py` returns opaque `{detail, error_id}` on uncaught exceptions; traceback is logged server-side only. Dev-mode (`DECNET_DEVELOPER=True`) includes traceback in body for debugging. |
|
||||
| D | Bcrypt-cost DoS via long password submission | M | Pydantic `max_length=72` on all password fields in `decnet/web/db/models/auth.py` (matches bcrypt's internal truncation limit). |
|
||||
| E | `role=None` bypass (historical bug) | M | See memory `project_rbac_null_role.md`; fixed via centralized RBAC that treats `None` as unauthenticated. |
|
||||
|
||||
@@ -289,7 +289,8 @@ code" or "accepted, add to table above."
|
||||
|
||||
- [x] ~~Per-IP / per-user rate limit on `/auth/login`.~~ Shipped — see F1/S row.
|
||||
- [x] ~~Uniform "invalid credentials" on login failure (no user-existence oracle).~~ Verified — see F1/I row.
|
||||
- [ ] Production error handler suppresses tracebacks and internal details.
|
||||
- [x] ~~Production error handler suppresses tracebacks and internal details.~~ Shipped — generic `@app.exception_handler(Exception)` in `decnet/web/api.py`; opaque `{detail, error_id}` in prod, traceback only under `DECNET_DEVELOPER=True`.
|
||||
- [ ] `detail=str(e)` / `detail=f"…{e}"` sites in `decnet/web/router/fleet/api_deploy_deckies.py:41,67,83,155` — admin-gated, arguably intentional INI-parser UX, but worth auditing: preserve deliberate messages, sanitize any bubble-up from lower layers.
|
||||
- [x] ~~Password length clamp before bcrypt.~~ Verified — Pydantic `max_length=72`.
|
||||
- [ ] Contract test asserting every protected route returns 401 unauthenticated and 403 for under-roled.
|
||||
- [ ] Field allow-list on viewer responses for attacker / user / bounty serializers.
|
||||
@@ -357,3 +358,4 @@ In priority order:
|
||||
|------|--------|--------|
|
||||
| 2026-04-23 | Initial scaffold. System context + Dashboard↔API as first worked component. | ANTI |
|
||||
| 2026-04-23 | F1 Authn: 3 threats moved from **?** to **M** (rate limit shipped; uniform 401 verified; bcrypt length clamp verified). Added DA-08 accepted risk: reverse-proxy per-IP bucket collapse. | ANTI |
|
||||
| 2026-04-23 | F1/I "traceback / stack trace leakage" moved from **?** to **M** via generic Exception handler with `error_id` correlation. Added follow-up checklist entry for `detail=str(e)` sites in fleet deploy router. | ANTI |
|
||||
|
||||
129
tests/api/test_error_handler.py
Normal file
129
tests/api/test_error_handler.py
Normal file
@@ -0,0 +1,129 @@
|
||||
"""Tests for the generic Exception handler at decnet/web/api.py.
|
||||
|
||||
Mitigation target: threat model F1/I — "Production error handler suppresses
|
||||
tracebacks and internal details." Verifies that uncaught exceptions return
|
||||
an opaque 500 with a correlation id in prod, and include debug detail only
|
||||
when DECNET_DEVELOPER is on.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
from typing import AsyncGenerator
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
|
||||
from decnet.env import DECNET_ADMIN_USER, DECNET_ADMIN_PASSWORD
|
||||
from decnet.web.api import app
|
||||
from decnet.web.dependencies import repo
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
async def client() -> AsyncGenerator[httpx.AsyncClient, None]:
|
||||
"""Override the shared client fixture to NOT re-raise app exceptions.
|
||||
|
||||
The default `httpx.ASGITransport` re-raises any uncaught exception
|
||||
from the app — which defeats the whole point of testing our
|
||||
generic exception handler. With `raise_app_exceptions=False`, the
|
||||
transport instead returns the HTTP response our handler built.
|
||||
"""
|
||||
transport = httpx.ASGITransport(app=app, raise_app_exceptions=False)
|
||||
async with httpx.AsyncClient(transport=transport, base_url="http://test") as ac:
|
||||
yield ac
|
||||
|
||||
|
||||
async def _admin_headers(client: httpx.AsyncClient) -> dict[str, str]:
|
||||
resp = await client.post(
|
||||
"/api/v1/auth/login",
|
||||
json={"username": DECNET_ADMIN_USER, "password": DECNET_ADMIN_PASSWORD},
|
||||
)
|
||||
token = resp.json()["access_token"]
|
||||
# Clear must_change_password so the token passes mutation-gated endpoints.
|
||||
await client.post(
|
||||
"/api/v1/auth/change-password",
|
||||
json={"old_password": DECNET_ADMIN_PASSWORD, "new_password": DECNET_ADMIN_PASSWORD},
|
||||
headers={"Authorization": f"Bearer {token}"},
|
||||
)
|
||||
resp2 = await client.post(
|
||||
"/api/v1/auth/login",
|
||||
json={"username": DECNET_ADMIN_USER, "password": DECNET_ADMIN_PASSWORD},
|
||||
)
|
||||
return {"Authorization": f"Bearer {resp2.json()['access_token']}"}
|
||||
|
||||
|
||||
def _raise_boom(*_a, **_kw):
|
||||
raise RuntimeError("boom")
|
||||
|
||||
|
||||
@pytest.mark.anyio
|
||||
async def test_unhandled_exception_prod_shape_is_opaque(
|
||||
client: httpx.AsyncClient,
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
) -> None:
|
||||
"""Prod mode (DECNET_DEVELOPER=False): 500 with opaque body + error_id.
|
||||
Must NOT include traceback or exception_type."""
|
||||
import decnet.web.api as _api
|
||||
monkeypatch.setattr(_api, "DECNET_DEVELOPER", False)
|
||||
|
||||
headers = await _admin_headers(client)
|
||||
monkeypatch.setattr(repo, "get_attacker_by_uuid", _raise_boom)
|
||||
|
||||
r = await client.get("/api/v1/attackers/any-uuid", headers=headers)
|
||||
|
||||
assert r.status_code == 500
|
||||
body = r.json()
|
||||
assert body.get("detail") == "Internal Server Error"
|
||||
assert "error_id" in body
|
||||
assert re.fullmatch(r"[0-9a-f]{32}", body["error_id"]), body["error_id"]
|
||||
assert "traceback" not in body
|
||||
assert "exception_type" not in body
|
||||
|
||||
|
||||
@pytest.mark.anyio
|
||||
async def test_unhandled_exception_dev_shape_includes_traceback(
|
||||
client: httpx.AsyncClient,
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
) -> None:
|
||||
"""Dev mode (DECNET_DEVELOPER=True): body includes exception_type and
|
||||
traceback so failures are debuggable without tailing server logs."""
|
||||
import decnet.web.api as _api
|
||||
monkeypatch.setattr(_api, "DECNET_DEVELOPER", True)
|
||||
|
||||
headers = await _admin_headers(client)
|
||||
monkeypatch.setattr(repo, "get_attacker_by_uuid", _raise_boom)
|
||||
|
||||
r = await client.get("/api/v1/attackers/any-uuid", headers=headers)
|
||||
|
||||
assert r.status_code == 500
|
||||
body = r.json()
|
||||
assert body["detail"] == "Internal Server Error"
|
||||
assert "error_id" in body
|
||||
assert body["exception_type"] == "RuntimeError"
|
||||
assert "RuntimeError" in body["traceback"]
|
||||
assert "boom" in body["traceback"]
|
||||
|
||||
|
||||
@pytest.mark.anyio
|
||||
async def test_unhandled_exception_logs_error_id(
|
||||
client: httpx.AsyncClient,
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
caplog: pytest.LogCaptureFixture,
|
||||
) -> None:
|
||||
"""The same error_id returned to the client must appear in server logs,
|
||||
so operators can correlate a user's 500 report with the full traceback."""
|
||||
import decnet.web.api as _api
|
||||
monkeypatch.setattr(_api, "DECNET_DEVELOPER", False)
|
||||
|
||||
headers = await _admin_headers(client)
|
||||
monkeypatch.setattr(repo, "get_attacker_by_uuid", _raise_boom)
|
||||
|
||||
with caplog.at_level(logging.ERROR, logger="api"):
|
||||
r = await client.get("/api/v1/attackers/any-uuid", headers=headers)
|
||||
|
||||
assert r.status_code == 500
|
||||
error_id = r.json()["error_id"]
|
||||
assert any(error_id in rec.getMessage() for rec in caplog.records), (
|
||||
f"error_id {error_id} not found in captured logs: "
|
||||
f"{[rec.getMessage() for rec in caplog.records]}"
|
||||
)
|
||||
Reference in New Issue
Block a user