fix(security): close MEDIUM ASVS findings — JWT pinning, SSE tickets, SSRF, mTLS pin, rate limits + correctness bugs

Auth (V2.1.1/V3.1.2, V2.1.3, V3.1.1):
- Pin JWT iss/aud/typ at mint and require+verify them at decode; revocation
  (jti denylist + tokens_valid_from) still enforced.
- Change-password now requires min_length=12.
- SSE auth moves off JWT-in-URL to a single-use 60s opaque ticket
  (POST /auth/sse-ticket); raw JWT in query no longer authenticates a stream.
  Removed dead fail-open get_stream_user helper.

Egress (V5.1.1, V9.1.1/V14.1.3):
- Webhook delivery + CRUD reject SSRF destinations (private/loopback/link-local/
  metadata, IPv4-mapped, multi-A-record) via resolved-IP validation, pin to the
  vetted IP, and never auto-follow redirects. Opt-out via DECNET_WEBHOOK_ALLOW_PRIVATE.
- UpdaterClient pins the worker leaf cert SHA-256 against the stored per-host
  fingerprint (fail closed on missing/mismatch); DECNET_VERIFY_HOSTNAME now
  defaults True.

Hardening (V13.1.3, V4.1.4, V13.1.2):
- Rate-limit change-password (5/min), enroll-bundle (10/min), webhook-create
  (20/min), host-delete (20/min) via the existing slowapi limiter.
- Correct false 'global auth middleware' comment; document enroll-bundle proxy
  trust.

Correctness (BUG-7..11):
- BUG-7 unbound bus in finally; BUG-8 apply_ceiling clamps to min(base,ceiling);
  BUG-9 commit before emit; BUG-10 multi-actor rearm for sub-threshold identities;
  BUG-11 normalize naive timestamps to UTC.

Already-closed (no change): V14.1.1, V2.1.2/V3.1.3, V5.1.2. Tests added for
every fix; unanimous adversarial review.
This commit is contained in:
2026-06-10 12:32:15 -04:00
parent 6a8af315fb
commit d80e6aa6d1
37 changed files with 1414 additions and 121 deletions

View File

@@ -344,11 +344,11 @@ async def tick_multi_actor(
for entry in candidates:
identity_uuid = str(entry["identity_uuid"])
primitives: list[str] = sorted(entry.get("primitives") or [])
seen_now.add(identity_uuid)
if len(primitives) < _T.MULTI_ACTOR_MIN_PRIMITIVES:
# Repo already filters to >= 2 today; defensive against
# future schema drift.
continue
seen_now.add(identity_uuid)
signature = frozenset(primitives)
if last_fired.get(identity_uuid) == signature:
continue

View File

@@ -139,13 +139,20 @@ def record_fingerprint(
"ts": ts.isoformat(),
}
if publish_fn is not None:
publish_fn(_ROTATED_EVENT_TYPE, payload)
if syslog_fn is not None:
syslog_fn(_ROTATED_EVENT_TYPE, payload)
session.commit()
try:
if publish_fn is not None:
publish_fn(_ROTATED_EVENT_TYPE, payload)
if syslog_fn is not None:
syslog_fn(_ROTATED_EVENT_TYPE, payload)
except Exception: # noqa: BLE001
import logging as _logging
_logging.getLogger(__name__).warning(
"fingerprint_rotation: post-commit emit failed (state already durable)",
exc_info=True,
)
return RotationOutcome(
kind="rotated",
old_hash=old_hash,

View File

@@ -19,7 +19,7 @@ from __future__ import annotations
import re
from dataclasses import dataclass, field
from datetime import datetime
from datetime import datetime, timezone
from typing import Literal
# RFC 5424 line structure
@@ -159,6 +159,8 @@ def parse_line(line: str) -> LogEvent | None:
timestamp = datetime.fromisoformat(ts_raw)
except ValueError:
return None
if timestamp.tzinfo is None:
timestamp = timestamp.replace(tzinfo=timezone.utc)
fields = _parse_sd_params(sd_rest)
if sd_rest.startswith("-"):

View File

@@ -204,13 +204,26 @@ _cors_raw: str = os.environ.get("DECNET_CORS_ORIGINS", _cors_default)
DECNET_CORS_ORIGINS: list[str] = [o.strip() for o in _cors_raw.split(",") if o.strip()]
# Master→worker mTLS hostname verification. Off by default because legacy
# enrollments were issued certs with operator-supplied SAN lists that may
# not match the URL the master uses to connect; set to "true" on a fresh
# production deploy where you control enrollment to get TLS hostname checks
# on top of CA + fingerprint pinning.
# Master→worker mTLS hostname verification. ON by default — the worker's
# cert SAN must match the address the master connects to, on top of CA +
# SHA-256 fingerprint pinning. Operators with legacy enrollments whose
# operator-supplied SAN lists don't match the connect URL can opt OUT
# explicitly with DECNET_VERIFY_HOSTNAME=false, but that is a downgrade:
# it drops SAN binding and leans entirely on CA + per-host pinning.
DECNET_VERIFY_HOSTNAME: bool = (
os.environ.get("DECNET_VERIFY_HOSTNAME", "false").lower() == "true"
os.environ.get("DECNET_VERIFY_HOSTNAME", "true").lower() == "true"
)
# Webhook egress SSRF guard. By default DECNET refuses to deliver a webhook
# to a private (RFC1918), loopback, link-local (incl. 169.254.169.254 cloud
# metadata), unspecified, reserved, or multicast destination, and rejects
# such URLs at registration time. Operators who genuinely need to target an
# internal receiver (e.g. an on-box SIEM) opt IN explicitly by setting
# DECNET_WEBHOOK_ALLOW_PRIVATE=true. Fails closed: anything other than the
# literal "true" leaves the guard fully enabled.
DECNET_WEBHOOK_ALLOW_PRIVATE: bool = (
os.environ.get("DECNET_WEBHOOK_ALLOW_PRIVATE", "false").lower() == "true"
)

View File

@@ -498,6 +498,7 @@ async def _run_smtp_probe_listener(
probe_limit times — if not, forward via the master's real internet
connection and store a probe_relay bounty with the result.
"""
bus = None
try:
bus = get_bus(client_name="orchestrator-probe")
await bus.connect()
@@ -515,8 +516,9 @@ async def _run_smtp_probe_listener(
except Exception as exc: # noqa: BLE001
logger.warning("smtp probe listener: bus unavailable: %s", exc)
finally:
with contextlib.suppress(Exception):
await bus.close()
if bus is not None:
with contextlib.suppress(Exception):
await bus.close()
async def _handle_probe_pending(repo: BaseRepository, payload: dict) -> None:

View File

@@ -112,11 +112,11 @@ class AgentClient:
"""Either pass a SwarmHost dict, or explicit address/port.
``verify_hostname`` defers to ``DECNET_VERIFY_HOSTNAME`` when the
caller doesn't pass an explicit value — production deploys flip
the env var on so the worker's cert SAN must match the address
the master connects to, on top of the existing CA + fingerprint
pin. Defaults to False so dev/test enrollments with mismatched
SANs keep working unchanged.
caller doesn't pass an explicit value — the worker's cert SAN must
match the address the master connects to, on top of the existing CA
+ fingerprint pin. Defaults to True; operators opt out explicitly
via ``DECNET_VERIFY_HOSTNAME=false`` for dev/test enrollments with
mismatched SANs.
"""
if verify_hostname is None:
from decnet.env import DECNET_VERIFY_HOSTNAME
@@ -155,9 +155,10 @@ class AgentClient:
)
ctx.load_verify_locations(cafile=str(self._identity.ca_cert_path))
ctx.verify_mode = ssl.CERT_REQUIRED
# Pin by CA + cert chain, not by DNS — workers enroll with arbitrary
# SANs (IPs, hostnames) and we don't want to force operators to keep
# those in sync with whatever URL the master happens to use.
# Pin by CA + cert chain; hostname verification is on by default
# (DECNET_VERIFY_HOSTNAME=true) so the cert SAN must match the
# master's connect address. Operators set the env var to false only
# for dev/test enrollments with mismatched SANs.
ctx.check_hostname = self._verify_hostname
return httpx.AsyncClient(
base_url=f"https://{self._address}:{self._port}",

View File

@@ -13,14 +13,20 @@ the connection on purpose (the updater re-execs itself mid-response).
"""
from __future__ import annotations
import asyncio
import hashlib
import socket
import ssl
from typing import Any, Optional
import httpx
from decnet.logging import get_logger
from decnet.swarm.client import MasterIdentity, ensure_master_identity
from decnet.swarm.client import (
FingerprintMismatchError,
MasterIdentity,
ensure_master_identity,
)
log = get_logger("swarm.updater_client")
@@ -47,11 +53,19 @@ class UpdaterClient:
if host is not None:
self._address = host["address"]
self._host_name = host.get("name")
# SHA-256 of the worker's UPDATER leaf cert, recorded at enroll
# time (api_enroll_host.py writes ``updater_cert_fingerprint``).
# This is a distinct identity from the agent cert AgentClient
# pins — the updater channel pip-installs code as root, so it
# gets its own pin against its own cert.
fp = host.get("updater_cert_fingerprint")
self._expected_fingerprint = fp.lower() if isinstance(fp, str) else None
else:
if address is None:
raise ValueError("UpdaterClient requires host dict or address")
self._address = address
self._host_name = None
self._expected_fingerprint = None
self._port = updater_port
self._identity = identity or ensure_master_identity()
self._client: Optional[httpx.AsyncClient] = None
@@ -70,8 +84,64 @@ class UpdaterClient:
timeout=timeout,
)
def _fetch_peer_fingerprint(self) -> str:
"""Open a throwaway TLS connection to the updater port and return the
SHA-256 hex of the leaf cert it presents. Mirrors
``AgentClient._fetch_peer_fingerprint`` exactly."""
ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
ctx.load_cert_chain(
str(self._identity.cert_path), str(self._identity.key_path),
)
ctx.load_verify_locations(cafile=str(self._identity.ca_cert_path))
ctx.verify_mode = ssl.CERT_REQUIRED
ctx.check_hostname = self._verify_hostname
sock = socket.create_connection((self._address, self._port), timeout=10.0)
try:
server_hostname = self._address if self._verify_hostname else None
with ctx.wrap_socket(sock, server_hostname=server_hostname) as ssock:
der = ssock.getpeercert(binary_form=True)
finally:
try:
sock.close()
except OSError:
pass
if not der:
raise FingerprintMismatchError(
f"{self._address}:{self._port}", self._expected_fingerprint or "", ""
)
return hashlib.sha256(der).hexdigest().lower()
async def _verify_pin(self) -> None:
"""Fail closed unless the updater leaf cert SHA-256 matches the pin.
Unlike ``AgentClient`` (which falls through to CA-only when no pin is
recorded), the updater channel pip-installs code as root — so a host
with NO recorded ``updater_cert_fingerprint`` is rejected outright
rather than accepted on CA validity alone. A missing pin means the
host was never enrolled with an updater identity; we refuse to drive
code into it."""
if not self._expected_fingerprint:
raise FingerprintMismatchError(
f"{self._address}:{self._port}",
"<no updater_cert_fingerprint recorded for host>",
"",
)
actual = await asyncio.to_thread(self._fetch_peer_fingerprint)
if actual != self._expected_fingerprint:
raise FingerprintMismatchError(
f"{self._address}:{self._port}",
self._expected_fingerprint,
actual,
)
async def __aenter__(self) -> "UpdaterClient":
self._client = self._build_client(_TIMEOUT_CONTROL)
try:
await self._verify_pin()
except BaseException:
await self._client.aclose()
self._client = None
raise
return self
async def __aexit__(self, *exc: Any) -> None:

View File

@@ -40,7 +40,7 @@ def apply_ceiling(base: float, state: "RuleState") -> float:
"""Apply the operator's confidence ceiling, downward only.
A ``clipped`` state with ``confidence_max < 1.0`` clamps the emitted
confidence to ``min(base, base * ceiling)``. Any other state is a
confidence to ``min(base, ceiling)``. Any other state is a
no-op. The clamp is downward by construction — operator clips can
never raise a rule's confidence above its YAML-declared base, per
TTP_TAGGING.md §"Confidence model".
@@ -50,7 +50,7 @@ def apply_ceiling(base: float, state: "RuleState") -> float:
ceiling = state.confidence_max
if ceiling is None or ceiling >= 1.0:
return base
return min(base, base * ceiling)
return min(base, ceiling)
__all__ = ["is_active", "apply_ceiling"]

View File

@@ -11,6 +11,14 @@ SECRET_KEY: str = DECNET_JWT_SECRET
ALGORITHM: str = "HS256"
ACCESS_TOKEN_EXPIRE_MINUTES: int = DECNET_JWT_EXP_MINUTES
# Pinned issuer/audience/type so a token signed with DECNET_JWT_SECRET for any
# OTHER purpose (or by a future co-tenant of the secret) is not accepted by the
# dashboard verifier. Issuance stamps these; _decode_payload requires + verifies
# them. Keep these two modules in lockstep — they are a single trust contract.
JWT_ISSUER: str = "decnet"
JWT_AUDIENCE: str = "decnet-dashboard"
JWT_TYPE: str = "access"
def verify_password(plain_password: str, hashed_password: str) -> bool:
return bcrypt.checkpw(
@@ -45,5 +53,10 @@ def create_access_token(data: dict[str, Any], expires_delta: Optional[timedelta]
_to_encode.update({"exp": _expire})
_to_encode.update({"iat": datetime.now(timezone.utc)})
# Pin issuer / audience / token-type so the verifier can reject tokens
# minted for any other purpose with the same shared secret.
_to_encode.setdefault("iss", JWT_ISSUER)
_to_encode.setdefault("aud", JWT_AUDIENCE)
_to_encode.setdefault("typ", JWT_TYPE)
_encoded_jwt: str = jwt.encode(_to_encode, SECRET_KEY, algorithm=ALGORITHM)
return _encoded_jwt

View File

@@ -50,7 +50,18 @@ class LoginRequest(BaseModel):
class ChangePasswordRequest(BaseModel):
old_password: str = PydanticField(..., max_length=72)
new_password: str = PydanticField(..., max_length=72)
# min_length=12 aligns with the DECNET_ADMIN_PASSWORD >=12 policy. The
# forced first-login flow routes through /auth/change-password, so without a
# floor a seeded admin could clear must_change_password with a 1-char secret.
new_password: str = PydanticField(..., min_length=12, max_length=72)
class SSETicketResponse(BaseModel):
"""Single-use, short-lived opaque ticket the dashboard exchanges its header
JWT for, then passes to an SSE endpoint as ?ticket= (EventSource cannot set
an Authorization header). See decnet.web.dependencies SSE ticket store."""
ticket: str
expires_in: int
# --- Configuration Models ---

View File

@@ -1,5 +1,6 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
import asyncio
import secrets
import time
from datetime import datetime, timezone
from typing import Any, Optional
@@ -8,7 +9,13 @@ import jwt
from fastapi import HTTPException, status, Request
from fastapi.security import OAuth2PasswordBearer
from decnet.web.auth import ALGORITHM, SECRET_KEY
from decnet.web.auth import (
ALGORITHM,
JWT_AUDIENCE,
JWT_ISSUER,
JWT_TYPE,
SECRET_KEY,
)
from decnet.web.db.repository import BaseRepository
from decnet.web.db.factory import get_repository
@@ -168,13 +175,30 @@ def _epoch(value: Any) -> float:
def _decode_payload(token: str) -> dict[str, Any]:
"""Decode + signature/expiry-verify a raw JWT, or raise 401."""
"""Decode + signature/expiry-verify a raw JWT, or raise 401.
Beyond signature + expiry, this pins the issuer and audience and requires
the registered claims to be present, so a token minted with the same shared
secret for a different purpose (or omitting exp/iat/iss/aud) is rejected.
``uuid`` (not ``sub``) is this app's identity claim, so it is in ``require``.
``typ`` is a custom payload claim PyJWT does not validate natively, so it is
checked explicitly below.
"""
try:
payload: dict[str, Any] = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM])
payload: dict[str, Any] = jwt.decode(
token,
SECRET_KEY,
algorithms=[ALGORITHM],
audience=JWT_AUDIENCE,
issuer=JWT_ISSUER,
options={"require": ["exp", "iat", "iss", "aud", "uuid"]},
)
except jwt.PyJWTError:
raise _CREDENTIALS_EXCEPTION
if payload.get("uuid") is None:
raise _CREDENTIALS_EXCEPTION
if payload.get("typ") != JWT_TYPE:
raise _CREDENTIALS_EXCEPTION
return payload
@@ -236,17 +260,70 @@ async def get_token_claims(request: Request) -> dict[str, Any]:
return _decode_payload(token)
async def get_stream_user(request: Request, token: Optional[str] = None) -> str:
"""Auth dependency for SSE endpoints — accepts Bearer header OR ?token= query param.
EventSource does not support custom headers, so the query-string fallback is intentional here only.
# ---------------------------------------------------------------------------
# SSE stream tickets (V3.1.1)
# ---------------------------------------------------------------------------
# EventSource cannot set an Authorization header, so SSE auth historically rode
# in ?token=<JWT>, leaking the full-lifetime bearer into access/proxy logs,
# browser history, and Referer. Instead the client exchanges its header JWT for
# a single-use, short-lived OPAQUE ticket via POST /api/v1/auth/sse-ticket and
# connects with ?ticket=<opaque>. The JWT never appears in any URL.
#
# Security-boundary store — FAIL CLOSED. The map is keyed on the opaque ticket
# and holds (expiry_monotonic, bound_identity). Redemption validates presence +
# freshness, then DELETES the entry (single-use). Unknown / expired / reused
# tickets all resolve to 401.
#
# This is a MODULE-LEVEL dict: tickets live only in the process that minted
# them. A multi-process / multi-worker deployment needs a SHARED store (Redis,
# DB) so a ticket minted on worker A can be redeemed on worker B — out of scope
# here, deliberately. No background sweeper daemon (project rule: library, not
# new worker); expiry is enforced opportunistically on every redeem + mint.
_SSE_TICKET_TTL = 60.0 # seconds
_sse_tickets: dict[str, tuple[float, dict[str, Any]]] = {}
def _reset_sse_tickets() -> None:
"""Test hook: drop all outstanding stream tickets."""
_sse_tickets.clear()
def _sweep_sse_tickets(now: Optional[float] = None) -> None:
"""Opportunistic eviction of expired tickets. O(n) over a tiny map (tickets
are single-use and 60s-lived), called on every mint/redeem — no daemon."""
_now = time.monotonic() if now is None else now
expired = [t for t, (exp, _) in _sse_tickets.items() if exp <= _now]
for t in expired:
_sse_tickets.pop(t, None)
def mint_sse_ticket(user_uuid: str, role: str) -> str:
"""Mint a single-use, 60s opaque SSE ticket bound to ``user_uuid``+``role``.
Called by POST /auth/sse-ticket AFTER the header JWT has been validated, so
the bound identity is already trusted. Returns the opaque token the client
passes as ?ticket=. Sweeps expired entries on the way in.
"""
resolved = _bearer_from_header(request) or token
if not resolved:
_sweep_sse_tickets()
ticket = secrets.token_urlsafe(32)
expiry = time.monotonic() + _SSE_TICKET_TTL
_sse_tickets[ticket] = (expiry, {"uuid": user_uuid, "role": role})
return ticket
def _redeem_sse_ticket(ticket: str) -> dict[str, Any]:
"""Redeem a stream ticket: validate exists + unexpired, then DELETE it
(single-use). Returns the bound ``{"uuid","role"}`` identity or raises 401.
Fail closed: unknown / expired / already-redeemed all raise."""
now = time.monotonic()
_sweep_sse_tickets(now)
entry = _sse_tickets.pop(ticket, None) # pop = single-use, even on expiry
if entry is None:
raise _CREDENTIALS_EXCEPTION
# Decode-only: returns the uuid. Revocation/role enforcement happens in
# require_stream_role (the sole production caller), which runs the full
# _resolve_token path. Kept thin so its decode contract stays unit-testable.
return _decode_payload(resolved)["uuid"]
expiry, identity = entry
if expiry <= now:
raise _CREDENTIALS_EXCEPTION
return identity
async def get_current_user(request: Request) -> str:
@@ -298,18 +375,35 @@ def require_role(*allowed_roles: str):
def require_stream_role(*allowed_roles: str):
"""Like ``require_role`` but for SSE endpoints that accept a query-param token."""
async def _check(request: Request, token: Optional[str] = None) -> dict:
resolved = _bearer_from_header(request) or token
if not resolved:
"""Like ``require_role`` but for SSE endpoints.
Two ingress paths:
* Bearer header → full ``_resolve_token`` (revocation + cutoff enforced).
* ?ticket=<opaque> → single-use stream ticket minted by /auth/sse-ticket,
which already validated the header JWT and bound the uuid+role. The
ticket carries no jti, so the per-token denylist cannot apply here; the
60s single-use lifetime is the bounded exposure we accept for SSE.
Raw ?token=<JWT> is intentionally NOT accepted (V3.1.1)."""
async def _check(request: Request, ticket: Optional[str] = None) -> dict:
header_token = _bearer_from_header(request)
if header_token:
_user_uuid, user = await _resolve_token(header_token)
if user["role"] not in allowed_roles:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail="Insufficient permissions",
)
return user
if not ticket:
raise _CREDENTIALS_EXCEPTION
_user_uuid, user = await _resolve_token(resolved)
if user["role"] not in allowed_roles:
identity = _redeem_sse_ticket(ticket)
if identity["role"] not in allowed_roles:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail="Insufficient permissions",
)
return user
return identity
return _check

View File

@@ -4,6 +4,7 @@ from fastapi import APIRouter
from .auth.api_login import router as login_router
from .auth.api_change_pass import router as change_pass_router
from .auth.api_logout import router as logout_router
from .auth.api_sse_ticket import router as sse_ticket_router
from .logs.api_get_logs import router as logs_router
from .logs.api_get_histogram import router as histogram_router
from .bounty.api_get_bounties import router as bounty_router
@@ -75,9 +76,12 @@ from .ttp.api_export_navigator import router as ttp_navigator_router
from .ttp.api_get_groups_for_technique import router as ttp_groups_for_technique_router
api_router = APIRouter(
# Every route under /api/v1 is auth-guarded (either by an explicit
# require_* Depends or by the global auth middleware). Document 401/403
# here so the OpenAPI schema reflects reality for contract tests.
# Auth is enforced PER ROUTE via explicit ``require_*`` Depends (see
# decnet.web.dependencies) — there is NO global auth middleware. A route
# without a require_* dependency is unauthenticated BY DESIGN; the only such
# routes are /health (liveness) and /auth/login (credential exchange).
# The 401/403 entries below are documented here so the OpenAPI schema
# reflects reality for contract tests, not because a middleware applies them.
responses={
400: {"description": "Malformed request body"},
401: {"description": "Missing or invalid credentials"},
@@ -91,6 +95,7 @@ api_router = APIRouter(
api_router.include_router(login_router)
api_router.include_router(change_pass_router)
api_router.include_router(logout_router)
api_router.include_router(sse_ticket_router)
# Logs & Analytics
api_router.include_router(logs_router)

View File

@@ -10,8 +10,9 @@ stream's attacker. Emits a one-shot snapshot on connect (latest
observation per primitive) so the panel hydrates immediately.
Authorization mirrors :mod:`decnet.web.router.topology.api_events` —
JWT via the ``?token=`` query parameter (EventSource can't set
arbitrary headers) + ``require_stream_viewer`` role gate. The 404
a single-use opaque ticket via the ``?ticket=`` query parameter
(EventSource can't set arbitrary headers) + ``require_stream_viewer``
role gate. The 404
fires after auth so an existence probe can't leak an attacker UUID
to an unauthenticated caller.

View File

@@ -2,12 +2,13 @@
from datetime import datetime, timezone
from typing import Any, Optional
from fastapi import APIRouter, Depends, HTTPException, status
from fastapi import APIRouter, Depends, HTTPException, Request, status
from decnet.telemetry import traced as _traced
from decnet.web.auth import ahash_password, averify_password
from decnet.web.dependencies import get_current_user_unchecked, invalidate_user_cache, repo
from decnet.web.db.models import ChangePasswordRequest, MessageResponse
from decnet.web.limiter import limiter
router = APIRouter()
@@ -19,19 +20,21 @@ router = APIRouter()
responses={
400: {"description": "Bad Request (e.g. malformed JSON)"},
401: {"description": "Could not validate credentials"},
422: {"description": "Validation error"}
422: {"description": "Validation error"},
429: {"description": "Too many password-change attempts — retry after the window resets"},
},
)
@limiter.limit("5/minute")
@_traced("api.change_password")
async def change_password(request: ChangePasswordRequest, current_user: str = Depends(get_current_user_unchecked)) -> dict[str, str]:
async def change_password(request: Request, body: ChangePasswordRequest, current_user: str = Depends(get_current_user_unchecked)) -> dict[str, str]:
_user: Optional[dict[str, Any]] = await repo.get_user_by_uuid(current_user)
if not _user or not await averify_password(request.old_password, _user["password_hash"]):
if not _user or not await averify_password(body.old_password, _user["password_hash"]):
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Incorrect old password",
)
_new_hash: str = await ahash_password(request.new_password)
_new_hash: str = await ahash_password(body.new_password)
await repo.update_user_password(current_user, _new_hash, must_change_password=False)
# Changing a password revokes every existing session for this user (incl.
# the current one): the caller's next request 401s and re-authenticates.

View File

@@ -0,0 +1,39 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Mint a single-use, short-lived SSE stream ticket (V3.1.1).
EventSource cannot send an Authorization header, so SSE auth used to ride in
``?token=<JWT>`` — leaking the full-lifetime bearer into access/proxy logs,
browser history, and Referer. This endpoint lets an already-authenticated
client (gated by the NORMAL header JWT via ``require_viewer``) exchange that
header credential for an opaque ``secrets.token_urlsafe(32)`` ticket, valid for
60s and single-use, which it then passes to the SSE endpoint as ``?ticket=``.
The JWT never appears in any URL.
The ticket store lives in-process (decnet.web.dependencies); multi-process
deployments need a shared store — out of scope, see that module's note.
"""
from fastapi import APIRouter, Depends
from decnet.telemetry import traced as _traced
from decnet.web.dependencies import mint_sse_ticket, require_viewer, _SSE_TICKET_TTL
from decnet.web.db.models.auth import SSETicketResponse
router = APIRouter()
@router.post(
"/auth/sse-ticket",
tags=["Authentication"],
response_model=SSETicketResponse,
responses={
400: {"description": "Malformed request body"},
401: {"description": "Missing or invalid credentials"},
403: {"description": "Authenticated but not authorized"},
},
)
@_traced("api.sse_ticket")
async def mint_stream_ticket(user: dict = Depends(require_viewer)) -> SSETicketResponse:
"""Exchange the presented header JWT for a single-use 60s SSE ticket bound to
this user's uuid + role. Any authenticated (viewer or admin) user may mint."""
ticket = mint_sse_ticket(user["uuid"], user["role"])
return SSETicketResponse(ticket=ticket, expires_in=int(_SSE_TICKET_TTL))

View File

@@ -6,8 +6,9 @@ request and forwards each matching event as a Server-Sent Event.
Emits a one-shot snapshot on connect (current paginated campaign
list).
Mirror of :mod:`decnet.web.router.identities.api_events`. Auth: JWT
via ``?token=`` query param + ``require_stream_viewer`` role.
Mirror of :mod:`decnet.web.router.identities.api_events`. Auth:
single-use opaque ticket via ``?ticket=`` query param +
``require_stream_viewer`` role.
"""
from __future__ import annotations

View File

@@ -8,8 +8,9 @@ Server-Sent Event to the browser. Emits a one-shot snapshot on connect
fetch to initialise.
Authorization mirrors :mod:`decnet.web.router.topology.api_events` — a
JWT passed via the ``?token=`` query parameter (EventSource can't set
arbitrary headers) + ``require_stream_viewer`` role gate.
single-use opaque ticket passed via the ``?ticket=`` query parameter
(EventSource can't set arbitrary headers) + ``require_stream_viewer``
role gate.
The endpoint is broadly scoped (every identity event, not per-uuid)
because both ``AttackerDetail`` and ``IdentityDetail`` need the same

View File

@@ -12,12 +12,13 @@ from __future__ import annotations
import pathlib
from fastapi import APIRouter, Depends, HTTPException, status
from fastapi import APIRouter, Depends, HTTPException, Request, status
from decnet.logging import get_logger
from decnet.swarm.client import AgentClient
from decnet.web.db.repository import BaseRepository
from decnet.web.dependencies import get_repo, require_admin
from decnet.web.limiter import limiter
from decnet.web.router.swarm._mtls import PeerCert, require_operator_cert
log = get_logger("swarm.decommission")
@@ -32,10 +33,13 @@ router = APIRouter()
401: {"description": "Missing or invalid admin JWT"},
403: {"description": "Authenticated user is not an admin, or operator cert missing"},
404: {"description": "No host with this UUID is enrolled"},
429: {"description": "Too many decommission requests — retry after the window resets"},
},
)
@limiter.limit("20/minute")
async def api_decommission_host(
uuid: str,
request: Request,
repo: BaseRepository = Depends(get_repo),
_admin: dict = Depends(require_admin),
_operator: PeerCert = Depends(require_operator_cert),

View File

@@ -34,6 +34,7 @@ from decnet.swarm.bundle_builder import build_tarball, render_bootstrap
from decnet.web.db.models.swarm import EnrollBundleRequest, EnrollBundleResponse
from decnet.web.db.repository import BaseRepository
from decnet.web.dependencies import get_repo, require_admin
from decnet.web.limiter import limiter
log = get_logger("swarm_mgmt.enroll_bundle")
@@ -117,8 +118,10 @@ async def _lookup_live(token: str) -> _Bundle:
403: {"description": "Insufficient permissions"},
409: {"description": "A worker with this name is already enrolled"},
422: {"description": "Request body validation error"},
429: {"description": "Too many enroll-bundle requests — retry after the window resets"},
},
)
@limiter.limit("10/minute")
async def create_enroll_bundle(
req: EnrollBundleRequest,
request: Request,
@@ -251,6 +254,14 @@ async def get_payload(
# The agent's first connect-back — its source IP is the reachable address
# the master will later use to probe it. Backfill the SwarmHost row here
# so the operator sees the real address instead of an empty placeholder.
#
# PROXY TRUST WARNING: `request.client.host` is the TCP peer's IP.
# If this endpoint sits behind a TCP-terminating reverse proxy (nginx,
# HAProxy, etc.) the recorded address will be the proxy's IP, not the
# agent's. Either bind the API directly on the network reachable by
# agents, or configure the proxy to preserve the original source IP
# (e.g. PROXY Protocol on a loopback listener, *not* X-Forwarded-For
# which is trivially spoofable). See THREAT_MODEL.md §DA-08.
client_host = request.client.host if request.client else ""
if client_host:
try:

View File

@@ -8,8 +8,9 @@ a Server-Sent Event to the browser. Emits a one-shot snapshot on connect
separate fetch to initialise the "pending" buffer.
Authorization matches :mod:`decnet.web.router.stream.api_stream_events`
— a JWT passed via the ``?token=`` query parameter (EventSource can't
set arbitrary headers) + ``require_stream_viewer`` role gate. The
— a single-use opaque ticket passed via the ``?ticket=`` query
parameter (EventSource can't set arbitrary headers) +
``require_stream_viewer`` role gate. The
per-topology 404 is enforced after auth so existence probes can't leak
a topology id to an unauthenticated caller.
"""

View File

@@ -7,7 +7,7 @@ import secrets
from datetime import datetime, timezone
from typing import Any, cast
from fastapi import APIRouter, Depends, HTTPException
from fastapi import APIRouter, Depends, HTTPException, Request
from decnet.bus import topics as _topics
from decnet.bus.app import get_app_bus
@@ -22,13 +22,28 @@ from decnet.web.db.models import (
)
from decnet.web.db.models.webhooks import _row_to_response_dict
from decnet.web.dependencies import repo, require_admin
from decnet.web.limiter import limiter
from decnet.webhook.enums import merge_patterns
from decnet.webhook.ssrf import WebhookDestinationError, validate_webhook_url
log = get_logger("api.webhooks")
router = APIRouter()
def _validate_url_or_422(url: str) -> None:
"""Reject a webhook URL that resolves to a forbidden destination.
Runs the same SSRF guard the delivery path enforces, but at
registration time so a bad URL is surfaced to the operator as a clear
422 instead of being silently dropped on every delivery attempt.
"""
try:
validate_webhook_url(url)
except WebhookDestinationError as e:
raise HTTPException(status_code=422, detail=str(e)) from e
async def _notify_subscriptions_changed() -> None:
"""Publish `system.webhook.subscriptions_changed` on the bus.
@@ -60,10 +75,14 @@ def _row_to_response(row: dict[str, Any]) -> WebhookResponse:
responses={
400: {"description": "At least one of simple_events / topic_patterns required"},
409: {"description": "Name already in use"},
422: {"description": "URL resolves to a forbidden (internal) destination"},
429: {"description": "Too many webhook-create requests — retry after the window resets"},
},
)
@limiter.limit("20/minute")
@_traced("api.webhook.create")
async def api_create_webhook(
request: Request,
req: WebhookCreateRequest,
admin: dict = Depends(require_admin),
) -> WebhookCreateResponse:
@@ -78,6 +97,8 @@ async def api_create_webhook(
if existing:
raise HTTPException(status_code=409, detail="Webhook name already exists")
_validate_url_or_422(str(req.url))
# Auto-generate a URL-safe secret if the caller didn't provide one.
# 32 bytes of os-entropy is the same ballpark as a CSRF token.
secret = req.secret or secrets.token_urlsafe(32)
@@ -146,6 +167,7 @@ async def api_get_webhook(
400: {"description": "Empty or invalid patch"},
404: {"description": "Webhook not found"},
409: {"description": "Name already in use"},
422: {"description": "URL resolves to a forbidden (internal) destination"},
},
)
@_traced("api.webhook.update")
@@ -167,6 +189,7 @@ async def api_update_webhook(
patch["name"] = req.name
if req.url is not None:
_validate_url_or_422(str(req.url))
patch["url"] = str(req.url)
if req.secret is not None:

View File

@@ -22,6 +22,11 @@ import httpx
import orjson
from decnet.logging import get_logger
from decnet.webhook.ssrf import (
ValidatedDestination,
WebhookDestinationError,
validate_webhook_url,
)
log = get_logger("webhook.client")
@@ -121,6 +126,51 @@ def _jittered(delay: float) -> float:
return delay * random.uniform(_JITTER_LOW, _JITTER_HIGH) # nosec B311
def _build_pinned_request(
client: httpx.AsyncClient,
url: str,
dest: ValidatedDestination,
body: bytes,
headers: dict[str, str],
) -> httpx.Request:
"""Build a POST request pinned to a validated IP.
Defeats DNS rebinding: instead of letting httpx re-resolve the hostname
at connect time (which an attacker-controlled DNS could flip to an
internal IP after our check passed), we point the connection at one of
the IPs we already validated, while preserving the original ``Host``
header and TLS SNI so the receiver and certificate validation still see
the real hostname.
"""
pinned_ip = dest.ip_addresses[0]
# httpx brackets IPv6 hosts itself — pass the bare IP.
pinned_url = httpx.URL(url).copy_with(host=pinned_ip)
req_headers = dict(headers)
# Preserve virtual-host routing on the receiver.
req_headers.setdefault("Host", _host_header(dest.host, dest.port, dest.scheme))
# Keep TLS SNI + cert hostname validation bound to the real host, not
# the bare IP we connect to.
extensions = {"sni_hostname": dest.host} if dest.scheme == "https" else {}
return client.build_request(
"POST",
pinned_url,
content=body,
headers=req_headers,
extensions=extensions,
)
def _host_header(host: str, port: int, scheme: str) -> str:
default_port = 443 if scheme == "https" else 80
host_part = f"[{host}]" if ":" in host else host
if port == default_port:
return host_part
return f"{host_part}:{port}"
async def deliver(
sub: dict[str, Any],
event: Any,
@@ -148,6 +198,15 @@ async def deliver(
headers = _build_headers(sub["secret"], body, topic, eid)
url = sub["url"]
# SSRF guard: resolve + validate the destination before any connect.
# Fail closed and treat a forbidden destination as terminal (no retry —
# the URL itself is the problem, not a transient network condition).
try:
dest = validate_webhook_url(url)
except WebhookDestinationError as e:
log.warning("webhook delivery blocked by SSRF guard: %s", e)
return DeliveryResult(ok=False, status_code=None, error=str(e), attempts=0)
owns_client = client is None
if client is None:
client = httpx.AsyncClient(timeout=timeout_s)
@@ -157,7 +216,8 @@ async def deliver(
try:
for attempt in range(1, max_attempts + 1):
try:
resp = await client.post(url, content=body, headers=headers)
request = _build_pinned_request(client, url, dest, body, headers)
resp = await client.send(request, follow_redirects=False)
last_status = resp.status_code
if 200 <= resp.status_code < 300:
return DeliveryResult(

151
decnet/webhook/ssrf.py Normal file
View File

@@ -0,0 +1,151 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""SSRF egress guard for outbound webhook delivery.
Admin-supplied webhook URLs are attacker-influenceable (anyone able to
write a subscription row). Without a destination check the master can be
pointed at internal services — cloud metadata (169.254.169.254), the
loopback API, RFC1918 hosts — turning the egress path into an SSRF
primitive.
This module resolves the URL host to concrete IPs and rejects any that
are private / loopback / link-local / unspecified / reserved / multicast,
and rejects non-http(s) schemes. It returns the *validated* IP set so the
caller can connect to a checked address rather than re-resolving (which a
DNS-rebinding attacker could flip between the validation and the connect).
Fail closed: the guard is fully active unless the operator explicitly opts
out via ``DECNET_WEBHOOK_ALLOW_PRIVATE=true``.
"""
from __future__ import annotations
import ipaddress
import socket
from dataclasses import dataclass
from typing import Optional
from urllib.parse import urlsplit
_ALLOWED_SCHEMES = frozenset({"http", "https"})
class WebhookDestinationError(ValueError):
"""Raised when a webhook URL resolves to a forbidden destination.
Subclasses ``ValueError`` so the CRUD layer can turn it into a 422 and
the delivery layer can treat it as a terminal (non-retryable) failure.
"""
@dataclass(frozen=True)
class ValidatedDestination:
"""Result of a successful guard check.
``ip_addresses`` is the set of validated literal IPs the URL host
resolved to. Connecting to one of these (instead of re-resolving the
hostname) closes the DNS-rebinding window.
"""
host: str
port: int
scheme: str
ip_addresses: tuple[str, ...]
def _is_forbidden(ip: ipaddress.IPv4Address | ipaddress.IPv6Address) -> bool:
"""Block anything that is not a routable public address.
``is_global`` is the inverse of the union we care about, but we spell
out the categories so the intent (and the audit mapping) is explicit
and so we also catch reserved/multicast that ``is_private`` misses.
"""
if (
ip.is_private # RFC1918 10/8, 172.16/12, 192.168/16, fc00::/7
or ip.is_loopback # 127/8, ::1
or ip.is_link_local # 169.254/16 (incl. 169.254.169.254), fe80::/10
or ip.is_unspecified # 0.0.0.0, ::
or ip.is_reserved
or ip.is_multicast
):
return True
# IPv4-mapped IPv6 (::ffff:a.b.c.d) hides a v4 address from the checks
# above; unwrap and re-check so 127.0.0.1 can't sneak in as ::ffff:7f00:1.
mapped = getattr(ip, "ipv4_mapped", None)
if mapped is not None:
return _is_forbidden(mapped)
return False
def _resolve(host: str, port: int) -> tuple[str, ...]:
"""Resolve *host* to the set of literal IPs it points at.
A bare IP literal short-circuits getaddrinfo. DNS failures raise
``WebhookDestinationError`` (fail closed — we never deliver to a host
we couldn't resolve and check)."""
try:
ipaddress.ip_address(host)
return (host,)
except ValueError:
pass
try:
infos = socket.getaddrinfo(host, port, proto=socket.IPPROTO_TCP)
except socket.gaierror as exc:
raise WebhookDestinationError(
f"webhook host {host!r} did not resolve: {exc}"
) from exc
addrs = {str(info[4][0]) for info in infos}
if not addrs:
raise WebhookDestinationError(f"webhook host {host!r} resolved to nothing")
return tuple(sorted(addrs))
def validate_webhook_url(url: str, *, allow_private: Optional[bool] = None) -> ValidatedDestination:
"""Validate *url* as a safe webhook egress destination.
Raises ``WebhookDestinationError`` on a bad scheme, missing host, a host
that won't resolve, or any resolved address that is private / loopback /
link-local / unspecified / reserved / multicast.
``allow_private`` defaults to the ``DECNET_WEBHOOK_ALLOW_PRIVATE`` env
flag (resolved lazily so tests can monkeypatch the env module). When
True the IP-category checks are skipped, but scheme + resolvability are
still enforced.
"""
if allow_private is None:
from decnet.env import DECNET_WEBHOOK_ALLOW_PRIVATE
allow_private = DECNET_WEBHOOK_ALLOW_PRIVATE
parts = urlsplit(url)
scheme = parts.scheme.lower()
if scheme not in _ALLOWED_SCHEMES:
raise WebhookDestinationError(
f"webhook URL scheme {scheme!r} is not allowed (use http/https)"
)
host = parts.hostname
if not host:
raise WebhookDestinationError("webhook URL has no host")
port = parts.port or (443 if scheme == "https" else 80)
resolved = _resolve(host, port)
if not allow_private:
for addr in resolved:
try:
ip = ipaddress.ip_address(addr)
except ValueError as exc:
raise WebhookDestinationError(
f"webhook host {host!r} resolved to non-IP {addr!r}"
) from exc
if _is_forbidden(ip):
raise WebhookDestinationError(
f"webhook host {host!r} resolves to forbidden address {addr} "
"(private/loopback/link-local/reserved). Set "
"DECNET_WEBHOOK_ALLOW_PRIVATE=true to permit internal targets."
)
return ValidatedDestination(
host=host, port=port, scheme=scheme, ip_addresses=resolved
)