fix(security): close MEDIUM ASVS findings — JWT pinning, SSE tickets, SSRF, mTLS pin, rate limits + correctness bugs
Auth (V2.1.1/V3.1.2, V2.1.3, V3.1.1): - Pin JWT iss/aud/typ at mint and require+verify them at decode; revocation (jti denylist + tokens_valid_from) still enforced. - Change-password now requires min_length=12. - SSE auth moves off JWT-in-URL to a single-use 60s opaque ticket (POST /auth/sse-ticket); raw JWT in query no longer authenticates a stream. Removed dead fail-open get_stream_user helper. Egress (V5.1.1, V9.1.1/V14.1.3): - Webhook delivery + CRUD reject SSRF destinations (private/loopback/link-local/ metadata, IPv4-mapped, multi-A-record) via resolved-IP validation, pin to the vetted IP, and never auto-follow redirects. Opt-out via DECNET_WEBHOOK_ALLOW_PRIVATE. - UpdaterClient pins the worker leaf cert SHA-256 against the stored per-host fingerprint (fail closed on missing/mismatch); DECNET_VERIFY_HOSTNAME now defaults True. Hardening (V13.1.3, V4.1.4, V13.1.2): - Rate-limit change-password (5/min), enroll-bundle (10/min), webhook-create (20/min), host-delete (20/min) via the existing slowapi limiter. - Correct false 'global auth middleware' comment; document enroll-bundle proxy trust. Correctness (BUG-7..11): - BUG-7 unbound bus in finally; BUG-8 apply_ceiling clamps to min(base,ceiling); BUG-9 commit before emit; BUG-10 multi-actor rearm for sub-threshold identities; BUG-11 normalize naive timestamps to UTC. Already-closed (no change): V14.1.1, V2.1.2/V3.1.3, V5.1.2. Tests added for every fix; unanimous adversarial review.
This commit is contained in:
@@ -112,11 +112,11 @@ class AgentClient:
|
||||
"""Either pass a SwarmHost dict, or explicit address/port.
|
||||
|
||||
``verify_hostname`` defers to ``DECNET_VERIFY_HOSTNAME`` when the
|
||||
caller doesn't pass an explicit value — production deploys flip
|
||||
the env var on so the worker's cert SAN must match the address
|
||||
the master connects to, on top of the existing CA + fingerprint
|
||||
pin. Defaults to False so dev/test enrollments with mismatched
|
||||
SANs keep working unchanged.
|
||||
caller doesn't pass an explicit value — the worker's cert SAN must
|
||||
match the address the master connects to, on top of the existing CA
|
||||
+ fingerprint pin. Defaults to True; operators opt out explicitly
|
||||
via ``DECNET_VERIFY_HOSTNAME=false`` for dev/test enrollments with
|
||||
mismatched SANs.
|
||||
"""
|
||||
if verify_hostname is None:
|
||||
from decnet.env import DECNET_VERIFY_HOSTNAME
|
||||
@@ -155,9 +155,10 @@ class AgentClient:
|
||||
)
|
||||
ctx.load_verify_locations(cafile=str(self._identity.ca_cert_path))
|
||||
ctx.verify_mode = ssl.CERT_REQUIRED
|
||||
# Pin by CA + cert chain, not by DNS — workers enroll with arbitrary
|
||||
# SANs (IPs, hostnames) and we don't want to force operators to keep
|
||||
# those in sync with whatever URL the master happens to use.
|
||||
# Pin by CA + cert chain; hostname verification is on by default
|
||||
# (DECNET_VERIFY_HOSTNAME=true) so the cert SAN must match the
|
||||
# master's connect address. Operators set the env var to false only
|
||||
# for dev/test enrollments with mismatched SANs.
|
||||
ctx.check_hostname = self._verify_hostname
|
||||
return httpx.AsyncClient(
|
||||
base_url=f"https://{self._address}:{self._port}",
|
||||
|
||||
@@ -13,14 +13,20 @@ the connection on purpose (the updater re-execs itself mid-response).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import hashlib
|
||||
import socket
|
||||
import ssl
|
||||
from typing import Any, Optional
|
||||
|
||||
import httpx
|
||||
|
||||
from decnet.logging import get_logger
|
||||
from decnet.swarm.client import MasterIdentity, ensure_master_identity
|
||||
from decnet.swarm.client import (
|
||||
FingerprintMismatchError,
|
||||
MasterIdentity,
|
||||
ensure_master_identity,
|
||||
)
|
||||
|
||||
log = get_logger("swarm.updater_client")
|
||||
|
||||
@@ -47,11 +53,19 @@ class UpdaterClient:
|
||||
if host is not None:
|
||||
self._address = host["address"]
|
||||
self._host_name = host.get("name")
|
||||
# SHA-256 of the worker's UPDATER leaf cert, recorded at enroll
|
||||
# time (api_enroll_host.py writes ``updater_cert_fingerprint``).
|
||||
# This is a distinct identity from the agent cert AgentClient
|
||||
# pins — the updater channel pip-installs code as root, so it
|
||||
# gets its own pin against its own cert.
|
||||
fp = host.get("updater_cert_fingerprint")
|
||||
self._expected_fingerprint = fp.lower() if isinstance(fp, str) else None
|
||||
else:
|
||||
if address is None:
|
||||
raise ValueError("UpdaterClient requires host dict or address")
|
||||
self._address = address
|
||||
self._host_name = None
|
||||
self._expected_fingerprint = None
|
||||
self._port = updater_port
|
||||
self._identity = identity or ensure_master_identity()
|
||||
self._client: Optional[httpx.AsyncClient] = None
|
||||
@@ -70,8 +84,64 @@ class UpdaterClient:
|
||||
timeout=timeout,
|
||||
)
|
||||
|
||||
def _fetch_peer_fingerprint(self) -> str:
|
||||
"""Open a throwaway TLS connection to the updater port and return the
|
||||
SHA-256 hex of the leaf cert it presents. Mirrors
|
||||
``AgentClient._fetch_peer_fingerprint`` exactly."""
|
||||
ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
|
||||
ctx.load_cert_chain(
|
||||
str(self._identity.cert_path), str(self._identity.key_path),
|
||||
)
|
||||
ctx.load_verify_locations(cafile=str(self._identity.ca_cert_path))
|
||||
ctx.verify_mode = ssl.CERT_REQUIRED
|
||||
ctx.check_hostname = self._verify_hostname
|
||||
sock = socket.create_connection((self._address, self._port), timeout=10.0)
|
||||
try:
|
||||
server_hostname = self._address if self._verify_hostname else None
|
||||
with ctx.wrap_socket(sock, server_hostname=server_hostname) as ssock:
|
||||
der = ssock.getpeercert(binary_form=True)
|
||||
finally:
|
||||
try:
|
||||
sock.close()
|
||||
except OSError:
|
||||
pass
|
||||
if not der:
|
||||
raise FingerprintMismatchError(
|
||||
f"{self._address}:{self._port}", self._expected_fingerprint or "", ""
|
||||
)
|
||||
return hashlib.sha256(der).hexdigest().lower()
|
||||
|
||||
async def _verify_pin(self) -> None:
|
||||
"""Fail closed unless the updater leaf cert SHA-256 matches the pin.
|
||||
|
||||
Unlike ``AgentClient`` (which falls through to CA-only when no pin is
|
||||
recorded), the updater channel pip-installs code as root — so a host
|
||||
with NO recorded ``updater_cert_fingerprint`` is rejected outright
|
||||
rather than accepted on CA validity alone. A missing pin means the
|
||||
host was never enrolled with an updater identity; we refuse to drive
|
||||
code into it."""
|
||||
if not self._expected_fingerprint:
|
||||
raise FingerprintMismatchError(
|
||||
f"{self._address}:{self._port}",
|
||||
"<no updater_cert_fingerprint recorded for host>",
|
||||
"",
|
||||
)
|
||||
actual = await asyncio.to_thread(self._fetch_peer_fingerprint)
|
||||
if actual != self._expected_fingerprint:
|
||||
raise FingerprintMismatchError(
|
||||
f"{self._address}:{self._port}",
|
||||
self._expected_fingerprint,
|
||||
actual,
|
||||
)
|
||||
|
||||
async def __aenter__(self) -> "UpdaterClient":
|
||||
self._client = self._build_client(_TIMEOUT_CONTROL)
|
||||
try:
|
||||
await self._verify_pin()
|
||||
except BaseException:
|
||||
await self._client.aclose()
|
||||
self._client = None
|
||||
raise
|
||||
return self
|
||||
|
||||
async def __aexit__(self, *exc: Any) -> None:
|
||||
|
||||
Reference in New Issue
Block a user