fix(updater): enforce master CN gate and mandatory tarball checksum

The worker-side updater extracted + pip-installed + re-exec'd any tarball
from any caller holding a CA-signed cert; the documented updater@* CN
gating was never implemented. Now:

- require_master_cert gates /update, /update-self, /rollback, /releases:
  the client cert CN must be decnet-master (the identity UpdaterClient
  presents). A worker/agent cert can no longer push code to a peer.
- sha256 is mandatory on /update and /update-self (400 otherwise), so the
  integrity check always runs before extract/install. UpdaterClient
  already sends it; this just hardens the contract.

The transport peer-identity primitives move to decnet/web/_mtls.py (a
light namespace module) so the minimal updater reuses them without
importing the API router tree; router/swarm/_mtls.py re-exports them and
keeps the operator gate. Closes the updater-RCE critical.
This commit is contained in:
2026-05-30 17:22:12 -04:00
parent 30750d294d
commit a4193d7022
4 changed files with 275 additions and 134 deletions

127
decnet/web/_mtls.py Normal file
View File

@@ -0,0 +1,127 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Transport peer-identity primitives shared across DECNET's mTLS endpoints.
Deliberately lives directly under ``decnet.web`` (a namespace package with no
heavy ``__init__``) rather than under ``decnet.web.router`` so the minimal
worker-side updater (``decnet/updater/app.py``) can read its peer cert without
importing the entire API router tree.
Both the swarm controller and the updater run behind uvicorn with
``--ssl-cert-reqs 2`` (``ssl.CERT_REQUIRED``), so the transport layer has
already proven any peer cert is CA-signed; these helpers turn that into an
*application* identity (SHA-256 fingerprint for pinning, CN for role).
"""
from __future__ import annotations
import hashlib
from collections.abc import MutableMapping
from dataclasses import dataclass
from typing import Any, Optional
from cryptography import x509
from cryptography.x509.oid import NameOID
from decnet.logging import get_logger
log = get_logger("web.mtls")
# Hosts treated as "the box itself". A certless request is only ever accepted
# from these — the single-operator loopback boundary (same model as
# docker.sock).
LOOPBACK_HOSTS = frozenset({"127.0.0.1", "::1", "localhost"})
@dataclass(frozen=True)
class PeerCert:
"""The TLS peer's identity, extracted from the ASGI scope."""
sha256: str
cn: Optional[str]
def _extract_peer_der(scope: MutableMapping[str, Any]) -> Optional[bytes]:
"""Pull the DER-encoded peer cert from an ASGI scope, or None.
1. Primary: ``scope["extensions"]["tls"]["client_cert_chain"][0]``
(uvicorn >= 0.30 ASGI TLS extension; populated by
``decnet.web._uvicorn_tls_scope``).
2. Fallback: the transport's ``ssl_object.getpeercert(binary_form=True)``
(older uvicorn builds + some other servers).
"""
peer_der: Optional[bytes] = None
source = "none"
try:
chain = scope.get("extensions", {}).get("tls", {}).get("client_cert_chain")
if chain:
peer_der = chain[0]
source = "primary"
except (AttributeError, KeyError, TypeError):
# scope["extensions"]["tls"] structure varies across uvicorn versions
peer_der = None
if peer_der is None:
transport = scope.get("transport")
try:
ssl_obj = transport.get_extra_info("ssl_object") if transport else None
if ssl_obj is not None:
peer_der = ssl_obj.getpeercert(binary_form=True)
if peer_der:
source = "fallback"
except (AttributeError, OSError):
# transport may not be an SSL transport, or the handshake may be incomplete
peer_der = None
if not peer_der:
log.debug("peer cert extraction failed via none")
return None
log.debug("peer cert extraction succeeded via %s", source)
return peer_der
def _cn_from_der(der: bytes) -> Optional[str]:
"""Best-effort CN parse. Returns None on any malformed/CN-less cert.
Never raises: a fingerprint is still usable for pinning even when the
subject can't be parsed, so callers decide what a missing CN means.
"""
try:
cert = x509.load_der_x509_certificate(der)
attrs = cert.subject.get_attributes_for_oid(NameOID.COMMON_NAME)
if not attrs:
return None
value = attrs[0].value
return value if isinstance(value, str) else value.decode("utf-8", "replace")
except (ValueError, TypeError, IndexError, UnicodeDecodeError):
return None
def extract_peer_cert(scope: MutableMapping[str, Any]) -> Optional[PeerCert]:
"""Return the peer's ``PeerCert`` (fingerprint + CN), or None when no cert.
The fingerprint is always computed when a cert is present; the CN is
best-effort (None when the subject can't be parsed).
"""
der = _extract_peer_der(scope)
if der is None:
return None
return PeerCert(
sha256=hashlib.sha256(der).hexdigest().lower(),
cn=_cn_from_der(der),
)
def extract_peer_fingerprint(scope: MutableMapping[str, Any]) -> Optional[str]:
"""Convenience: just the lowercase hex SHA-256 of the peer cert, or None."""
der = _extract_peer_der(scope)
if der is None:
return None
return hashlib.sha256(der).hexdigest().lower()
def client_is_loopback(request: Any) -> bool:
"""True iff the request originated from the box's loopback interface."""
client = getattr(request, "client", None)
host = getattr(client, "host", None) if client is not None else None
return host in LOOPBACK_HOSTS

View File

@@ -1,38 +1,41 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Shared mTLS peer-identity extraction + authorization for the swarm control plane.
"""Operator authorization for the swarm control plane.
The swarm controller (``decnet/web/swarm_api.py``) and the per-worker updater
(``decnet/updater/app.py``) both run behind uvicorn with ``--ssl-cert-reqs 2``
(``ssl.CERT_REQUIRED``), so the transport layer guarantees the peer cert is
CA-signed. This module turns that transport guarantee into an *application*
identity check: it pulls the peer cert out of the ASGI scope and exposes both
its SHA-256 fingerprint (for per-host pinning) and its CN (for role
distinction).
The transport peer-identity primitives live in :mod:`decnet.web._mtls` so the
minimal worker-side updater can reuse them without importing the API router
tree. This module adds the swarm-controller-specific operator gate on top and
re-exports the primitives for existing importers.
Role distinction is by CN, which the PKI already assigns per identity
Role distinction is by CN, which the PKI assigns per identity
(``decnet/swarm/pki.py:issue_worker_cert``):
decnet-master master / operator client
swarmctl operator CLI server identity
{agent_name} worker agent
updater@{agent_name} per-worker updater
Two extraction paths are tried because uvicorn has stashed the peer cert in
different scope slots across versions; both this module and the heartbeat
endpoint fail closed when neither yields a cert.
"""
from __future__ import annotations
import hashlib
from collections.abc import MutableMapping
from dataclasses import dataclass
from typing import Any, Optional
from cryptography import x509
from cryptography.x509.oid import NameOID
from fastapi import HTTPException, Request
from decnet.logging import get_logger
from decnet.web._mtls import ( # re-exported for existing importers
LOOPBACK_HOSTS,
PeerCert,
client_is_loopback,
extract_peer_cert,
extract_peer_fingerprint,
)
__all__ = [
"LOOPBACK_HOSTS",
"PeerCert",
"client_is_loopback",
"extract_peer_cert",
"extract_peer_fingerprint",
"OPERATOR_CNS",
"require_operator_cert",
]
log = get_logger("swarm.mtls")
@@ -42,107 +45,6 @@ log = get_logger("swarm.mtls")
# or tear the fleet down.
OPERATOR_CNS = frozenset({"decnet-master", "swarmctl"})
# Hosts treated as "the master box itself". A certless request is only accepted
# from these — the single-operator loopback boundary (same model as
# docker.sock). Any routable bind is forced onto mTLS by the swarmctl startup
# guard, so a certless request can never legitimately arrive from off-box.
LOOPBACK_HOSTS = frozenset({"127.0.0.1", "::1", "localhost"})
@dataclass(frozen=True)
class PeerCert:
"""The TLS peer's identity, extracted from the ASGI scope."""
sha256: str
cn: Optional[str]
def _extract_peer_der(scope: MutableMapping[str, Any]) -> Optional[bytes]:
"""Pull the DER-encoded peer cert from an ASGI scope, or None.
1. Primary: ``scope["extensions"]["tls"]["client_cert_chain"][0]``
(uvicorn >= 0.30 ASGI TLS extension).
2. Fallback: the transport's ``ssl_object.getpeercert(binary_form=True)``
(older uvicorn builds + some other servers).
"""
peer_der: Optional[bytes] = None
source = "none"
try:
chain = scope.get("extensions", {}).get("tls", {}).get("client_cert_chain")
if chain:
peer_der = chain[0]
source = "primary"
except (AttributeError, KeyError, TypeError):
# scope["extensions"]["tls"] structure varies across uvicorn versions
peer_der = None
if peer_der is None:
transport = scope.get("transport")
try:
ssl_obj = transport.get_extra_info("ssl_object") if transport else None
if ssl_obj is not None:
peer_der = ssl_obj.getpeercert(binary_form=True)
if peer_der:
source = "fallback"
except (AttributeError, OSError):
# transport may not be an SSL transport, or the handshake may be incomplete
peer_der = None
if not peer_der:
log.debug("peer cert extraction failed via none")
return None
log.debug("peer cert extraction succeeded via %s", source)
return peer_der
def _cn_from_der(der: bytes) -> Optional[str]:
"""Best-effort CN parse. Returns None on any malformed/CN-less cert.
Never raises: a fingerprint is still usable for pinning even when the
subject can't be parsed, so callers decide what a missing CN means.
"""
try:
cert = x509.load_der_x509_certificate(der)
attrs = cert.subject.get_attributes_for_oid(NameOID.COMMON_NAME)
if not attrs:
return None
value = attrs[0].value
return value if isinstance(value, str) else value.decode("utf-8", "replace")
except (ValueError, TypeError, IndexError, UnicodeDecodeError):
return None
def extract_peer_cert(scope: MutableMapping[str, Any]) -> Optional[PeerCert]:
"""Return the peer's ``PeerCert`` (fingerprint + CN), or None when no cert.
The fingerprint is always computed when a cert is present; the CN is
best-effort (None when the subject can't be parsed).
"""
der = _extract_peer_der(scope)
if der is None:
return None
return PeerCert(
sha256=hashlib.sha256(der).hexdigest().lower(),
cn=_cn_from_der(der),
)
def extract_peer_fingerprint(scope: MutableMapping[str, Any]) -> Optional[str]:
"""Convenience: just the lowercase hex SHA-256 of the peer cert, or None."""
der = _extract_peer_der(scope)
if der is None:
return None
return hashlib.sha256(der).hexdigest().lower()
def _client_is_loopback(request: Request) -> bool:
"""True iff the request originated from the master box's loopback."""
client = getattr(request, "client", None)
host = getattr(client, "host", None) if client is not None else None
return host in LOOPBACK_HOSTS
def require_operator_cert(request: Request) -> PeerCert:
"""FastAPI dependency authorizing a swarm control-plane operation.
@@ -167,7 +69,7 @@ def require_operator_cert(request: Request) -> PeerCert:
log.warning("rejected non-operator cert on control plane: cn=%r", peer.cn)
raise HTTPException(status_code=403, detail="operator certificate required")
return peer
if _client_is_loopback(request):
if client_is_loopback(request):
# Local operator on the master box; no client cert over plaintext loopback.
return PeerCert(sha256="", cn=None)
raise HTTPException(status_code=403, detail="operator certificate required")