refactor(swarm): shared mTLS peer-identity helper

Extract peer-cert extraction from the heartbeat endpoint into
decnet/web/router/swarm/_mtls.py, adding CN parsing alongside the
SHA-256 fingerprint and a require_operator_cert dependency (CN in
{decnet-master, swarmctl}). api_heartbeat delegates to it; behaviour
unchanged. Prerequisite for control-plane and updater authz.
This commit is contained in:
2026-05-30 17:03:13 -04:00
parent 431c86bbe8
commit e7a686206c
3 changed files with 274 additions and 41 deletions

View File

@@ -0,0 +1,147 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Shared mTLS peer-identity extraction + authorization for the swarm control plane.
The swarm controller (``decnet/web/swarm_api.py``) and the per-worker updater
(``decnet/updater/app.py``) both run behind uvicorn with ``--ssl-cert-reqs 2``
(``ssl.CERT_REQUIRED``), so the transport layer guarantees the peer cert is
CA-signed. This module turns that transport guarantee into an *application*
identity check: it pulls the peer cert out of the ASGI scope and exposes both
its SHA-256 fingerprint (for per-host pinning) and its CN (for role
distinction).
Role distinction is by CN, which the PKI already assigns per identity
(``decnet/swarm/pki.py:issue_worker_cert``):
decnet-master master / operator client
swarmctl operator CLI server identity
{agent_name} worker agent
updater@{agent_name} per-worker updater
Two extraction paths are tried because uvicorn has stashed the peer cert in
different scope slots across versions; both this module and the heartbeat
endpoint fail closed when neither yields a cert.
"""
from __future__ import annotations
import hashlib
from collections.abc import MutableMapping
from dataclasses import dataclass
from typing import Any, Optional
from cryptography import x509
from cryptography.x509.oid import NameOID
from fastapi import HTTPException, Request
from decnet.logging import get_logger
log = get_logger("swarm.mtls")
# Certificate CNs that count as an operator, i.e. a peer allowed to drive the
# control plane (enroll / deploy / teardown / host management). Worker and
# updater CNs are deliberately absent: a worker's still-valid cert must never
# be enough to enroll new hosts or tear the fleet down.
OPERATOR_CNS = frozenset(("decnet-master", "swarmctl"))
@dataclass(frozen=True)
class PeerCert:
    """Immutable identity of the TLS peer, as read from the ASGI scope."""

    # Hex SHA-256 digest of the peer certificate bytes.
    sha256: str
    # Subject Common Name; None when no CN could be parsed from the cert.
    cn: Optional[str]
def _extract_peer_der(scope: MutableMapping[str, Any]) -> Optional[bytes]:
    """Pull the DER-encoded peer cert from an ASGI scope, or None.

    Two slots are tried, in order:

    1. Primary: ``scope["extensions"]["tls"]["client_cert_chain"][0]``
       (ASGI TLS extension). The entry may be raw DER bytes or PEM text (the
       form the ASGI TLS extension specifies); PEM is converted to DER so the
       resulting fingerprint is identical either way.
    2. Fallback: the transport's ``ssl_object.getpeercert(binary_form=True)``
       (older uvicorn builds + some other servers).

    The fallback is consulted whenever the primary slot does not produce a
    usable certificate, including when it holds an empty or malformed entry.

    Args:
        scope: The ASGI connection scope.

    Returns:
        The non-empty DER bytes of the peer certificate, or None when neither
        slot yields one. A malformed scope never raises; callers fail closed
        on None.
    """
    import ssl  # local import: only needed to normalise PEM entries

    def _as_der(candidate: Any) -> Optional[bytes]:
        # Normalise one candidate to non-empty DER bytes, or None if unusable.
        if isinstance(candidate, str):
            try:
                candidate = ssl.PEM_cert_to_DER_cert(candidate)
            except ValueError:
                # not PEM-armoured, non-ASCII, or corrupt base64 payload
                return None
        if isinstance(candidate, (bytes, bytearray, memoryview)):
            return bytes(candidate) or None
        # Anything else (None, dicts, stand-in objects, ...) cannot be hashed
        # or parsed downstream, so treat it as "no certificate".
        return None

    peer_der: Optional[bytes] = None
    source = "none"
    try:
        chain = scope.get("extensions", {}).get("tls", {}).get("client_cert_chain")
        if chain:
            peer_der = _as_der(chain[0])
    except (AttributeError, KeyError, IndexError, TypeError):
        # scope["extensions"]["tls"] structure varies across uvicorn versions
        peer_der = None

    if peer_der is not None:
        source = "primary"
    else:
        transport = scope.get("transport")
        try:
            ssl_obj = transport.get_extra_info("ssl_object") if transport else None
            if ssl_obj is not None:
                peer_der = _as_der(ssl_obj.getpeercert(binary_form=True))
                if peer_der is not None:
                    source = "fallback"
        except (AttributeError, OSError, ValueError):
            # transport may not be an SSL transport; getpeercert() raises
            # ValueError while the handshake is still incomplete
            peer_der = None

    if peer_der is None:
        log.debug("peer cert extraction failed via none")
        return None
    log.debug("peer cert extraction succeeded via %s", source)
    return peer_der
def _cn_from_der(der: bytes) -> Optional[str]:
    """Parse the subject Common Name out of a DER certificate, best-effort.

    Returns the first CN attribute as ``str`` (bytes values are decoded as
    UTF-8 with replacement), or None when the subject carries no CN or the
    input cannot be parsed. ``ValueError``, ``TypeError``, ``IndexError`` and
    ``UnicodeDecodeError`` are swallowed: a fingerprint remains usable for
    pinning without a CN, so the caller decides what a missing CN means.
    """
    try:
        subject = x509.load_der_x509_certificate(der).subject
        common_names = subject.get_attributes_for_oid(NameOID.COMMON_NAME)
        if common_names:
            first = common_names[0].value
            if isinstance(first, str):
                return first
            return first.decode("utf-8", "replace")
        return None
    except (ValueError, TypeError, IndexError, UnicodeDecodeError):
        return None
def extract_peer_cert(scope: MutableMapping[str, Any]) -> Optional[PeerCert]:
    """Build the peer's ``PeerCert`` from an ASGI scope, or None without a cert.

    Whenever certificate bytes are found the SHA-256 fingerprint is computed
    from them; the CN is filled in on a best-effort basis and is None when
    the subject cannot be parsed.
    """
    peer_der = _extract_peer_der(scope)
    if peer_der is not None:
        fingerprint = hashlib.sha256(peer_der).hexdigest().lower()
        return PeerCert(sha256=fingerprint, cn=_cn_from_der(peer_der))
    return None
def extract_peer_fingerprint(scope: MutableMapping[str, Any]) -> Optional[str]:
    """Return only the lowercase hex SHA-256 of the peer cert, or None if absent."""
    peer_der = _extract_peer_der(scope)
    return None if peer_der is None else hashlib.sha256(peer_der).hexdigest().lower()
def require_operator_cert(request: Request) -> PeerCert:
    """FastAPI dependency that admits only peers presenting an operator cert.

    The peer cert is read from ``request.scope``; its CN must be a member of
    :data:`OPERATOR_CNS`. This function does not verify the CA signature
    itself; that is left to transport-layer mTLS (``--ssl-cert-reqs 2``).

    Returns:
        The accepted peer's ``PeerCert``.

    Raises:
        HTTPException: 403 when no peer cert can be extracted, or when its CN
            (including a missing CN) is not an operator identity.
    """
    peer = extract_peer_cert(request.scope)
    if peer is None:
        raise HTTPException(status_code=403, detail="peer cert unavailable")
    if peer.cn in OPERATOR_CNS:
        return peer
    # Any other CN (worker, updater, or none at all) is logged and refused.
    log.warning("rejected non-operator cert on control plane: cn=%r", peer.cn)
    raise HTTPException(status_code=403, detail="operator certificate required")

View File

@@ -14,7 +14,6 @@ Mismatch (or decommissioned host) → 403.
"""
from __future__ import annotations
import hashlib
import json
from datetime import datetime, timezone
from collections.abc import MutableMapping
@@ -28,6 +27,7 @@ from decnet.config import DeckyConfig
from decnet.logging import get_logger
from decnet.web.db.repository import BaseRepository
from decnet.web.dependencies import get_repo
from decnet.web.router.swarm import _mtls
log = get_logger("swarm.heartbeat")
@@ -49,47 +49,13 @@ class HeartbeatRequest(BaseModel):
def _extract_peer_fingerprint(scope: MutableMapping[str, Any]) -> Optional[str]:
"""Pull the peer cert's SHA-256 fingerprint from an ASGI scope.
Tries two extraction paths because uvicorn has historically stashed
the TLS peer cert in different scope keys across versions:
1. Primary: ``scope["extensions"]["tls"]["client_cert_chain"][0]``
(uvicorn ≥ 0.30 ASGI TLS extension).
2. Fallback: the transport object's ``ssl_object.getpeercert(binary_form=True)``
(older uvicorn builds + some other servers).
Returns the lowercase hex SHA-256 of the DER-encoded cert, or None
when neither path yields bytes. The endpoint fails closed on None.
Thin wrapper over :func:`decnet.web.router.swarm._mtls.extract_peer_fingerprint`
kept as a module-level name so ``_verify_peer_matches_host`` resolves it via
the module global (and tests can monkeypatch it). Returns the lowercase hex
SHA-256 of the DER-encoded peer cert, or None when no cert is present; the
endpoint fails closed on None.
"""
peer_der: Optional[bytes] = None
source = "none"
try:
chain = scope.get("extensions", {}).get("tls", {}).get("client_cert_chain")
if chain:
peer_der = chain[0]
source = "primary"
except (AttributeError, KeyError, TypeError):
# scope["extensions"]["tls"] structure varies across uvicorn versions
peer_der = None
if peer_der is None:
transport = scope.get("transport")
try:
ssl_obj = transport.get_extra_info("ssl_object") if transport else None
if ssl_obj is not None:
peer_der = ssl_obj.getpeercert(binary_form=True)
if peer_der:
source = "fallback"
except (AttributeError, OSError):
# transport may not be an SSL transport, or the handshake may be incomplete
peer_der = None
if not peer_der:
log.debug("heartbeat: peer cert extraction failed via none")
return None
log.debug("heartbeat: peer cert extraction succeeded via %s", source)
return hashlib.sha256(peer_der).hexdigest().lower()
return _mtls.extract_peer_fingerprint(scope)
async def _verify_peer_matches_host(

120
tests/swarm/test_mtls.py Normal file
View File

@@ -0,0 +1,120 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Tests for the shared swarm mTLS peer-identity helper (``_mtls``).
No live TLS: peer certs are minted via the real PKI and fed in through a
fabricated ASGI scope, exactly the way uvicorn's TLS-scope shim would.
"""
from __future__ import annotations
import hashlib
from unittest.mock import MagicMock
import pytest
from decnet.swarm import pki
from decnet.web.router.swarm import _mtls
# ------------------------- cert fixtures ------------------------------------
@pytest.fixture
def ca(tmp_path, monkeypatch: pytest.MonkeyPatch):
    """Point ``pki.DEFAULT_CA_DIR`` at a temp dir and return ``pki.ensure_ca()``."""
    ca_dir = tmp_path / "ca"
    monkeypatch.setattr(pki, "DEFAULT_CA_DIR", ca_dir)
    return pki.ensure_ca()
def _der_for(ca, cn: str) -> bytes:
    """Issue a cert for ``cn`` via the PKI and return it re-encoded as DER."""
    from cryptography import x509
    from cryptography.hazmat.primitives import serialization

    cert_pem = pki.issue_worker_cert(ca, cn, []).cert_pem
    parsed = x509.load_pem_x509_certificate(cert_pem)
    return parsed.public_bytes(serialization.Encoding.DER)
def _scope_with(der: bytes) -> dict:
    """Fabricate an ASGI scope exposing ``der`` as the sole client-chain entry."""
    tls_extension = {"client_cert_chain": [der]}
    return {"extensions": {"tls": tls_extension}}
# ------------------------- extraction --------------------------------------
def test_extract_peer_cert_parses_fingerprint_and_cn(ca) -> None:
    """The primary scope slot yields both the SHA-256 fingerprint and the CN."""
    cert_der = _der_for(ca, "decnet-master")
    expected_fp = hashlib.sha256(cert_der).hexdigest().lower()

    identity = _mtls.extract_peer_cert(_scope_with(cert_der))

    assert identity is not None
    assert identity.sha256 == expected_fp
    assert identity.cn == "decnet-master"
def test_extract_peer_cert_fallback_transport_path(ca) -> None:
    """With no TLS extension in scope, the cert is read from the transport."""
    cert_der = _der_for(ca, "swarmctl")
    fake_ssl = MagicMock()
    fake_ssl.getpeercert.return_value = cert_der
    fake_transport = MagicMock()
    fake_transport.get_extra_info.return_value = fake_ssl

    identity = _mtls.extract_peer_cert({"transport": fake_transport})

    assert identity is not None
    assert identity.cn == "swarmctl"
    fake_ssl.getpeercert.assert_called_with(binary_form=True)
def test_extract_peer_cert_none_when_no_cert() -> None:
    """An empty scope carries no peer cert, so extraction returns None."""
    empty_scope: dict = {}
    assert _mtls.extract_peer_cert(empty_scope) is None
def test_extract_fingerprint_works_on_non_cert_der() -> None:
    """Unparseable bytes still get a fingerprint; only the CN comes back None."""
    # The fingerprint is a hash of the raw bytes, so it must not depend on
    # the bytes being a parseable cert (matches the heartbeat unit tests).
    bogus_der = b"\x30\x82not-a-real-cert"
    scope = _scope_with(bogus_der)
    expected_fp = hashlib.sha256(bogus_der).hexdigest()

    assert _mtls.extract_peer_fingerprint(scope) == expected_fp

    identity = _mtls.extract_peer_cert(scope)
    assert identity is not None
    assert identity.cn is None
# ------------------------- require_operator_cert ---------------------------
def _request_with(scope: dict) -> MagicMock:
    """Return a stand-in request object whose ``scope`` attribute is ``scope``."""
    return MagicMock(scope=scope)
def test_require_operator_accepts_master(ca) -> None:
    """A cert with CN ``decnet-master`` passes the operator check."""
    request = _request_with(_scope_with(_der_for(ca, "decnet-master")))
    accepted = _mtls.require_operator_cert(request)
    assert accepted.cn == "decnet-master"
def test_require_operator_accepts_swarmctl(ca) -> None:
    """A cert with CN ``swarmctl`` passes the operator check."""
    request = _request_with(_scope_with(_der_for(ca, "swarmctl")))
    accepted = _mtls.require_operator_cert(request)
    assert accepted.cn == "swarmctl"
def test_require_operator_rejects_worker_cn(ca) -> None:
    """A worker CN is refused with HTTP 403."""
    from fastapi import HTTPException

    request = _request_with(_scope_with(_der_for(ca, "worker-1")))
    with pytest.raises(HTTPException) as excinfo:
        _mtls.require_operator_cert(request)
    assert excinfo.value.status_code == 403
def test_require_operator_rejects_updater_cn(ca) -> None:
    """An ``updater@...`` CN is refused with HTTP 403."""
    from fastapi import HTTPException

    request = _request_with(_scope_with(_der_for(ca, "updater@worker-1")))
    with pytest.raises(HTTPException) as excinfo:
        _mtls.require_operator_cert(request)
    assert excinfo.value.status_code == 403
def test_require_operator_rejects_no_cert() -> None:
    """A request without any peer cert is refused with 403 'unavailable'."""
    from fastapi import HTTPException

    request = _request_with({})
    with pytest.raises(HTTPException) as excinfo:
        _mtls.require_operator_cert(request)
    # Checked after the ``with`` block: statements placed after the raising
    # call inside it would never execute.
    error = excinfo.value
    assert error.status_code == 403
    assert "unavailable" in error.detail