diff --git a/decnet/updater/app.py b/decnet/updater/app.py index 50471f61..c14a8c34 100644 --- a/decnet/updater/app.py +++ b/decnet/updater/app.py @@ -4,9 +4,12 @@ Mirrors the shape of ``decnet/agent/app.py``: bare FastAPI, docs disabled, handlers delegate to ``decnet.updater.executor``. -Mounted by uvicorn via ``decnet.updater.server`` with ``--ssl-cert-reqs 2``; -the CN on the peer cert tells us which endpoints are legal (``updater@*`` -only — agent certs are rejected). +Mounted by uvicorn via ``decnet.updater.server`` with ``--ssl-cert-reqs 2``, +so every caller already presents a CA-signed cert. On top of that transport +guarantee, the mutating endpoints app-gate the *client* CN to the master +(``decnet-master``, the identity ``UpdaterClient`` presents via +``ensure_master_identity``): a compromised worker/agent cert must never be +able to pip-install and re-exec arbitrary code on a peer worker. """ from __future__ import annotations @@ -17,7 +20,10 @@ import pathlib from contextlib import asynccontextmanager from typing import Optional -from fastapi import FastAPI, File, Form, HTTPException, UploadFile +# Importing this shim patches uvicorn so the TLS peer cert lands in the ASGI +# scope, where require_master_cert can read it. Must import before serving. +from decnet.web import _uvicorn_tls_scope # noqa: F401 +from fastapi import Depends, FastAPI, File, Form, HTTPException, Request, UploadFile from pydantic import BaseModel from decnet.bus.factory import get_bus @@ -25,9 +31,27 @@ from decnet.bus.publish import run_health_heartbeat from decnet.logging import get_logger from decnet.swarm import pki from decnet.updater import executor as _exec +from decnet.web._mtls import extract_peer_cert log = get_logger("updater.app") +# Only the master may push code to a worker's updater. UpdaterClient presents +# the master identity (CN=decnet-master); worker/agent certs are rejected. +_PUSHER_CN = "decnet-master" + + +def require_master_cert(request: Request) -> None: + """Reject any caller whose client-cert CN is not the master's. + + Transport mTLS has proven the cert is CA-signed; this stops a non-master + CA-signed cert (e.g. a worker agent's) from driving an update/rollback. + Fails closed when no cert is present. + """ + peer = extract_peer_cert(request.scope) + if peer is None or peer.cn != _PUSHER_CN: + log.warning("updater: rejected push from cn=%r", peer.cn if peer else None) + raise HTTPException(status_code=403, detail="master certificate required") + _bus_heartbeat_task: Optional[asyncio.Task] = None @@ -122,7 +146,7 @@ async def health() -> dict: @app.get("/releases") -async def releases() -> dict: +async def releases(_pusher: None = Depends(require_master_cert)) -> dict: return {"releases": [r.to_dict() for r in _exec.list_releases(_Config.install_dir)]} @@ -131,7 +155,12 @@ async def update( tarball: UploadFile = File(..., description="tar.gz of the working tree"), sha: str = Form("", description="git SHA of the tree for provenance"), sha256: str = Form("", description="hex SHA-256 of the tarball bytes; verified before extract"), + _pusher: None = Depends(require_master_cert), ) -> dict: + if not sha256: + # Mandatory: guarantees _verify_tarball_sha256 runs before we extract + + # pip-install. An update with no integrity check is refused outright. + raise HTTPException(status_code=400, detail="sha256 of the tarball is required") body = await tarball.read() try: return _exec.run_update( @@ -153,12 +182,15 @@ async def update_self( sha: str = Form(""), sha256: str = Form("", description="hex SHA-256 of the tarball bytes; verified before extract"), confirm_self: str = Form("", description="Must be 'true' to proceed"), + _pusher: None = Depends(require_master_cert), ) -> dict: if confirm_self.lower() != "true": raise HTTPException( status_code=400, detail="self-update requires confirm_self=true (no auto-rollback)", ) + if not sha256: + raise HTTPException(status_code=400, detail="sha256 of the tarball is required") body = await tarball.read() try: return _exec.run_update_self( @@ -174,7 +206,7 @@ async def update_self( @app.post("/rollback") -async def rollback() -> dict: +async def rollback(_pusher: None = Depends(require_master_cert)) -> dict: try: return _exec.run_rollback( install_dir=_Config.install_dir, agent_dir=_Config.agent_dir, diff --git a/decnet/web/_mtls.py b/decnet/web/_mtls.py new file mode 100644 index 00000000..d48745e3 --- /dev/null +++ b/decnet/web/_mtls.py @@ -0,0 +1,127 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +"""Transport peer-identity primitives shared across DECNET's mTLS endpoints. + +Deliberately lives directly under ``decnet.web`` (a namespace package with no +heavy ``__init__``) rather than under ``decnet.web.router`` so the minimal +worker-side updater (``decnet/updater/app.py``) can read its peer cert without +importing the entire API router tree. + +Both the swarm controller and the updater run behind uvicorn with +``--ssl-cert-reqs 2`` (``ssl.CERT_REQUIRED``), so the transport layer has +already proven any peer cert is CA-signed; these helpers turn that into an +*application* identity (SHA-256 fingerprint for pinning, CN for role). +""" +from __future__ import annotations + +import hashlib +from collections.abc import MutableMapping +from dataclasses import dataclass +from typing import Any, Optional + +from cryptography import x509 +from cryptography.x509.oid import NameOID + +from decnet.logging import get_logger + +log = get_logger("web.mtls") + +# Hosts treated as "the box itself". A certless request is only ever accepted +# from these — the single-operator loopback boundary (same model as +# docker.sock). +LOOPBACK_HOSTS = frozenset({"127.0.0.1", "::1", "localhost"}) + + +@dataclass(frozen=True) +class PeerCert: + """The TLS peer's identity, extracted from the ASGI scope.""" + + sha256: str + cn: Optional[str] + + +def _extract_peer_der(scope: MutableMapping[str, Any]) -> Optional[bytes]: + """Pull the DER-encoded peer cert from an ASGI scope, or None. + + 1. Primary: ``scope["extensions"]["tls"]["client_cert_chain"][0]`` + (uvicorn >= 0.30 ASGI TLS extension; populated by + ``decnet.web._uvicorn_tls_scope``). + 2. Fallback: the transport's ``ssl_object.getpeercert(binary_form=True)`` + (older uvicorn builds + some other servers). + """ + peer_der: Optional[bytes] = None + source = "none" + + try: + chain = scope.get("extensions", {}).get("tls", {}).get("client_cert_chain") + if chain: + peer_der = chain[0] + source = "primary" + except (AttributeError, KeyError, TypeError): + # scope["extensions"]["tls"] structure varies across uvicorn versions + peer_der = None + + if peer_der is None: + transport = scope.get("transport") + try: + ssl_obj = transport.get_extra_info("ssl_object") if transport else None + if ssl_obj is not None: + peer_der = ssl_obj.getpeercert(binary_form=True) + if peer_der: + source = "fallback" + except (AttributeError, OSError): + # transport may not be an SSL transport, or the handshake may be incomplete + peer_der = None + + if not peer_der: + log.debug("peer cert extraction failed via none") + return None + + log.debug("peer cert extraction succeeded via %s", source) + return peer_der + + +def _cn_from_der(der: bytes) -> Optional[str]: + """Best-effort CN parse. Returns None on any malformed/CN-less cert. + + Never raises: a fingerprint is still usable for pinning even when the + subject can't be parsed, so callers decide what a missing CN means. + """ + try: + cert = x509.load_der_x509_certificate(der) + attrs = cert.subject.get_attributes_for_oid(NameOID.COMMON_NAME) + if not attrs: + return None + value = attrs[0].value + return value if isinstance(value, str) else value.decode("utf-8", "replace") + except (ValueError, TypeError, IndexError, UnicodeDecodeError): + return None + + +def extract_peer_cert(scope: MutableMapping[str, Any]) -> Optional[PeerCert]: + """Return the peer's ``PeerCert`` (fingerprint + CN), or None when no cert. + + The fingerprint is always computed when a cert is present; the CN is + best-effort (None when the subject can't be parsed). + """ + der = _extract_peer_der(scope) + if der is None: + return None + return PeerCert( + sha256=hashlib.sha256(der).hexdigest().lower(), + cn=_cn_from_der(der), + ) + + +def extract_peer_fingerprint(scope: MutableMapping[str, Any]) -> Optional[str]: + """Convenience: just the lowercase hex SHA-256 of the peer cert, or None.""" + der = _extract_peer_der(scope) + if der is None: + return None + return hashlib.sha256(der).hexdigest().lower() + + +def client_is_loopback(request: Any) -> bool: + """True iff the request originated from the box's loopback interface.""" + client = getattr(request, "client", None) + host = getattr(client, "host", None) if client is not None else None + return host in LOOPBACK_HOSTS diff --git a/decnet/web/router/swarm/_mtls.py b/decnet/web/router/swarm/_mtls.py index 29df36aa..c913be16 100644 --- a/decnet/web/router/swarm/_mtls.py +++ b/decnet/web/router/swarm/_mtls.py @@ -1,38 +1,41 @@ # SPDX-License-Identifier: AGPL-3.0-or-later -"""Shared mTLS peer-identity extraction + authorization for the swarm control plane. +"""Operator authorization for the swarm control plane. -The swarm controller (``decnet/web/swarm_api.py``) and the per-worker updater -(``decnet/updater/app.py``) both run behind uvicorn with ``--ssl-cert-reqs 2`` -(``ssl.CERT_REQUIRED``), so the transport layer guarantees the peer cert is -CA-signed. This module turns that transport guarantee into an *application* -identity check: it pulls the peer cert out of the ASGI scope and exposes both -its SHA-256 fingerprint (for per-host pinning) and its CN (for role -distinction). +The transport peer-identity primitives live in :mod:`decnet.web._mtls` so the +minimal worker-side updater can reuse them without importing the API router +tree. This module adds the swarm-controller-specific operator gate on top and +re-exports the primitives for existing importers. -Role distinction is by CN, which the PKI already assigns per identity +Role distinction is by CN, which the PKI assigns per identity (``decnet/swarm/pki.py:issue_worker_cert``): decnet-master master / operator client swarmctl operator CLI server identity {agent_name} worker agent updater@{agent_name} per-worker updater - -Two extraction paths are tried because uvicorn has stashed the peer cert in -different scope slots across versions; both this module and the heartbeat -endpoint fail closed when neither yields a cert. """ from __future__ import annotations -import hashlib -from collections.abc import MutableMapping -from dataclasses import dataclass -from typing import Any, Optional - -from cryptography import x509 -from cryptography.x509.oid import NameOID from fastapi import HTTPException, Request from decnet.logging import get_logger +from decnet.web._mtls import ( # re-exported for existing importers + LOOPBACK_HOSTS, + PeerCert, + client_is_loopback, + extract_peer_cert, + extract_peer_fingerprint, +) + +__all__ = [ + "LOOPBACK_HOSTS", + "PeerCert", + "client_is_loopback", + "extract_peer_cert", + "extract_peer_fingerprint", + "OPERATOR_CNS", + "require_operator_cert", +] log = get_logger("swarm.mtls") @@ -42,107 +45,6 @@ log = get_logger("swarm.mtls") # or tear the fleet down. OPERATOR_CNS = frozenset({"decnet-master", "swarmctl"}) -# Hosts treated as "the master box itself". A certless request is only accepted -# from these — the single-operator loopback boundary (same model as -# docker.sock). Any routable bind is forced onto mTLS by the swarmctl startup -# guard, so a certless request can never legitimately arrive from off-box. -LOOPBACK_HOSTS = frozenset({"127.0.0.1", "::1", "localhost"}) - - -@dataclass(frozen=True) -class PeerCert: - """The TLS peer's identity, extracted from the ASGI scope.""" - - sha256: str - cn: Optional[str] - - -def _extract_peer_der(scope: MutableMapping[str, Any]) -> Optional[bytes]: - """Pull the DER-encoded peer cert from an ASGI scope, or None. - - 1. Primary: ``scope["extensions"]["tls"]["client_cert_chain"][0]`` - (uvicorn >= 0.30 ASGI TLS extension). - 2. Fallback: the transport's ``ssl_object.getpeercert(binary_form=True)`` - (older uvicorn builds + some other servers). - """ - peer_der: Optional[bytes] = None - source = "none" - - try: - chain = scope.get("extensions", {}).get("tls", {}).get("client_cert_chain") - if chain: - peer_der = chain[0] - source = "primary" - except (AttributeError, KeyError, TypeError): - # scope["extensions"]["tls"] structure varies across uvicorn versions - peer_der = None - - if peer_der is None: - transport = scope.get("transport") - try: - ssl_obj = transport.get_extra_info("ssl_object") if transport else None - if ssl_obj is not None: - peer_der = ssl_obj.getpeercert(binary_form=True) - if peer_der: - source = "fallback" - except (AttributeError, OSError): - # transport may not be an SSL transport, or the handshake may be incomplete - peer_der = None - - if not peer_der: - log.debug("peer cert extraction failed via none") - return None - - log.debug("peer cert extraction succeeded via %s", source) - return peer_der - - -def _cn_from_der(der: bytes) -> Optional[str]: - """Best-effort CN parse. Returns None on any malformed/CN-less cert. - - Never raises: a fingerprint is still usable for pinning even when the - subject can't be parsed, so callers decide what a missing CN means. - """ - try: - cert = x509.load_der_x509_certificate(der) - attrs = cert.subject.get_attributes_for_oid(NameOID.COMMON_NAME) - if not attrs: - return None - value = attrs[0].value - return value if isinstance(value, str) else value.decode("utf-8", "replace") - except (ValueError, TypeError, IndexError, UnicodeDecodeError): - return None - - -def extract_peer_cert(scope: MutableMapping[str, Any]) -> Optional[PeerCert]: - """Return the peer's ``PeerCert`` (fingerprint + CN), or None when no cert. - - The fingerprint is always computed when a cert is present; the CN is - best-effort (None when the subject can't be parsed). - """ - der = _extract_peer_der(scope) - if der is None: - return None - return PeerCert( - sha256=hashlib.sha256(der).hexdigest().lower(), - cn=_cn_from_der(der), - ) - - -def extract_peer_fingerprint(scope: MutableMapping[str, Any]) -> Optional[str]: - """Convenience: just the lowercase hex SHA-256 of the peer cert, or None.""" - der = _extract_peer_der(scope) - if der is None: - return None - return hashlib.sha256(der).hexdigest().lower() - - -def _client_is_loopback(request: Request) -> bool: - """True iff the request originated from the master box's loopback.""" - client = getattr(request, "client", None) - host = getattr(client, "host", None) if client is not None else None - return host in LOOPBACK_HOSTS - def require_operator_cert(request: Request) -> PeerCert: """FastAPI dependency authorizing a swarm control-plane operation. @@ -167,7 +69,7 @@ def require_operator_cert(request: Request) -> PeerCert: log.warning("rejected non-operator cert on control plane: cn=%r", peer.cn) raise HTTPException(status_code=403, detail="operator certificate required") return peer - if _client_is_loopback(request): + if client_is_loopback(request): # Local operator on the master box; no client cert over plaintext loopback. return PeerCert(sha256="", cn=None) raise HTTPException(status_code=403, detail="operator certificate required") diff --git a/tests/updater/test_updater_app.py b/tests/updater/test_updater_app.py index 8690dcf8..33088dd9 100644 --- a/tests/updater/test_updater_app.py +++ b/tests/updater/test_updater_app.py @@ -29,14 +29,18 @@ def _tarball(files: dict[str, str] | None = None) -> bytes: @pytest.fixture -def client(tmp_path: pathlib.Path) -> TestClient: +def client(tmp_path: pathlib.Path): app_mod.configure( install_dir=tmp_path / "install", updater_install_dir=tmp_path / "install" / "updater", agent_dir=tmp_path / "agent", ) (tmp_path / "install" / "releases").mkdir(parents=True) - return TestClient(app_mod.app) + # Bypass the master-cert gate for wire-format tests (no live TLS peer). + app_mod.app.dependency_overrides[app_mod.require_master_cert] = lambda: None + with TestClient(app_mod.app) as c: + yield c + app_mod.app.dependency_overrides.clear() def test_health_returns_role_and_releases(client: TestClient, monkeypatch: pytest.MonkeyPatch) -> None: @@ -57,7 +61,7 @@ def test_update_happy_path(client: TestClient, monkeypatch: pytest.MonkeyPatch) r = client.post( "/update", files={"tarball": ("tree.tgz", _tarball(), "application/gzip")}, - data={"sha": "ABC123"}, + data={"sha": "ABC123", "sha256": "0" * 64}, ) assert r.status_code == 200, r.text assert r.json()["release"]["sha"] == "ABC123" @@ -71,7 +75,7 @@ def test_update_rollback_returns_409(client: TestClient, monkeypatch: pytest.Mon r = client.post( "/update", files={"tarball": ("t.tgz", _tarball(), "application/gzip")}, - data={"sha": ""}, + data={"sha": "", "sha256": "0" * 64}, ) assert r.status_code == 409, r.text detail = r.json()["detail"] @@ -84,7 +88,11 @@ def test_update_hard_failure_returns_500(client: TestClient, monkeypatch: pytest raise ex.UpdateError("pip install failed", stderr="resolver error") monkeypatch.setattr(ex, "run_update", _boom) - r = client.post("/update", files={"tarball": ("t.tgz", _tarball(), "application/gzip")}) + r = client.post( + "/update", + files={"tarball": ("t.tgz", _tarball(), "application/gzip")}, + data={"sha256": "0" * 64}, + ) assert r.status_code == 500 assert r.json()["detail"]["rolled_back"] is False @@ -103,7 +111,7 @@ def test_update_self_happy_path(client: TestClient, monkeypatch: pytest.MonkeyPa r = client.post( "/update-self", files={"tarball": ("t.tgz", _tarball(), "application/gzip")}, - data={"sha": "S", "confirm_self": "true"}, + data={"sha": "S", "sha256": "0" * 64, "confirm_self": "true"}, ) assert r.status_code == 200 assert r.json()["status"] == "self_update_queued" @@ -137,3 +145,75 @@ def test_releases_lists_slots(client: TestClient, monkeypatch: pytest.MonkeyPatc assert r.status_code == 200 slots = [rel["slot"] for rel in r.json()["releases"]] assert slots == ["active", "prev"] + + +def test_update_without_sha256_is_rejected(client: TestClient) -> None: + # Mandatory integrity: no checksum → 400, before any extract/install. + r = client.post( + "/update", + files={"tarball": ("t.tgz", _tarball(), "application/gzip")}, + data={"sha": "ABC"}, + ) + assert r.status_code == 400 + assert "sha256" in r.json()["detail"] + + +# ------------------------- master-cert gate --------------------------------- + + +@pytest.fixture +def raw_client(tmp_path: pathlib.Path): + """Client with the real require_master_cert gate active (no override).""" + app_mod.configure( + install_dir=tmp_path / "install", + updater_install_dir=tmp_path / "install" / "updater", + agent_dir=tmp_path / "agent", + ) + (tmp_path / "install" / "releases").mkdir(parents=True) + with TestClient(app_mod.app) as c: + yield c + + +@pytest.mark.parametrize("path", ["/update", "/update-self", "/rollback", "/releases"]) +def test_endpoints_reject_callers_without_master_cert(raw_client: TestClient, path: str) -> None: + # No TLS peer cert in a TestClient scope → require_master_cert fails closed. + if path == "/rollback": + r = raw_client.post(path) + elif path == "/releases": + r = raw_client.get(path) + else: + r = raw_client.post( + path, + files={"tarball": ("t.tgz", _tarball(), "application/gzip")}, + data={"sha256": "0" * 64, "confirm_self": "true"}, + ) + assert r.status_code == 403 + assert "master certificate" in r.json()["detail"] + + +def test_require_master_cert_accepts_decnet_master(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> None: + from unittest.mock import MagicMock + + from decnet.swarm import pki + from cryptography import x509 + from cryptography.hazmat.primitives import serialization + + monkeypatch.setattr(pki, "DEFAULT_CA_DIR", tmp_path / "ca") + ca = pki.ensure_ca() + + def _der(cn: str) -> bytes: + issued = pki.issue_worker_cert(ca, cn, []) + cert = x509.load_pem_x509_certificate(issued.cert_pem) + return cert.public_bytes(serialization.Encoding.DER) + + def _req(cn: str) -> MagicMock: + req = MagicMock() + req.scope = {"extensions": {"tls": {"client_cert_chain": [_der(cn)]}}} + return req + + # master cert → allowed (returns None) + assert app_mod.require_master_cert(_req("decnet-master")) is None + # a worker/agent cert is CA-signed but must be rejected + with pytest.raises(app_mod.HTTPException) as ei: + app_mod.require_master_cert(_req("worker-7")) + assert ei.value.status_code == 403