diff --git a/decnet/cli.py b/decnet/cli.py index 0990558..274f2ba 100644 --- a/decnet/cli.py +++ b/decnet/cli.py @@ -188,6 +188,10 @@ def swarmctl( host: str = typer.Option("127.0.0.1", "--host", help="Bind address for the swarm controller"), daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"), no_listener: bool = typer.Option(False, "--no-listener", help="Do not auto-spawn the syslog-TLS listener alongside swarmctl"), + tls: bool = typer.Option(False, "--tls", help="Serve over HTTPS with mTLS (required for cross-host worker heartbeats)"), + cert: Optional[str] = typer.Option(None, "--cert", help="BYOC: path to TLS server cert (PEM). Auto-issues from the DECNET CA if omitted."), + key: Optional[str] = typer.Option(None, "--key", help="BYOC: path to TLS server private key (PEM)."), + client_ca: Optional[str] = typer.Option(None, "--client-ca", help="CA bundle used to verify worker client certs. Defaults to the DECNET CA."), ) -> None: """Run the DECNET SWARM controller (master-side, separate process from `decnet api`). @@ -197,6 +201,12 @@ def swarmctl( survives swarmctl restarts and crashes — if it dies on its own, restart it manually with `decnet listener --daemon …`. Pass --no-listener to skip. + + Pass ``--tls`` to serve over HTTPS with mutual-TLS enforcement. By + default the server cert is auto-issued from the DECNET CA under + ``~/.decnet/swarmctl/`` so enrolled workers (which already ship that + CA's ``ca.crt``) trust it out of the box. BYOC via ``--cert``/``--key`` + if you need a publicly-trusted or externally-managed cert. 
""" import subprocess # nosec B404 import sys @@ -226,10 +236,35 @@ def swarmctl( log.warning("swarmctl could not auto-spawn listener: %s", e) console.print(f"[yellow]listener auto-spawn skipped: {e}[/]") - log.info("swarmctl command invoked host=%s port=%d", host, port) - console.print(f"[green]Starting DECNET SWARM controller on {host}:{port}...[/]") + log.info("swarmctl command invoked host=%s port=%d tls=%s", host, port, tls) + scheme = "https" if tls else "http" + console.print(f"[green]Starting DECNET SWARM controller on {scheme}://{host}:{port}...[/]") _cmd = [sys.executable, "-m", "uvicorn", "decnet.web.swarm_api:app", "--host", host, "--port", str(port)] + if tls: + from decnet.swarm import pki as _pki + # BYOC path: operator supplied cert+key explicitly. Else auto-issue + # from the existing DECNET CA so workers' already-deployed ca.crt + # verifies the endpoint with no extra steps. + if cert and key: + cert_path, key_path = cert, key + elif cert or key: + console.print("[red]--cert and --key must be provided together.[/]") + raise typer.Exit(code=2) + else: + auto_cert, auto_key, _auto_ca = _pki.ensure_swarmctl_cert(host) + cert_path, key_path = str(auto_cert), str(auto_key) + console.print(f"[dim]Auto-issued swarmctl server cert → {cert_path}[/]") + ca_path = client_ca or str(_pki.DEFAULT_CA_DIR / "ca.crt") + _cmd += [ + "--ssl-keyfile", key_path, + "--ssl-certfile", cert_path, + "--ssl-ca-certs", ca_path, + # CERT_REQUIRED — a worker must present a CA-signed client cert + # before FastAPI sees the request. The heartbeat endpoint then + # pins the cert fingerprint per-host on top of this. 
def ensure_swarmctl_cert(
    bind_host: str,
    ca_dir: pathlib.Path = DEFAULT_CA_DIR,
    swarmctl_dir: pathlib.Path = DEFAULT_SWARMCTL_DIR,
    extra_sans: Optional[list[str]] = None,
) -> tuple[pathlib.Path, pathlib.Path, pathlib.Path]:
    """Return ``(cert_path, key_path, ca_path)``, auto-issuing if missing.

    The server cert is issued from the DECNET CA under ``ca_dir`` (obtained
    via ``ensure_ca``) so workers whose bundle already includes ``ca.crt``
    can verify the swarmctl endpoint without additional trust
    configuration. Self-signed is intentionally not the default — a cert
    signed by the same CA the workers already trust is the friction-free
    path.

    Callers that want BYOC should skip this and pass their own cert/key
    paths directly to uvicorn.

    Args:
        bind_host: Address swarmctl binds to; always included in the SANs.
        ca_dir: Directory expected to hold the CA's ``ca.crt``.
        swarmctl_dir: Directory where ``server.crt`` and ``server.key`` are
            stored. It is created if needed and forced to mode 0700.
        extra_sans: Additional SAN entries to request alongside
            ``bind_host``, ``127.0.0.1`` and ``localhost``.

    Returns:
        Paths to the server cert, the server private key and the CA cert.

    Note:
        When ``server.crt``, ``server.key`` and ``ca.crt`` all exist they
        are returned as-is. The existing cert is NOT checked against
        ``bind_host``/``extra_sans`` or for expiry, so changing the bind
        address requires deleting the files to force a re-issue.
    """
    swarmctl_dir.mkdir(parents=True, exist_ok=True)
    os.chmod(swarmctl_dir, 0o700)
    cert_path = swarmctl_dir / "server.crt"
    key_path = swarmctl_dir / "server.key"
    ca_cert_path = ca_dir / "ca.crt"

    # Fast path: never re-issue when everything is already on disk, so a
    # swarmctl restart keeps serving the same certificate.
    if cert_path.exists() and key_path.exists() and ca_cert_path.exists():
        return cert_path, key_path, ca_cert_path

    ca = ensure_ca(ca_dir)
    # sorted() de-duplicates via the set and gives a deterministic SAN
    # order (plain set iteration order varies between interpreter runs).
    sans = sorted({bind_host, "127.0.0.1", "localhost", *(extra_sans or [])})
    issued = issue_worker_cert(ca, "swarmctl", sans)
    cert_path.write_bytes(issued.cert_pem)

    # Create the private key with 0600 from the first byte instead of
    # writing it with umask-default permissions and tightening afterwards.
    key_fd = os.open(key_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(key_fd, "wb") as key_file:
        # O_CREAT's mode is ignored when the file already exists, so
        # re-assert 0600 before any key material is written.
        os.chmod(key_path, 0o600)
        key_file.write(issued.key_pem)

    # Guarantee the returned CA path exists even if ensure_ca did not
    # (re)write ca.crt on this call.
    if not ca_cert_path.exists():
        ca_cert_path.write_bytes(ca.cert_pem)
    return cert_path, key_path, ca_cert_path
def test_ensure_swarmctl_cert_is_idempotent(tmp_path: pathlib.Path) -> None:
    """A repeat call with the same directories must reuse the issued cert.

    Re-issuing on every call would rotate the server cert each time
    swarmctl restarts and break any worker mid-TLS-session.
    """
    dirs = {"ca_dir": tmp_path / "ca", "swarmctl_dir": tmp_path / "swarmctl"}

    cert_before, _key, _ca = pki.ensure_swarmctl_cert("0.0.0.0", **dirs)
    pem_before = cert_before.read_bytes()

    cert_after, _key, _ca = pki.ensure_swarmctl_cert("0.0.0.0", **dirs)
    assert cert_after.read_bytes() == pem_before