The swarm controller (port 8770) exposed 9 routes with zero app-layer auth, and swarmctl --tls defaulted to off — anyone able to reach the port could enroll workers (minting CA-signed certs + private keys), deploy, or tear down the fleet.

Two fail-closed layers:

- require_operator_cert gates every operator route (enroll/deploy/teardown/hosts/check/deckies). When mTLS is on, the peer cert's CN must be an operator identity (decnet-master/swarmctl); worker and updater@* certs are rejected. Plaintext loopback (single-host master) is accepted as the local operator — the docker.sock boundary.
- swarmctl refuses to bind a routable interface without --tls, so a network-exposed plaintext control plane can never start.

/heartbeat keeps its worker fingerprint pinning. Closes the two ASVS criticals (control-plane no-auth, unauthenticated cert minting).
143 lines
6.7 KiB
Python
143 lines
6.7 KiB
Python
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
import signal
|
|
import subprocess # nosec B404
|
|
import sys
|
|
from typing import Optional
|
|
|
|
import typer
|
|
|
|
from . import utils as _utils
|
|
from .gating import _require_master_mode
|
|
from .utils import console, log
|
|
|
|
# Hosts that keep the controller on the master box itself. A routable bind
|
|
# (anything else, incl. 0.0.0.0) exposes the control plane to the network and
|
|
# MUST run mTLS — the app-layer operator gate trusts the transport to have
|
|
# verified a CA-signed client cert. See decnet/web/router/swarm/_mtls.py.
|
|
_LOOPBACK_HOSTS = frozenset({"127.0.0.1", "::1", "localhost"})
|
|
|
|
|
|
def _guard_bind(host: str, tls: bool) -> None:
|
|
"""Fail closed: refuse to bind a routable interface without --tls.
|
|
|
|
On loopback the controller may run plaintext (single-operator master box,
|
|
same boundary as docker.sock); off-box it would be an UNAUTHENTICATED
|
|
control plane, so we hard-refuse to start.
|
|
"""
|
|
if host not in _LOOPBACK_HOSTS and not tls:
|
|
console.print(
|
|
f"[red]Refusing to bind the swarm controller to {host} without --tls.[/]"
|
|
)
|
|
console.print(
|
|
"[red]A routable bind without mTLS exposes an UNAUTHENTICATED control "
|
|
"plane (enroll / deploy / teardown).[/]"
|
|
)
|
|
console.print(
|
|
"[yellow]Re-run with --tls for mutual-TLS, or bind 127.0.0.1 for a "
|
|
"local-only master.[/]"
|
|
)
|
|
raise typer.Exit(code=2)
|
|
|
|
|
|
def _require_cert_key_pair(cert: Optional[str], key: Optional[str]) -> None:
    """Exit with status 2 when exactly one of ``--cert`` / ``--key`` was given."""
    if bool(cert) != bool(key):
        console.print("[red]--cert and --key must be provided together.[/]")
        raise typer.Exit(code=2)


def _spawn_listener() -> None:
    """Best-effort launch of ``decnet listener`` as a detached sibling process.

    The bind address and port are read from ``DECNET_LISTENER_HOST`` (default
    ``0.0.0.0``) and ``DECNET_SWARM_SYSLOG_PORT`` (default ``6514``). Any
    failure — including a non-numeric port in the environment — is logged and
    printed as a warning rather than raised, so a listener problem never
    prevents the controller itself from starting.
    """
    try:
        listener_host = os.environ.get("DECNET_LISTENER_HOST", "0.0.0.0")  # nosec B104
        listener_port = int(os.environ.get("DECNET_SWARM_SYSLOG_PORT", "6514"))
        lst_argv = [
            sys.executable, "-m", "decnet", "listener",
            "--host", listener_host,
            "--port", str(listener_port),
            "--daemon",
        ]
        pid = _utils._spawn_detached(lst_argv, _utils._pid_dir() / "listener.pid")
        log.info("swarmctl auto-spawned listener pid=%d bind=%s:%d",
                 pid, listener_host, listener_port)
        console.print(f"[dim]Auto-spawned listener (pid {pid}) on {listener_host}:{listener_port}.[/]")
    except Exception as e:  # noqa: BLE001
        log.warning("swarmctl could not auto-spawn listener: %s", e)
        console.print(f"[yellow]listener auto-spawn skipped: {e}[/]")


def _tls_uvicorn_args(
    host: str,
    cert: Optional[str],
    key: Optional[str],
    client_ca: Optional[str],
) -> list[str]:
    """Return the uvicorn ``--ssl-*`` arguments for an mTLS-enforcing server.

    With both ``cert`` and ``key`` the operator-supplied pair is used (BYOC);
    otherwise ``pki.ensure_swarmctl_cert(host)`` provides the pair. Callers
    must already have rejected a half-specified pair.

    Args:
        host: Bind address, forwarded to ``ensure_swarmctl_cert``.
        cert: Optional path to a PEM server certificate.
        key: Optional path to the matching PEM private key.
        client_ca: Optional CA bundle for verifying client certs; falls back
            to ``ca.crt`` under ``pki.DEFAULT_CA_DIR``.
    """
    # Imported lazily so plaintext runs never load the PKI module.
    from decnet.swarm import pki as _pki

    if cert and key:
        cert_path, key_path = cert, key
    else:
        auto_cert, auto_key, _auto_ca = _pki.ensure_swarmctl_cert(host)
        cert_path, key_path = str(auto_cert), str(auto_key)
        console.print(f"[dim]Auto-issued swarmctl server cert → {cert_path}[/]")
    ca_path = client_ca or str(_pki.DEFAULT_CA_DIR / "ca.crt")
    return [
        "--ssl-keyfile", key_path,
        "--ssl-certfile", cert_path,
        "--ssl-ca-certs", ca_path,
        # 2 == ssl.CERT_REQUIRED: a client certificate is mandatory.
        "--ssl-cert-reqs", "2",
    ]


def _stop_process_group(proc: subprocess.Popen) -> None:
    """SIGTERM the child's process group, escalating to SIGKILL after 10 s.

    The child is started with ``start_new_session=True``, so its pid is also
    its process-group id and ``killpg`` reaches any processes it forked.
    """
    try:
        os.killpg(proc.pid, signal.SIGTERM)
        try:
            proc.wait(timeout=10)
        except subprocess.TimeoutExpired:
            os.killpg(proc.pid, signal.SIGKILL)
            proc.wait()
    except ProcessLookupError:
        # The group is already gone; nothing left to stop.
        pass


def _run_controller(cmd: list[str]) -> int:
    """Run the uvicorn child in the foreground and return the status to exit with.

    Returns 0 after a clean exit or an operator Ctrl-C, 1 when the child could
    not be spawned, the child's own status when it exits non-zero, and
    ``128 + N`` when it was killed by signal ``N``.
    """
    # NOTE(review): the child lives in its own session, so a SIGTERM delivered
    # only to this process is not forwarded to uvicorn; only Ctrl-C is handled
    # here. Confirm that the daemon stop path signals the child as well.
    try:
        proc = subprocess.Popen(cmd, start_new_session=True)  # nosec B603 B404
    except (FileNotFoundError, subprocess.SubprocessError):
        console.print("[red]Failed to start swarmctl. Ensure 'uvicorn' is installed in the current environment.[/]")
        return 1

    try:
        returncode = proc.wait()
    except KeyboardInterrupt:
        _stop_process_group(proc)
        return 0

    if returncode == 0:
        return 0
    # `python -m uvicorn` with uvicorn missing, or a port already in use, ends
    # up here: the spawn succeeds but the child exits non-zero straight away.
    console.print(f"[red]swarm controller exited with status {returncode}.[/]")
    return returncode if returncode > 0 else 128 - returncode


def register(app: typer.Typer) -> None:
    """Register the ``swarmctl`` command on *app*."""

    @app.command()
    def swarmctl(
        port: int = typer.Option(
            8770, "--port",
            envvar="DECNET_SWARMCTL_PORT",
            help="Port for the swarm controller. Defaults to [swarm] swarmctl-port from /etc/decnet/decnet.ini, else 8770.",
        ),
        host: str = typer.Option(
            "127.0.0.1", "--host",
            envvar="DECNET_SWARMCTL_HOST",
            help="Bind address for the swarm controller. Defaults to [swarm] swarmctl-host from /etc/decnet/decnet.ini, else 127.0.0.1.",
        ),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
        no_listener: bool = typer.Option(False, "--no-listener", help="Do not auto-spawn the syslog-TLS listener alongside swarmctl"),
        tls: bool = typer.Option(False, "--tls", help="Serve over HTTPS with mTLS (required for cross-host worker heartbeats)"),
        cert: Optional[str] = typer.Option(None, "--cert", help="BYOC: path to TLS server cert (PEM). Auto-issues from the DECNET CA if omitted."),
        key: Optional[str] = typer.Option(None, "--key", help="BYOC: path to TLS server private key (PEM)."),
        client_ca: Optional[str] = typer.Option(None, "--client-ca", help="CA bundle used to verify worker client certs. Defaults to the DECNET CA."),
    ) -> None:
        """Run the DECNET SWARM controller (master-side, separate process from `decnet api`).

        By default, `decnet swarmctl` auto-spawns `decnet listener` as a fully-
        detached sibling process so the master starts accepting forwarder
        connections on 6514 without a second manual invocation. The listener
        survives swarmctl restarts and crashes — if it dies on its own,
        restart it manually with `decnet listener --daemon …`. Pass
        --no-listener to skip.

        Pass ``--tls`` to serve over HTTPS with mutual-TLS enforcement. By
        default the server cert is auto-issued from the DECNET CA under
        ``~/.decnet/swarmctl/`` so enrolled workers (which already ship that
        CA's ``ca.crt``) trust it out of the box. BYOC via ``--cert``/``--key``
        if you need a publicly-trusted or externally-managed cert.
        """
        _require_master_mode("swarmctl")
        _guard_bind(host, tls)
        # Reject bad flag combinations before daemonizing or spawning the
        # listener, so a usage error leaves no detached process behind and is
        # still printed on the operator's terminal.
        if tls:
            _require_cert_key_pair(cert, key)

        if daemon:
            log.info("swarmctl daemonizing host=%s port=%d", host, port)
            _utils._daemonize()

        if not no_listener:
            _spawn_listener()

        log.info("swarmctl command invoked host=%s port=%d tls=%s", host, port, tls)
        scheme = "https" if tls else "http"
        console.print(f"[green]Starting DECNET SWARM controller on {scheme}://{host}:{port}...[/]")
        _cmd = [sys.executable, "-m", "uvicorn", "decnet.web.swarm_api:app",
                "--host", host, "--port", str(port)]
        if tls:
            _cmd += _tls_uvicorn_args(host, cert, key, client_ca)

        status = _run_controller(_cmd)
        if status:
            # Surface controller failures to the shell / service manager
            # instead of always exiting 0.
            raise typer.Exit(code=status)
|