fix(swarm): mTLS client-cert authz on the swarm control plane
The swarm controller (port 8770) exposed 9 routes with zero app-layer auth, and swarmctl --tls defaulted to off — anyone able to reach the port could enroll workers (minting CA-signed certs + private keys), deploy, or tear down the fleet.

Two fail-closed layers:

- require_operator_cert gates every operator route (enroll/deploy/teardown/hosts/check/deckies). When mTLS is on, the peer cert's CN must be an operator identity (decnet-master/swarmctl); worker and updater@* certs are rejected. Plaintext loopback (single-host master) is accepted as the local operator — the docker.sock boundary.
- swarmctl refuses to bind a routable interface without --tls, so a network-exposed plaintext control plane can never start.

/heartbeat keeps its worker fingerprint pinning. Closes the two ASVS criticals (control-plane no-auth, unauthenticated cert minting).
This commit is contained in:
@@ -51,7 +51,9 @@ def client(repo, ca_dir: pathlib.Path):
|
||||
return repo
|
||||
|
||||
app.dependency_overrides[get_repo] = _override
|
||||
with TestClient(app) as c:
|
||||
# loopback client so /swarm/enroll (operator-gated) accepts the certless
|
||||
# local-operator path during test setup.
|
||||
with TestClient(app, client=("127.0.0.1", 50000)) as c:
|
||||
yield c
|
||||
app.dependency_overrides.clear()
|
||||
|
||||
|
||||
@@ -47,7 +47,8 @@ def client(repo, ca_dir: pathlib.Path):
|
||||
async def _override() -> Any:
|
||||
return repo
|
||||
app.dependency_overrides[get_repo] = _override
|
||||
with TestClient(app) as c:
|
||||
# loopback client so operator-gated /swarm/enroll accepts the local operator.
|
||||
with TestClient(app, client=("127.0.0.1", 50000)) as c:
|
||||
yield c
|
||||
app.dependency_overrides.clear()
|
||||
|
||||
|
||||
@@ -57,7 +57,8 @@ def client(repo, ca_dir):
|
||||
return repo
|
||||
|
||||
app.dependency_overrides[get_repo] = _override
|
||||
with TestClient(app) as c:
|
||||
# loopback client so operator-gated /swarm/enroll accepts the local operator.
|
||||
with TestClient(app, client=("127.0.0.1", 50000)) as c:
|
||||
yield c
|
||||
app.dependency_overrides.clear()
|
||||
|
||||
|
||||
@@ -79,9 +79,10 @@ def test_extract_fingerprint_works_on_non_cert_der() -> None:
|
||||
# ------------------------- require_operator_cert ---------------------------
|
||||
|
||||
|
||||
def _request_with(scope: dict) -> MagicMock:
|
||||
def _request_with(scope: dict, client_host: str | None = None) -> MagicMock:
|
||||
req = MagicMock()
|
||||
req.scope = scope
|
||||
req.client = None if client_host is None else MagicMock(host=client_host)
|
||||
return req
|
||||
|
||||
|
||||
@@ -96,10 +97,14 @@ def test_require_operator_accepts_swarmctl(ca) -> None:
|
||||
|
||||
|
||||
def test_require_operator_rejects_worker_cn(ca) -> None:
|
||||
# A worker cert is CA-signed but must not drive the control plane, even
|
||||
# from loopback — the CN gate fires before the loopback fallback.
|
||||
from fastapi import HTTPException
|
||||
|
||||
with pytest.raises(HTTPException) as ei:
|
||||
_mtls.require_operator_cert(_request_with(_scope_with(_der_for(ca, "worker-1"))))
|
||||
_mtls.require_operator_cert(
|
||||
_request_with(_scope_with(_der_for(ca, "worker-1")), client_host="127.0.0.1")
|
||||
)
|
||||
assert ei.value.status_code == 403
|
||||
|
||||
|
||||
@@ -111,10 +116,25 @@ def test_require_operator_rejects_updater_cn(ca) -> None:
|
||||
assert ei.value.status_code == 403
|
||||
|
||||
|
||||
def test_require_operator_rejects_no_cert() -> None:
|
||||
def test_require_operator_allows_certless_loopback() -> None:
|
||||
# Shipping default: plaintext loopback, no client cert → local operator.
|
||||
peer = _mtls.require_operator_cert(_request_with({}, client_host="127.0.0.1"))
|
||||
assert peer.cn is None and peer.sha256 == ""
|
||||
|
||||
|
||||
def test_require_operator_rejects_certless_non_loopback() -> None:
|
||||
# No cert from off-box → fail closed (the startup guard makes this
|
||||
# unreachable in practice, but defense in depth).
|
||||
from fastapi import HTTPException
|
||||
|
||||
with pytest.raises(HTTPException) as ei:
|
||||
_mtls.require_operator_cert(_request_with({}))
|
||||
_mtls.require_operator_cert(_request_with({}, client_host="10.0.0.9"))
|
||||
assert ei.value.status_code == 403
|
||||
|
||||
|
||||
def test_require_operator_rejects_certless_unknown_client() -> None:
|
||||
from fastapi import HTTPException
|
||||
|
||||
with pytest.raises(HTTPException) as ei:
|
||||
_mtls.require_operator_cert(_request_with({}, client_host=None))
|
||||
assert ei.value.status_code == 403
|
||||
assert "unavailable" in ei.value.detail
|
||||
|
||||
@@ -56,7 +56,10 @@ def client(repo, ca_dir: pathlib.Path):
|
||||
return repo
|
||||
|
||||
app.dependency_overrides[get_repo] = _override
|
||||
with TestClient(app) as c:
|
||||
# client=loopback so the operator-cert gate takes its certless-loopback
|
||||
# path (the shipping single-host default); TestClient otherwise reports
|
||||
# host "testclient", which the gate treats as off-box.
|
||||
with TestClient(app, client=("127.0.0.1", 50000)) as c:
|
||||
yield c
|
||||
app.dependency_overrides.clear()
|
||||
|
||||
|
||||
135
tests/swarm/test_swarm_authz.py
Normal file
135
tests/swarm/test_swarm_authz.py
Normal file
@@ -0,0 +1,135 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Authorization for the swarm control plane.
|
||||
|
||||
Two layers, both fail-closed:
|
||||
1. ``_guard_bind`` refuses a routable bind without --tls (CLI startup).
|
||||
2. ``require_operator_cert`` gates every operator endpoint (HTTP layer).
|
||||
|
||||
No live TLS: the off-box case is simulated by giving the TestClient a
|
||||
non-loopback client address with no peer cert in scope.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import pathlib
|
||||
from typing import Any
|
||||
|
||||
import contextlib
|
||||
|
||||
import pytest
|
||||
import typer
|
||||
from fastapi.testclient import TestClient
|
||||
from typer.testing import CliRunner
|
||||
|
||||
from decnet.cli.swarmctl import _guard_bind
|
||||
from decnet.web.db.factory import get_repository
|
||||
from decnet.web.dependencies import get_repo
|
||||
|
||||
|
||||
# ------------------------- layer 1: bind guard ------------------------------
|
||||
|
||||
|
||||
@pytest.mark.parametrize("host", ("127.0.0.1", "::1", "localhost"))
def test_guard_bind_allows_loopback_plaintext(host: str) -> None:
    """A loopback bind without TLS passes the bind guard."""
    # Passing means "returns normally": any exception fails the test.
    _guard_bind(host, tls=False)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("host", ("0.0.0.0", "10.0.0.5", "192.168.1.10"))
def test_guard_bind_allows_routable_with_tls(host: str) -> None:
    """A routable bind passes the bind guard once TLS is enabled."""
    # With mTLS on, a routable bind is legitimate; the call must return
    # normally.
    _guard_bind(host, tls=True)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("host", ("0.0.0.0", "10.0.0.5"))
def test_guard_bind_refuses_routable_plaintext(host: str) -> None:
    """A routable bind without TLS is refused with exit code 2."""
    with pytest.raises(typer.Exit) as refusal:
        _guard_bind(host, tls=False)

    exit_error = refusal.value
    assert exit_error.exit_code == 2
|
||||
|
||||
|
||||
def test_swarmctl_cli_refuses_routable_plaintext(monkeypatch: pytest.MonkeyPatch) -> None:
    """Wiring check: the CLI exits with code 2 before spawning any subprocess."""
    import subprocess

    from decnet.cli import app

    # Every attempted Popen call is recorded here; the list must stay empty.
    popen_calls: list[tuple[Any, ...]] = []

    def _forbidden_popen(*args: Any, **kwargs: Any):  # pragma: no cover - must not run
        popen_calls.append(args)
        raise AssertionError("subprocess.Popen must not be reached")

    monkeypatch.setattr(subprocess, "Popen", _forbidden_popen)

    argv = ["swarmctl", "--host", "0.0.0.0", "--no-listener"]
    outcome = CliRunner().invoke(app, argv)

    assert outcome.exit_code == 2
    assert not popen_calls
|
||||
|
||||
|
||||
# ------------------------- layer 2: endpoint operator gate ------------------
|
||||
|
||||
|
||||
@pytest.fixture
def ca_dir(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> pathlib.Path:
    """Point the PKI default CA directory at a per-test path and return it.

    ``pki.DEFAULT_CA_DIR`` is patched to ``tmp_path / "ca"``, and the enroll
    router module's ``pki`` attribute is set to the patched module.
    """
    from decnet.swarm import pki
    from decnet.web.router.swarm import api_enroll_host as enroll_mod

    test_ca_dir = tmp_path / "ca"
    monkeypatch.setattr(pki, "DEFAULT_CA_DIR", test_ca_dir)
    monkeypatch.setattr(enroll_mod, "pki", pki)
    return test_ca_dir
|
||||
|
||||
|
||||
@pytest.fixture
def repo(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch):
    """Create a repository backed by a per-test database file.

    The new repository replaces the module-level ``repo`` attribute of both
    ``decnet.web.dependencies`` and ``decnet.web.swarm_api`` for the test.
    """
    db_file = tmp_path / "authz.db"
    test_repo = get_repository(db_path=str(db_file))

    import decnet.web.dependencies as deps
    import decnet.web.swarm_api as swarm_api_mod

    for module in (deps, swarm_api_mod):
        monkeypatch.setattr(module, "repo", test_repo)
    return test_repo
|
||||
|
||||
|
||||
@contextlib.contextmanager
def _client(repo, client_addr: tuple[str, int]):
    """Yield a TestClient for the swarm API that reports ``client_addr`` as its peer.

    ``get_repo`` is overridden to return ``repo`` for the duration of the
    context; all dependency overrides are cleared on exit.
    """
    from decnet.web.swarm_api import app

    async def _provide_repo() -> Any:
        return repo

    app.dependency_overrides[get_repo] = _provide_repo
    with contextlib.ExitStack() as cleanup:
        # Registered first so it runs last: the client is closed, then the
        # overrides are cleared, even if client construction/startup fails.
        cleanup.callback(app.dependency_overrides.clear)
        # Entering the TestClient context runs the controller lifespan, which
        # creates the swarm schema against the test repo.
        yield cleanup.enter_context(TestClient(app, client=client_addr))
|
||||
|
||||
|
||||
def test_offbox_certless_caller_is_refused_on_every_operator_route(
    repo, ca_dir: pathlib.Path
) -> None:
    """A certless caller from a non-loopback address gets 403 on operator routes.

    No TLS peer cert plus a non-loopback client simulates an off-box
    attacker. The bind guard makes this combination unreachable in
    production, but the HTTP layer must fail closed regardless.
    """
    # (method, path, extra request kwargs), probed in this order.
    # NOTE(review): a deploy route is not exercised here — confirm whether it
    # belongs in this list.
    probes: list[tuple[str, str, dict[str, Any]]] = [
        (
            "POST",
            "/swarm/enroll",
            {"json": {"name": "evil", "address": "10.0.0.99", "agent_port": 8765}},
        ),
        ("GET", "/swarm/hosts", {}),
        ("POST", "/swarm/check", {}),
        ("GET", "/swarm/deckies", {}),
        ("POST", "/swarm/teardown", {"json": {}}),
    ]

    with _client(repo, ("10.0.0.99", 40000)) as attacker:
        for method, path, extra in probes:
            response = attacker.request(method, path, **extra)
            assert response.status_code == 403
|
||||
|
||||
|
||||
def test_loopback_operator_is_allowed(repo, ca_dir: pathlib.Path) -> None:
    """The local operator over plaintext loopback can enroll and list hosts.

    This is the shipping single-host default: no client cert, loopback peer.
    """
    worker = {"name": "worker-ok", "address": "10.0.0.5", "agent_port": 8765}

    with _client(repo, ("127.0.0.1", 40000)) as operator:
        enroll_resp = operator.post("/swarm/enroll", json=worker)
        assert enroll_resp.status_code == 201, enroll_resp.text

        hosts_resp = operator.get("/swarm/hosts")
        assert hosts_resp.status_code == 200

        hosts = hosts_resp.json()
        assert any(host["name"] == "worker-ok" for host in hosts)
|
||||
Reference in New Issue
Block a user