merge: testing → main (reconcile 2-week divergence)

This commit is contained in:
2026-04-28 18:36:00 -04:00
parent 499836c9e4
commit 862e4dbb31
1235 changed files with 160255 additions and 7996 deletions

0
tests/swarm/__init__.py Normal file
View File

View File

@@ -0,0 +1,95 @@
"""Agent FastAPI app — static/contract checks only.
We deliberately do NOT spin uvicorn up in-process here: the mTLS layer is
enforced by uvicorn itself (via --ssl-cert-reqs 2) and is validated in the
VM integration suite. What we CAN assert in unit scope is the route
surface + request/response schema.
"""
from __future__ import annotations
from fastapi.testclient import TestClient
from decnet.agent.app import app
def test_health_endpoint() -> None:
    """GET /health answers 200 with the fixed ``{"status": "ok"}`` payload."""
    response = TestClient(app).get("/health")
    assert response.status_code == 200
    assert response.json() == {"status": "ok"}
def test_status_when_not_deployed() -> None:
    """GET /status succeeds and its JSON carries ``deployed`` and ``deckies``."""
    response = TestClient(app).get("/status")
    assert response.status_code == 200
    payload = response.json()
    for required_key in ("deployed", "deckies"):
        assert required_key in payload
def test_mutate_is_501() -> None:
    """POST /mutate with a well-formed body is answered with HTTP 501."""
    request_body = {"decky_id": "decky-01", "services": ["ssh"]}
    response = TestClient(app).post("/mutate", json=request_body)
    assert response.status_code == 501
def test_deploy_rejects_malformed_body() -> None:
    """POST /deploy with a body that is not a config is rejected with 422."""
    response = TestClient(app).post("/deploy", json={"not": "a config"})
    # 422 == request-body validation failure (pydantic validation).
    assert response.status_code == 422
def test_route_set() -> None:
    """Every core agent route is registered on the app (extras are allowed)."""
    expected = {"/health", "/status", "/deploy", "/teardown", "/mutate", "/self-destruct"}
    registered = {route.path for route in app.routes if hasattr(route, "path")}
    assert expected.issubset(registered)
def test_self_destruct_spawns_reaper_and_returns_fast(monkeypatch, tmp_path) -> None:
    """/self-destruct must write the reaper script and spawn it detached
    (start_new_session=True). We intercept Popen so the test doesn't
    actually nuke anything.

    The reaper script written under /tmp is removed in a ``finally`` block so
    a failing content assertion does not leave it behind on the test host.
    """
    import pathlib
    import subprocess as _sp

    from decnet.agent import executor as _exec

    spawned: list[dict] = []

    class _FakePopen:
        """Records constructor arguments instead of starting a process."""

        def __init__(self, args, **kw):
            spawned.append({"args": args, "kw": kw})

    # Neutralise the teardown / state-clearing side effects of the endpoint.
    monkeypatch.setattr(_exec, "_deployer", type("X", (), {
        "teardown": staticmethod(lambda _id: None),
    })())
    monkeypatch.setattr(_exec, "clear_state", lambda: None)
    monkeypatch.setattr(_sp, "Popen", _FakePopen)

    client = TestClient(app)
    resp = client.post("/self-destruct")
    assert resp.status_code == 200
    assert resp.json()["status"] == "self_destruct_scheduled"

    assert len(spawned) == 1
    assert spawned[0]["kw"].get("start_new_session") is True

    script_candidates = [
        a for a in spawned[0]["args"]
        if isinstance(a, str) and a.startswith("/tmp/decnet-reaper-")
    ]
    assert len(script_candidates) == 1, spawned[0]["args"]
    script_path = pathlib.Path(script_candidates[0])

    try:
        # Reaper content sanity check — covers the paths the operator asked for.
        body = script_path.read_text()
        assert "/opt/decnet*" in body
        assert "/etc/systemd/system/decnet-" in body
        assert "/var/lib/decnet/*" in body
        assert "/usr/local/bin/decnet*" in body
        assert "/etc/decnet" in body

        # Logs must be preserved — no `rm` line should touch /var/log.
        for line in body.splitlines():
            stripped = line.strip()
            if stripped.startswith("#") or not stripped:
                continue
            if stripped.startswith("rm "):
                assert "/var/log" not in stripped
    finally:
        # Always clean up, including when one of the assertions above fails.
        script_path.unlink(missing_ok=True)

View File

@@ -0,0 +1,122 @@
"""Tests for the worker-side heartbeat loop (decnet.agent.heartbeat)."""
from __future__ import annotations
import asyncio
from typing import Any
import httpx
import pytest
from decnet.agent import heartbeat as hb
@pytest.fixture(autouse=True)
def _reset_module_task(monkeypatch: pytest.MonkeyPatch):
    """Set ``hb._task`` to ``None`` before and after every test.

    Keeps start()/stop() state from leaking between cases.
    """

    def _clear_task() -> None:
        monkeypatch.setattr(hb, "_task", None)

    _clear_task()
    yield
    _clear_task()
class _StubTransport(httpx.AsyncBaseTransport):
    """Record each request and respond according to ``responder(req)``.

    ``calls`` accumulates one ``{"url": ..., "body": ...}`` dict per request.
    """

    def __init__(self, responder):
        self._responder = responder
        self.calls: list[dict[str, Any]] = []

    async def handle_async_request(self, request: httpx.Request) -> httpx.Response:
        payload = request.read()
        entry = {"url": str(request.url), "body": payload}
        self.calls.append(entry)
        return self._responder(request)
@pytest.mark.asyncio
async def test_tick_posts_status_snapshot_and_accepts_204(monkeypatch) -> None:
    """One ``_tick`` issues exactly one request whose JSON body carries the
    host uuid, the agent version and the executor's status snapshot."""
    import json

    async def _status_stub() -> dict:
        return {"deployed": False, "deckies": []}

    monkeypatch.setattr(hb._exec, "status", _status_stub)

    recorder = _StubTransport(lambda _req: httpx.Response(204))
    async with httpx.AsyncClient(transport=recorder) as client:
        await hb._tick(client, "https://m/swarm/heartbeat", "uuid-a", "1.2.3")

    assert len(recorder.calls) == 1
    sent = json.loads(recorder.calls[0]["body"])
    assert sent["host_uuid"] == "uuid-a"
    assert sent["agent_version"] == "1.2.3"
    assert sent["status"]["deployed"] is False
@pytest.mark.asyncio
async def test_tick_logs_on_non_204_response(monkeypatch, caplog) -> None:
    """A 403 reply makes ``_tick`` emit a log record mentioning "rejected"."""

    async def _status_stub() -> dict:
        return {"deployed": False}

    monkeypatch.setattr(hb._exec, "status", _status_stub)

    refusing = _StubTransport(lambda _req: httpx.Response(403, text="mismatch"))
    async with httpx.AsyncClient(transport=refusing) as client:
        with caplog.at_level("WARNING", logger="agent.heartbeat"):
            await hb._tick(client, "https://m/swarm/heartbeat", "uuid-a", "1.2.3")

    messages = [record.getMessage() for record in caplog.records]
    assert any("rejected" in message for message in messages)
def test_start_is_noop_when_identity_missing(monkeypatch) -> None:
    """With neither DECNET_HOST_UUID nor DECNET_MASTER_HOST set, ``start()``
    must return None without raising and leave ``hb._task`` unset.

    Dev runs exercise this path every time.
    """
    import decnet.env as env

    for setting in ("DECNET_HOST_UUID", "DECNET_MASTER_HOST"):
        monkeypatch.setattr(env, setting, None)

    started = hb.start()
    assert started is None
    assert hb._task is None
@pytest.mark.asyncio
async def test_start_is_noop_when_ssl_context_unavailable(
    monkeypatch, tmp_path
) -> None:
    """Identity is plumbed but the worker bundle is missing on disk:
    ``start()`` must bail out (return None) instead of crashing the
    FastAPI app."""
    import decnet.env as env

    identity = {
        "DECNET_HOST_UUID": "uuid-a",
        "DECNET_MASTER_HOST": "master.lan",
        "DECNET_SWARMCTL_PORT": 8770,
    }
    for setting, value in identity.items():
        monkeypatch.setattr(env, setting, value)

    # Point the agent dir at a path that holds no bundle.
    bundle_dir = tmp_path / "empty"
    monkeypatch.setenv("DECNET_AGENT_DIR", str(bundle_dir))

    assert hb.start() is None
@pytest.mark.asyncio
async def test_loop_keeps_ticking_after_5xx_failures(monkeypatch) -> None:
    """Simulate a flapping master: the first two ticks get a 503, the third
    a 204. Ticking must continue through the failures until the third
    request has been made."""
    hits = {"n": 0}

    def _responder(req):
        hits["n"] += 1
        if hits["n"] >= 3:
            return httpx.Response(204)
        return httpx.Response(503, text="unavailable")

    async def _status_stub() -> dict:
        return {"deployed": False}

    monkeypatch.setattr(hb._exec, "status", _status_stub)
    monkeypatch.setattr(hb, "INTERVAL_S", 0.01)  # fast-forward the sleep

    flapping = _StubTransport(_responder)

    async def _drive():
        async with httpx.AsyncClient(transport=flapping) as client:
            while hits["n"] < 3:
                try:
                    await hb._tick(client, "https://m/swarm/heartbeat", "uuid-a", "1.2.3")
                except Exception:
                    # Deliberate best-effort: a failed tick must not stop the loop.
                    pass
                await asyncio.sleep(0.01)

    # The timeout bounds the test should the counter never reach three.
    await asyncio.wait_for(_drive(), timeout=2.0)
    assert hits["n"] >= 3

View File

@@ -0,0 +1,147 @@
"""Step 8 regression: the agent must NEVER auto-restore a topology on boot.
Guiding rule: master is authoritative, agent is a dumb executor. If an
agent restarts with a stale applied_topology row in its local cache, it
must not try to replay `docker-compose up` on its own — that would
create a split-brain where a decommissioned topology suddenly reappears
without the master's consent. Instead the agent simply reports whatever
it has via GET /topology/state + heartbeat; master decides whether to
re-push.
"""
from __future__ import annotations
import json
import sqlite3
from pathlib import Path
import pytest
from fastapi.testclient import TestClient
from decnet.agent import app as agent_app
from decnet.agent.topology_store import TopologyStore
def _seed_applied_row(db_path: Path, topology_id: str, hash_: str) -> None:
    """Write one applied-topology row straight into the store at *db_path*.

    Simulates a cache left behind by a previous process lifecycle. The parent
    directory is created when missing and the store is always closed.
    """
    db_path.parent.mkdir(parents=True, exist_ok=True)
    blob = {"topology": {"id": topology_id}}
    seeded = TopologyStore(db_path)
    try:
        seeded.put(topology_id, hash_, blob)
    finally:
        seeded.close()
@pytest.fixture
def agent_dir(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path:
    """Create ``<tmp_path>/agent``, export it as DECNET_AGENT_DIR and return it."""
    directory = tmp_path / "agent"
    directory.mkdir()
    monkeypatch.setenv("DECNET_AGENT_DIR", str(directory))
    # Drop the module-level cached store so the new DECNET_AGENT_DIR is
    # honoured for this test.
    monkeypatch.setattr(agent_app, "_topology_store", None)
    return directory
def test_lifespan_startup_does_not_touch_docker(
    agent_dir: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Seed a populated topology.db, bring the agent app up, and verify that
    ``docker.from_env`` was never invoked — the agent must wait for master
    instructions rather than self-heal from local state."""
    import docker as _docker

    _seed_applied_row(agent_dir / "topology.db", "stale-tid", "stale-hash")

    calls: list[str] = []

    def _tripwire(*_args, **_kwargs):
        # Record first, so the final assertion still fires if the raise is swallowed.
        calls.append("docker.from_env")
        raise AssertionError("agent must not touch docker during startup")

    monkeypatch.setattr(_docker, "from_env", _tripwire)

    # Bringing up the lifespan is what would run any auto-restore hook.
    with TestClient(agent_app.app) as client:
        # Sanity: health is live, no apply was triggered.
        health = client.get("/health")
        assert health.status_code == 200

    assert calls == [], "docker was contacted during agent boot"
def test_get_topology_state_reflects_cache_without_replay(
    agent_dir: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """GET /topology/state must echo the stored id/hash unchanged.

    The endpoint may *observe* live docker state (read-only); docker is
    stubbed here so no daemon is needed. It must never re-materialise
    bridges/containers from the cache.
    """
    import docker as _docker

    _seed_applied_row(agent_dir / "topology.db", "t-boot", "h-boot")

    class _StubDocker:
        """Docker client stand-in that reports no networks and no containers."""

        class networks:
            @staticmethod
            def list():
                return []

        class containers:
            @staticmethod
            def list(all=False):
                return []

    monkeypatch.setattr(_docker, "from_env", lambda: _StubDocker)

    with TestClient(agent_app.app) as client:
        response = client.get("/topology/state")

    assert response.status_code == 200, response.text
    state = response.json()
    assert state["topology_id"] == "t-boot"
    assert state["applied_version_hash"] == "h-boot"
    # observed is read-only — empty live state is fine, it's what the
    # master uses to decide whether to re-push.
    assert state["observed"] == {"bridges": [], "containers": []}
def test_topology_store_has_no_restore_hook() -> None:
    """Static guard: fail when TopologyStore gains a public attribute named
    like a restore/replay hook, forcing the author to re-read the Step 8
    rationale before merging."""
    forbidden = {"restore", "replay", "reapply", "rehydrate", "auto_restore"}
    public_names = set()
    for attr in dir(TopologyStore):
        if not attr.startswith("_"):
            public_names.add(attr)
    overlap = public_names.intersection(forbidden)
    assert not overlap, (
        f"TopologyStore must stay a passive cache — found {overlap}. "
        "The agent never self-heals; master decides."
    )
def test_seeded_db_survives_process_restart_verbatim(tmp_path: Path) -> None:
    """Opening a pre-populated store in a fresh process yields the same
    row — no on-open mutation, no stale-row scrubbing. This is the
    behavior the master relies on for the 'agent reports old hash →
    needs_resync' detection path.

    Every handle (both stores and the raw sqlite connection) is released
    even when an assertion fails.
    """
    from contextlib import closing

    db = tmp_path / "t.db"

    # Process 1.
    s1 = TopologyStore(db)
    try:
        s1.put("t-x", "h-x", {"topology": {"id": "t-x"}})
    finally:
        s1.close()

    # Raw sqlite read — confirms nothing in the file rewrites itself
    # between opens. ``with sqlite3.connect(...)`` alone only scopes a
    # transaction and leaves the connection open, hence closing().
    with closing(sqlite3.connect(str(db))) as raw:
        row = raw.execute(
            "SELECT topology_id, applied_version_hash, hydrated_blob_json"
            " FROM applied_topology"
        ).fetchone()
    assert row is not None
    assert row[0] == "t-x"
    assert row[1] == "h-x"
    assert json.loads(row[2]) == {"topology": {"id": "t-x"}}

    # Process 2 (new store, same file).
    s2 = TopologyStore(db)
    try:
        cur = s2.current()
        assert cur is not None
        assert cur.topology_id == "t-x"
        assert cur.applied_version_hash == "h-x"
    finally:
        s2.close()

View File

@@ -0,0 +1,118 @@
"""Worker agent re-localizes master-built configs to its own NIC/subnet.
The master ships a DecnetConfig populated from *its own* network (master
NIC name, master subnet, master-chosen decky IPs). The worker cannot run
the deployer against that as-is: `ip addr show <master-nic>` blows up on
any worker whose NIC differs from the master's, which is ~always the
case in a heterogeneous fleet.
The agent's executor overrides interface/subnet/gateway/host_ip with
locally-detected values before calling into the deployer, and if the
subnet doesn't match, it re-allocates decky IPs from the local subnet.
"""
from __future__ import annotations
import pytest
from decnet.agent import executor
from decnet.models import DecnetConfig, DeckyConfig
def _cfg(subnet: str, interface: str = "wlp6s0") -> DecnetConfig:
    """Build a swarm-mode config with two ssh deckies on *subnet*.

    The gateway is ``<prefix>.1`` and the deckies get ``<prefix>.11`` and
    ``<prefix>.12``, where ``<prefix>`` is *subnet* up to its last dot.
    """
    prefix = subnet.rsplit(".", 1)[0]

    deckies = []
    for idx in (1, 2):
        label = f"decky-0{idx}"
        deckies.append(
            DeckyConfig(
                name=label,
                ip=f"{prefix}.{10 + idx}",
                services=["ssh"],
                distro="debian",
                base_image="debian:bookworm-slim",
                hostname=label,
            )
        )

    return DecnetConfig(
        mode="swarm",
        interface=interface,
        subnet=subnet,
        gateway=f"{prefix}.1",
        deckies=deckies,
    )
def test_relocalize_swaps_interface_and_subnet(monkeypatch: pytest.MonkeyPatch) -> None:
    """When the detected subnet differs from the incoming one, ``_relocalize``
    swaps in the local interface/subnet/gateway and re-allocates decky IPs."""

    def _allocate(**kw):
        return [f"10.0.0.{20 + i}" for i in range(kw["count"])]

    monkeypatch.setattr(executor, "detect_interface", lambda: "enp0s3")
    monkeypatch.setattr(executor, "detect_subnet", lambda _i: ("10.0.0.0/24", "10.0.0.1"))
    monkeypatch.setattr(executor, "get_host_ip", lambda _i: "10.0.0.99")
    monkeypatch.setattr(executor, "allocate_ips", _allocate)

    relocalized = executor._relocalize(_cfg("192.168.1.0/24"))

    assert relocalized.interface == "enp0s3"
    assert relocalized.subnet == "10.0.0.0/24"
    assert relocalized.gateway == "10.0.0.1"

    deckies = relocalized.deckies
    # Subnet changed → IPs re-allocated from the worker's subnet.
    assert [d.ip for d in deckies] == ["10.0.0.20", "10.0.0.21"]
    # Non-network fields survive.
    assert [d.name for d in deckies] == ["decky-01", "decky-02"]
    assert [d.services for d in deckies] == [["ssh"], ["ssh"]]
def test_relocalize_keeps_ips_when_subnet_matches(monkeypatch: pytest.MonkeyPatch) -> None:
    """When the detected subnet equals the incoming one, decky IPs are kept
    and ``allocate_ips`` is never invoked."""

    # allocate_ips should NOT be called in the matching-subnet branch.
    def _fail(**_kw):  # pragma: no cover
        raise AssertionError("allocate_ips must not be called when subnets match")

    monkeypatch.setattr(executor, "detect_interface", lambda: "enp0s3")
    monkeypatch.setattr(executor, "detect_subnet", lambda _i: ("192.168.1.0/24", "192.168.1.1"))
    monkeypatch.setattr(executor, "get_host_ip", lambda _i: "192.168.1.50")
    monkeypatch.setattr(executor, "allocate_ips", _fail)

    relocalized = executor._relocalize(_cfg("192.168.1.0/24"))

    assert relocalized.interface == "enp0s3"
    assert relocalized.subnet == "192.168.1.0/24"
    # Decky IPs preserved verbatim.
    kept_ips = [d.ip for d in relocalized.deckies]
    assert kept_ips == ["192.168.1.11", "192.168.1.12"]
@pytest.mark.asyncio
async def test_deploy_relocalizes_before_calling_deployer(monkeypatch: pytest.MonkeyPatch) -> None:
    """End-to-end: ``executor.deploy`` must hand the deployer the locally
    detected interface, not the one the master put in the config."""
    seen: dict = {}

    def _fake_deploy(cfg, dry_run, no_cache, parallel):
        seen.update(interface=cfg.interface, subnet=cfg.subnet)

    monkeypatch.setattr(executor, "detect_interface", lambda: "enp0s3")
    monkeypatch.setattr(executor, "detect_subnet", lambda _i: ("192.168.1.0/24", "192.168.1.1"))
    monkeypatch.setattr(executor, "get_host_ip", lambda _i: "192.168.1.50")
    monkeypatch.setattr(executor._deployer, "deploy", _fake_deploy)

    master_cfg = _cfg("192.168.1.0/24", interface="wlp6s0-master")
    await executor.deploy(master_cfg, dry_run=True)

    assert seen == {"interface": "enp0s3", "subnet": "192.168.1.0/24"}
@pytest.mark.asyncio
async def test_deploy_unihost_mode_skips_relocalize(monkeypatch: pytest.MonkeyPatch) -> None:
    """Unihost configs were already built against the local box, so deploy
    must pass their interface through without calling ``detect_interface``."""
    captured: dict = {}

    def _fail(*_a, **_kw):  # pragma: no cover
        raise AssertionError("detect_interface must not be called for unihost")

    def _fake_deploy(cfg, dry_run, no_cache, parallel):
        captured["interface"] = cfg.interface

    monkeypatch.setattr(executor, "detect_interface", _fail)
    monkeypatch.setattr(executor._deployer, "deploy", _fake_deploy)

    swarm_cfg = _cfg("192.168.1.0/24", interface="eth0")
    unihost_cfg = swarm_cfg.model_copy(update={"mode": "unihost"})
    await executor.deploy(unihost_cfg, dry_run=True)

    assert captured["interface"] == "eth0"

View File

@@ -0,0 +1,168 @@
"""Agent topology endpoints — contract-level tests with mocked ops."""
from __future__ import annotations
import pathlib
import pytest
from fastapi.testclient import TestClient
from decnet.agent import app as _agent_app
from decnet.agent import topology_ops as _ops
from decnet.agent.topology_store import AlreadyApplied
@pytest.fixture(autouse=True)
def _isolate_store(monkeypatch: pytest.MonkeyPatch, tmp_path: pathlib.Path):
    """Point DECNET_AGENT_DIR at a tmp dir and reset the module-level store
    singleton before and after each test."""

    def _discard_store() -> None:
        existing = _agent_app._topology_store
        if existing is not None:
            existing.close()
        _agent_app._topology_store = None

    monkeypatch.setenv("DECNET_AGENT_DIR", str(tmp_path))
    # Force a fresh store per test.
    _discard_store()
    yield
    _discard_store()
def _hydrated(topology_id: str = "top-1") -> dict:
return {
"topology": {"id": topology_id, "name": "n", "mode": "agent"},
"lans": [],
"deckies": [],
"edges": [],
}
def test_topology_state_idle() -> None:
    """With nothing applied, /topology/state reports null id and hash and
    still includes an ``observed`` section."""
    response = TestClient(_agent_app.app).get("/topology/state")
    assert response.status_code == 200
    state = response.json()
    for field in ("topology_id", "applied_version_hash"):
        assert state[field] is None
    assert "observed" in state
def test_topology_apply_routes_to_ops(monkeypatch: pytest.MonkeyPatch) -> None:
    """POST /topology/apply delegates to ``topology_ops.apply`` and echoes
    the version hash back in an ``applied`` response."""
    called: dict = {}

    async def _fake_apply(hydrated, version_hash, store):
        called.update(hydrated=hydrated, version_hash=version_hash)
        # Simulate ops bookkeeping.
        store.put(hydrated["topology"]["id"], version_hash, hydrated)

    monkeypatch.setattr(_ops, "apply", _fake_apply)

    request_body = {"hydrated": _hydrated(), "version_hash": "abc"}
    response = TestClient(_agent_app.app).post("/topology/apply", json=request_body)

    assert response.status_code == 200, response.text
    assert response.json() == {"status": "applied", "version_hash": "abc"}
    assert called["version_hash"] == "abc"
def test_topology_apply_hash_mismatch_is_400(monkeypatch: pytest.MonkeyPatch) -> None:
    """A ``HashMismatch`` raised by ops surfaces as HTTP 400 with a detail
    that mentions the hash."""

    async def _raise_mismatch(*_a, **_kw):
        raise _ops.HashMismatch("master hash != agent hash")

    monkeypatch.setattr(_ops, "apply", _raise_mismatch)

    request_body = {"hydrated": _hydrated(), "version_hash": "wrong"}
    response = TestClient(_agent_app.app).post("/topology/apply", json=request_body)

    assert response.status_code == 400
    detail = response.json()["detail"]
    assert "hash" in detail.lower()
def test_topology_apply_conflict_is_409(monkeypatch: pytest.MonkeyPatch) -> None:
    """An ``AlreadyApplied`` raised by ops surfaces as HTTP 409."""

    async def _raise_conflict(*_a, **_kw):
        raise AlreadyApplied("another topology already applied")

    monkeypatch.setattr(_ops, "apply", _raise_conflict)

    request_body = {"hydrated": _hydrated("top-2"), "version_hash": "h"}
    response = TestClient(_agent_app.app).post("/topology/apply", json=request_body)

    assert response.status_code == 409
def test_topology_apply_docker_failure_is_500_and_records_error(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A generic failure in ops yields HTTP 500 and is persisted so that
    GET /topology/state reports it alongside an empty applied hash."""
    import docker as _docker

    async def _raise_runtime(*_a, **_kw):
        raise RuntimeError("docker down")

    # Stub docker.from_env for the /topology/state observed() call so
    # the state endpoint doesn't need a real daemon.
    class _StubDocker:
        class networks:
            @staticmethod
            def list():
                return []

        class containers:
            @staticmethod
            def list(all=False):
                return []

    monkeypatch.setattr(_ops, "apply", _raise_runtime)
    monkeypatch.setattr(_docker, "from_env", lambda: _StubDocker)

    client = TestClient(_agent_app.app)
    request_body = {"hydrated": _hydrated("top-err"), "version_hash": "h"}
    failure = client.post("/topology/apply", json=request_body)
    assert failure.status_code == 500
    assert "docker down" in failure.json()["detail"]

    # The error must be persisted so GET /topology/state surfaces it,
    # and the stored hash stays empty so master's heartbeat check flags
    # the topology for resync rather than assuming it's applied.
    state = client.get("/topology/state").json()
    expected = {
        "topology_id": "top-err",
        "applied_version_hash": "",
        "last_error": "docker down",
    }
    for field, value in expected.items():
        assert state[field] == value
def test_topology_teardown_routes_to_ops(monkeypatch: pytest.MonkeyPatch) -> None:
    """POST /topology/teardown delegates to ``topology_ops.teardown`` with
    the requested topology id."""
    called: dict = {}

    async def _fake_teardown(topology_id, store):
        called["topology_id"] = topology_id
        store.clear(topology_id)

    monkeypatch.setattr(_ops, "teardown", _fake_teardown)

    request_body = {"topology_id": "top-gone"}
    response = TestClient(_agent_app.app).post("/topology/teardown", json=request_body)

    assert response.status_code == 200
    assert called["topology_id"] == "top-gone"
def test_topology_teardown_failure_is_500(monkeypatch: pytest.MonkeyPatch) -> None:
    """A ``RuntimeError`` from ops during teardown surfaces as HTTP 500."""

    async def _raise_runtime(*_a, **_kw):
        raise RuntimeError("compose refused")

    monkeypatch.setattr(_ops, "teardown", _raise_runtime)

    request_body = {"topology_id": "top-1"}
    response = TestClient(_agent_app.app).post("/topology/teardown", json=request_body)

    assert response.status_code == 500
def test_routes_registered() -> None:
    """The three topology routes are registered on the agent app."""
    expected = {"/topology/apply", "/topology/teardown", "/topology/state"}
    registered = {
        route.path for route in _agent_app.app.routes if hasattr(route, "path")
    }
    assert expected.issubset(registered)

View File

@@ -0,0 +1,160 @@
"""Tests for :mod:`decnet.agent.topology_store`."""
from __future__ import annotations
import pathlib
import pytest
from decnet.agent.topology_store import (
AlreadyApplied,
TopologyStore,
observed,
)
def _store(tmp_path: pathlib.Path) -> TopologyStore:
    """Open a ``TopologyStore`` backed by ``topology.db`` inside *tmp_path*."""
    db_file = tmp_path / "topology.db"
    return TopologyStore(db_file)
def test_idle_by_default(tmp_path: pathlib.Path) -> None:
    """A freshly opened store has no current topology."""
    s = _store(tmp_path)
    try:
        assert s.current() is None
    finally:
        # Close even when the assertion fails so the store is not leaked.
        s.close()
def test_put_then_current(tmp_path: pathlib.Path) -> None:
    """``put`` followed by ``current`` returns the stored id, hash and
    hydrated blob, with no error recorded."""
    s = _store(tmp_path)
    try:
        s.put("t1", "hash-a", {"topology": {"id": "t1"}, "lans": []})
        row = s.current()
        assert row is not None
        assert row.topology_id == "t1"
        assert row.applied_version_hash == "hash-a"
        assert row.hydrated["topology"]["id"] == "t1"
        assert row.last_error is None
    finally:
        # Close even when an assertion fails so the store is not leaked.
        s.close()
def test_put_same_id_is_idempotent_update(tmp_path: pathlib.Path) -> None:
    """A second ``put`` for the same topology id overwrites hash and blob."""
    s = _store(tmp_path)
    try:
        s.put("t1", "hash-a", {"k": 1})
        s.put("t1", "hash-b", {"k": 2})
        row = s.current()
        assert row is not None
        assert row.applied_version_hash == "hash-b"
        assert row.hydrated == {"k": 2}
    finally:
        # Close even when an assertion fails so the store is not leaked.
        s.close()
def test_put_different_id_rejected(tmp_path: pathlib.Path) -> None:
    """``put`` with a different topology id while one is stored raises
    ``AlreadyApplied``."""
    s = _store(tmp_path)
    try:
        s.put("t1", "hash-a", {})
        with pytest.raises(AlreadyApplied):
            s.put("t2", "hash-b", {})
    finally:
        # Close even when the expectation fails so the store is not leaked.
        s.close()
def test_record_error_then_put_clears(tmp_path: pathlib.Path) -> None:
    """A recorded error is visible on the current row and is cleared by the
    next ``put`` for the same topology."""
    s = _store(tmp_path)
    try:
        s.put("t1", "h", {})
        s.record_error("t1", "kaboom")
        failed = s.current()
        # Guard first so a missing row fails as an assertion, not an AttributeError.
        assert failed is not None
        assert failed.last_error == "kaboom"
        # Re-applying clears the error flag.
        s.put("t1", "h2", {})
        recovered = s.current()
        assert recovered is not None
        assert recovered.last_error is None
    finally:
        # Close even when an assertion fails so the store is not leaked.
        s.close()
def test_record_error_upserts_when_no_prior_row(tmp_path: pathlib.Path) -> None:
    """Apply failure mid-materialise: put() hasn't written a row yet but
    we still want the error surfaced on GET /topology/state and the
    next heartbeat. The marker uses empty hash so master sees drift."""
    s = _store(tmp_path)
    try:
        s.record_error("t-fail", "docker refused connection")
        row = s.current()
        assert row is not None
        assert row.topology_id == "t-fail"
        assert row.applied_version_hash == ""
        assert row.applied_at == 0
        assert row.last_error == "docker refused connection"
    finally:
        # Close even when an assertion fails so the store is not leaked.
        s.close()
def test_record_error_then_successful_put_replaces_marker(tmp_path: pathlib.Path) -> None:
    """Once a retry succeeds, the marker row must be replaced with a
    real applied row — no stale error or empty hash left behind."""
    s = _store(tmp_path)
    try:
        s.record_error("t-retry", "first try failed")
        s.put("t-retry", "real-hash", {"topology": {"id": "t-retry"}})
        row = s.current()
        # Guard first so a missing row fails as an assertion, not an AttributeError.
        assert row is not None
        assert row.applied_version_hash == "real-hash"
        assert row.last_error is None
        assert row.applied_at > 0
    finally:
        # Close even when an assertion fails so the store is not leaked.
        s.close()
def test_clear(tmp_path: pathlib.Path) -> None:
    """``clear`` removes the current row and is a no-op when repeated."""
    s = _store(tmp_path)
    try:
        s.put("t1", "h", {})
        s.clear("t1")
        assert s.current() is None
        # Clearing a missing id is a no-op (teardown idempotency).
        s.clear("t1")
    finally:
        # Close even when an assertion fails so the store is not leaked.
        s.close()
def test_persists_across_reopen(tmp_path: pathlib.Path) -> None:
    """A row written by one store instance is visible to a second instance
    opened on the same file."""
    s = _store(tmp_path)
    try:
        s.put("t1", "h", {"x": 1})
    finally:
        s.close()

    s2 = _store(tmp_path)
    try:
        row = s2.current()
        assert row is not None
        assert row.topology_id == "t1"
    finally:
        # Close even when an assertion fails so the store is not leaked.
        s2.close()
# -------------------------------------------------------- observed() helper
class _FakeNet:
def __init__(self, name: str, driver: str) -> None:
self.name = name
self.attrs = {"Driver": driver}
class _FakeContainer:
def __init__(self, name: str) -> None:
self.name = name
class _FakeDocker:
def __init__(self, nets, containers) -> None:
self.networks = type("N", (), {"list": lambda _self: nets})()
self.containers = type(
"C", (), {"list": lambda _self, all=False: containers}
)()
def test_observed_filters_by_prefix() -> None:
    """``observed`` keeps only the expected bridge network and container out
    of a mixed set of fakes."""
    fake_client = _FakeDocker(
        [
            _FakeNet("decnet-topology-abc", "bridge"),
            _FakeNet("bridge", "bridge"),
            _FakeNet("decnet-topology-xyz", "overlay"),  # wrong driver — filtered
        ],
        [_FakeContainer("decnet-deaddeck"), _FakeContainer("sshd")],
    )

    snapshot = observed(fake_client)

    expected = {
        "bridges": ["decnet-topology-abc"],
        "containers": ["decnet-deaddeck"],
    }
    assert snapshot == expected
def test_observed_reports_error_on_failure() -> None:
    """When the client raises, ``observed`` returns a dict whose ``error``
    entry carries the exception text instead of propagating."""

    class _Broken:
        """Client whose ``networks`` attribute raises on access."""

        @property
        def networks(self):
            raise RuntimeError("docker down")

    snapshot = observed(_Broken())

    assert "error" in snapshot
    message = snapshot["error"]
    assert "docker down" in message

View File

@@ -0,0 +1,39 @@
"""CLI surface for `decnet forwarder`. Only guard clauses — the async
loop itself is covered by tests/swarm/test_log_forwarder.py."""
from __future__ import annotations
import pathlib
import pytest
from typer.testing import CliRunner
from decnet.cli import app
# Shared Typer CLI runner used by every test in this module to invoke `app`.
runner = CliRunner()
def test_forwarder_requires_master_host(monkeypatch: pytest.MonkeyPatch, tmp_path: pathlib.Path) -> None:
    """Without a master host from env or flags, `forwarder` exits with code 2
    and mentions ``master-host``."""
    monkeypatch.delenv("DECNET_SWARM_MASTER_HOST", raising=False)
    # Also patch the already-imported module-level constant.
    monkeypatch.setattr("decnet.env.DECNET_SWARM_MASTER_HOST", None, raising=False)

    log_path = tmp_path / "decnet.log"
    outcome = runner.invoke(app, ["forwarder", "--log-file", str(log_path)])

    assert outcome.exit_code == 2
    assert "master-host" in outcome.output
def test_forwarder_requires_bundle(monkeypatch: pytest.MonkeyPatch, tmp_path: pathlib.Path) -> None:
    """With a master host but an agent dir holding no bundle, `forwarder`
    exits with code 2 and mentions ``bundle``."""
    log_file = tmp_path / "decnet.log"
    log_file.write_text("")
    agent_dir = tmp_path / "agent"  # empty

    argv = ["forwarder"]
    argv += ["--master-host", "127.0.0.1"]
    argv += ["--log-file", str(log_file)]
    argv += ["--agent-dir", str(agent_dir)]
    outcome = runner.invoke(app, argv)

    assert outcome.exit_code == 2
    assert "bundle" in outcome.output

View File

@@ -0,0 +1,292 @@
"""CLI `decnet swarm {enroll,list,decommission}` + `deploy --mode swarm`.
Controller HTTP is stubbed via monkeypatching `_http_request`; we aren't
testing the controller (that has its own test file) or httpx itself. We
*are* testing: arg parsing, URL construction, round-robin sharding of
deckies, bundle file output, error paths when the controller rejects.
"""
from __future__ import annotations
import json
import pathlib
from typing import Any
import pytest
from typer.testing import CliRunner
from decnet import cli as cli_mod
from decnet.cli import app, deploy as cli_deploy, utils as cli_utils
# Shared Typer CLI runner used by every test in this module to invoke `app`.
runner = CliRunner()
class _FakeResp:
def __init__(self, payload: Any, status: int = 200):
self._payload = payload
self.status_code = status
self.text = json.dumps(payload) if not isinstance(payload, str) else payload
def json(self) -> Any:
return self._payload
class _HttpStub(list):
"""Both a call log and a scripted-reply registry."""
def __init__(self) -> None:
super().__init__()
self.script: dict[tuple[str, str], _FakeResp] = {}
@pytest.fixture
def http_stub(monkeypatch: pytest.MonkeyPatch) -> _HttpStub:
    """Replace ``cli_utils._http_request`` with a recorder and return it.

    Each call is appended as ``(method, url, json_body)``. The first scripted
    entry whose method matches and whose suffix ends the URL supplies the
    reply; an unscripted call raises ``AssertionError``.
    """
    stub = _HttpStub()

    def _fake(method, url, *, json_body=None, timeout=30.0):
        stub.append((method, url, json_body))
        for (verb, suffix), reply in stub.script.items():
            if verb != method or not url.endswith(suffix):
                continue
            return reply
        raise AssertionError(f"Unscripted HTTP call: {method} {url}")

    monkeypatch.setattr(cli_utils, "_http_request", _fake)
    return stub
# ------------------------------------------------------------- swarm list
def test_swarm_list_empty(http_stub) -> None:
    """`swarm list` with no hosts exits 0 and prints a "No workers" notice."""
    http_stub.script[("GET", "/swarm/hosts")] = _FakeResp([])

    outcome = runner.invoke(app, ["swarm", "list"])

    assert outcome.exit_code == 0
    assert "No workers" in outcome.output
def test_swarm_list_with_rows(http_stub) -> None:
    """`swarm list` renders the name and address of a returned host."""
    host_row = {
        "uuid": "u1", "name": "decky01", "address": "10.0.0.1",
        "agent_port": 8765, "status": "active", "last_heartbeat": None,
        "enrolled_at": "2026-04-18T00:00:00Z", "notes": None,
        "client_cert_fingerprint": "ab:cd",
    }
    http_stub.script[("GET", "/swarm/hosts")] = _FakeResp([host_row])

    outcome = runner.invoke(app, ["swarm", "list"])

    assert outcome.exit_code == 0
    for expected in ("decky01", "10.0.0.1"):
        assert expected in outcome.output
def test_swarm_list_passes_status_filter(http_stub) -> None:
    """`swarm list --status active` forwards the filter as a query string."""
    filtered_suffix = "/swarm/hosts?host_status=active"
    http_stub.script[("GET", filtered_suffix)] = _FakeResp([])

    outcome = runner.invoke(app, ["swarm", "list", "--status", "active"])

    assert outcome.exit_code == 0
    # last call URL ended with the filter suffix
    _method, last_url, _body = http_stub[-1]
    assert last_url.endswith("/swarm/hosts?host_status=active")
# ------------------------------------------------------------- swarm enroll
def test_swarm_enroll_writes_bundle(http_stub, tmp_path: pathlib.Path) -> None:
    """`swarm enroll` writes the three PEM files into --out-dir and forwards
    the comma-separated --sans as a list in the request body."""
    enroll_reply = {
        "host_uuid": "u-123", "name": "decky01", "address": "10.0.0.1",
        "agent_port": 8765, "fingerprint": "de:ad:be:ef",
        "ca_cert_pem": "CA-PEM", "worker_cert_pem": "CRT-PEM",
        "worker_key_pem": "KEY-PEM",
    }
    http_stub.script[("POST", "/swarm/enroll")] = _FakeResp(enroll_reply)

    out = tmp_path / "bundle"
    argv = [
        "swarm", "enroll",
        "--name", "decky01", "--address", "10.0.0.1",
        "--sans", "decky01.lan,10.0.0.1",
        "--out-dir", str(out),
    ]
    outcome = runner.invoke(app, argv)
    assert outcome.exit_code == 0, outcome.output

    written = {
        "ca.crt": "CA-PEM",
        "worker.crt": "CRT-PEM",
        "worker.key": "KEY-PEM",
    }
    for filename, content in written.items():
        assert (out / filename).read_text() == content

    # SANs were forwarded in the JSON body.
    first_call_body = http_stub[0][2]
    assert first_call_body["sans"] == ["decky01.lan", "10.0.0.1"]
# ------------------------------------------------------------- swarm check
def test_swarm_check_prints_table(http_stub) -> None:
    """`swarm check` lists both workers and renders a yes and a no for
    reachability."""
    reachable_row = {
        "host_uuid": "u-a", "name": "decky01", "address": "10.0.0.1",
        "reachable": True, "detail": {"status": "ok"},
    }
    unreachable_row = {
        "host_uuid": "u-b", "name": "decky02", "address": "10.0.0.2",
        "reachable": False, "detail": "connection refused",
    }
    http_stub.script[("POST", "/swarm/check")] = _FakeResp(
        {"results": [reachable_row, unreachable_row]}
    )

    outcome = runner.invoke(app, ["swarm", "check"])

    assert outcome.exit_code == 0, outcome.output
    assert "decky01" in outcome.output
    assert "decky02" in outcome.output
    # Both reachable=true and reachable=false render.
    lowered = outcome.output.lower()
    assert "yes" in lowered
    assert "no" in lowered
def test_swarm_check_empty(http_stub) -> None:
    """`swarm check` with no results exits 0 and prints a "No workers" notice."""
    http_stub.script[("POST", "/swarm/check")] = _FakeResp({"results": []})

    outcome = runner.invoke(app, ["swarm", "check"])

    assert outcome.exit_code == 0
    assert "No workers" in outcome.output
def test_swarm_check_json_output(http_stub) -> None:
    """`swarm check --json` emits structured output containing the quoted
    ``reachable`` key and host name."""
    only_row = {
        "host_uuid": "u-a", "name": "decky01", "address": "10.0.0.1",
        "reachable": True, "detail": {"status": "ok"},
    }
    http_stub.script[("POST", "/swarm/check")] = _FakeResp({"results": [only_row]})

    outcome = runner.invoke(app, ["swarm", "check", "--json"])

    assert outcome.exit_code == 0
    # JSON mode emits structured output, not the rich table.
    for token in ('"reachable"', '"decky01"'):
        assert token in outcome.output
# ------------------------------------------------------------- swarm deckies
def test_swarm_deckies_empty(http_stub) -> None:
    """`swarm deckies` with no rows exits 0 and prints a "No deckies" notice."""
    http_stub.script[("GET", "/swarm/deckies")] = _FakeResp([])

    outcome = runner.invoke(app, ["swarm", "deckies"])

    assert outcome.exit_code == 0, outcome.output
    assert "No deckies" in outcome.output
def test_swarm_deckies_renders_table(http_stub) -> None:
    """Each decky row shows its name, host name and comma-joined services."""
    running_decky = {
        "decky_name": "decky-01", "host_uuid": "u-1", "host_name": "w1",
        "host_address": "10.0.0.1", "host_status": "active",
        "services": ["ssh"], "state": "running", "last_error": None,
        "compose_hash": None, "updated_at": "2026-04-18T00:00:00Z",
    }
    failed_decky = {
        "decky_name": "decky-02", "host_uuid": "u-2", "host_name": "w2",
        "host_address": "10.0.0.2", "host_status": "active",
        "services": ["smb", "ssh"], "state": "failed", "last_error": "boom",
        "compose_hash": None, "updated_at": "2026-04-18T00:00:00Z",
    }
    http_stub.script[("GET", "/swarm/deckies")] = _FakeResp([running_decky, failed_decky])

    outcome = runner.invoke(app, ["swarm", "deckies"])

    assert outcome.exit_code == 0, outcome.output
    for decky_name in ("decky-01", "decky-02"):
        assert decky_name in outcome.output
    assert all(host_name in outcome.output for host_name in ("w1", "w2"))
    assert "smb,ssh" in outcome.output
def test_swarm_deckies_json_output(http_stub) -> None:
    """`swarm deckies --json` output contains the quoted key and decky name."""
    single_decky = {
        "decky_name": "decky-01", "host_uuid": "u-1", "host_name": "w1",
        "host_address": "10.0.0.1", "host_status": "active",
        "services": ["ssh"], "state": "running", "last_error": None,
        "compose_hash": None, "updated_at": "2026-04-18T00:00:00Z",
    }
    http_stub.script[("GET", "/swarm/deckies")] = _FakeResp([single_decky])

    outcome = runner.invoke(app, ["swarm", "deckies", "--json"])

    assert outcome.exit_code == 0
    for quoted in ('"decky_name"', '"decky-01"'):
        assert quoted in outcome.output
def test_swarm_deckies_filter_by_host_name_looks_up_uuid(http_stub) -> None:
    """`--host <name>` ends with a deckies query filtered by that host's UUID."""
    scripted = http_stub.script
    scripted[("GET", "/swarm/hosts")] = _FakeResp([{"uuid": "u-x", "name": "w1"}])
    scripted[("GET", "/swarm/deckies?host_uuid=u-x")] = _FakeResp([])

    outcome = runner.invoke(app, ["swarm", "deckies", "--host", "w1"])

    assert outcome.exit_code == 0
    # The last recorded call must be the filtered deckies query.
    last_call = http_stub[-1]
    assert last_call[1].endswith("/swarm/deckies?host_uuid=u-x")
def test_swarm_deckies_filter_by_state(http_stub) -> None:
    """`--state failed` is forwarded as a `state=failed` query parameter."""
    filtered_path = "/swarm/deckies?state=failed"
    http_stub.script[("GET", filtered_path)] = _FakeResp([])

    outcome = runner.invoke(app, ["swarm", "deckies", "--state", "failed"])

    assert outcome.exit_code == 0
    last_call = http_stub[-1]
    assert last_call[1].endswith(filtered_path)
# ------------------------------------------------------------- swarm decommission
def test_swarm_decommission_by_name_looks_up_uuid(http_stub) -> None:
    """Decommissioning by name issues a host-list GET followed by a DELETE."""
    scripted = http_stub.script
    scripted[("GET", "/swarm/hosts")] = _FakeResp([{"uuid": "u-x", "name": "decky02"}])
    scripted[("DELETE", "/swarm/hosts/u-x")] = _FakeResp({}, status=204)

    outcome = runner.invoke(app, ["swarm", "decommission", "--name", "decky02", "--yes"])

    assert outcome.exit_code == 0, outcome.output
    verbs_in_order = [call[0] for call in http_stub]
    assert verbs_in_order == ["GET", "DELETE"]
def test_swarm_decommission_name_not_found(http_stub) -> None:
    """An unknown worker name exits 1 with a "No enrolled worker" message."""
    http_stub.script[("GET", "/swarm/hosts")] = _FakeResp([])
    argv = ["swarm", "decommission", "--name", "ghost", "--yes"]

    outcome = runner.invoke(app, argv)

    assert outcome.exit_code == 1
    assert "No enrolled worker" in outcome.output
def test_swarm_decommission_requires_identifier() -> None:
    """Running decommission with only `--yes` exits with code 2."""
    argv = ["swarm", "decommission", "--yes"]
    outcome = runner.invoke(app, argv)
    assert outcome.exit_code == 2
# ------------------------------------------------------------- deploy --mode swarm
def test_deploy_swarm_round_robins_and_posts(http_stub, monkeypatch: pytest.MonkeyPatch) -> None:
    """deploy --mode swarm fetches hosts, assigns host_uuid round-robin,
    POSTs to /swarm/deploy with the sharded config."""
    scripted = http_stub.script
    # Two enrolled workers, zero active.
    enrolled_workers = [
        {"uuid": "u-a", "name": "A", "address": "10.0.0.1", "agent_port": 8765,
         "status": "enrolled"},
        {"uuid": "u-b", "name": "B", "address": "10.0.0.2", "agent_port": 8765,
         "status": "enrolled"},
    ]
    scripted[("GET", "/swarm/hosts?host_status=enrolled")] = _FakeResp(enrolled_workers)
    scripted[("GET", "/swarm/hosts?host_status=active")] = _FakeResp([])
    deploy_results = [
        {"host_uuid": "u-a", "host_name": "A", "ok": True, "detail": {"status": "ok"}},
        {"host_uuid": "u-b", "host_name": "B", "ok": True, "detail": {"status": "ok"}},
    ]
    scripted[("POST", "/swarm/deploy")] = _FakeResp({"results": deploy_results})

    # Stub network detection so we don't need root / real NICs.
    network_fakes = {
        "detect_interface": lambda: "eth0",
        "detect_subnet": lambda _iface: ("10.0.0.0/24", "10.0.0.254"),
        "get_host_ip": lambda _iface: "10.0.0.100",
    }
    for attr_name, fake in network_fakes.items():
        monkeypatch.setattr(cli_deploy, attr_name, fake)

    argv = [
        "deploy", "--mode", "swarm", "--deckies", "3",
        "--services", "ssh", "--dry-run",
    ]
    outcome = runner.invoke(app, argv)
    assert outcome.exit_code == 0, outcome.output

    # Find the POST /swarm/deploy body and confirm round-robin sharding.
    deploy_call = next(
        call for call in http_stub
        if call[0] == "POST" and call[1].endswith("/swarm/deploy")
    )
    sent_body = deploy_call[2]
    assigned_hosts = [decky["host_uuid"] for decky in sent_body["config"]["deckies"]]
    assert assigned_hosts == ["u-a", "u-b", "u-a"]
    assert sent_body["dry_run"] is True
def test_deploy_swarm_fails_if_no_workers(http_stub, monkeypatch: pytest.MonkeyPatch) -> None:
    """With no enrolled or active hosts, swarm deploy exits 1 with a clear message."""
    for host_status in ("enrolled", "active"):
        http_stub.script[("GET", f"/swarm/hosts?host_status={host_status}")] = _FakeResp([])

    # Stub network detection so the command never touches real interfaces.
    network_fakes = {
        "detect_interface": lambda: "eth0",
        "detect_subnet": lambda _iface: ("10.0.0.0/24", "10.0.0.254"),
        "get_host_ip": lambda _iface: "10.0.0.100",
    }
    for attr_name, fake in network_fakes.items():
        monkeypatch.setattr(cli_deploy, attr_name, fake)

    argv = [
        "deploy", "--mode", "swarm", "--deckies", "2",
        "--services", "ssh", "--dry-run",
    ]
    outcome = runner.invoke(app, argv)

    assert outcome.exit_code == 1
    assert "No enrolled workers" in outcome.output

View File

@@ -0,0 +1,192 @@
"""CLI `decnet swarm update` — target resolution, tarring, push aggregation.
The UpdaterClient is stubbed: we are testing the CLI's orchestration, not
the wire protocol (that has test_updater_app.py and UpdaterClient round-
trips live under test_swarm_api.py integration).
"""
from __future__ import annotations
import json
import pathlib
from typing import Any
import pytest
from typer.testing import CliRunner
from decnet import cli as cli_mod
from decnet.cli import app, utils as cli_utils
# Shared Typer CliRunner; the tests below use it to invoke `app` in-process.
runner = CliRunner()
class _FakeResp:
def __init__(self, payload: Any, status: int = 200):
self._payload = payload
self.status_code = status
self.text = json.dumps(payload) if not isinstance(payload, str) else payload
self.content = self.text.encode()
def json(self) -> Any:
return self._payload
@pytest.fixture
def http_stub(monkeypatch: pytest.MonkeyPatch) -> dict:
    """Replace ``cli_utils._http_request`` with a fake backed by a mutable dict.

    The returned ``state`` dict holds the host list served for
    ``GET .../swarm/hosts``; tests mutate ``state["hosts"]`` to script it.
    Any other request raises ``AssertionError``.
    """
    state: dict = {"hosts": []}

    def _fake(method, url, *, json_body=None, timeout=30.0):
        is_host_listing = method == "GET" and url.endswith("/swarm/hosts")
        if not is_host_listing:
            raise AssertionError(f"Unscripted HTTP call: {method} {url}")
        return _FakeResp(state["hosts"])

    monkeypatch.setattr(cli_utils, "_http_request", _fake)
    return state
class _StubUpdaterClient:
    """In-memory replacement for UpdaterClient.

    Mirrors UpdaterClient's async-context-manager surface. Each instance
    registers itself in ``instances`` and logs the methods called on it in
    ``calls``. ``behavior`` maps a host name to the canned response that
    ``update`` returns for that host.
    """

    # Class-level registries shared by every instance; the ``stub_updater``
    # fixture clears both before handing the class to a test.
    instances: list["_StubUpdaterClient"] = []
    behavior: dict[str, Any] = {}

    def __init__(self, host, *, updater_port: int = 8766, **_: Any):
        self.host = host
        self.port = updater_port
        self.calls: list[str] = []
        _StubUpdaterClient.instances.append(self)

    async def __aenter__(self) -> "_StubUpdaterClient":
        return self

    async def __aexit__(self, *exc: Any) -> None:
        return None

    async def update(self, tarball: bytes, sha: str = "") -> _FakeResp:
        """Record the call; return the scripted response for this host or a 200 success."""
        self.calls.append("update")
        host_name = self.host.get("name")
        default_success = _FakeResp({"status": "updated", "release": {"sha": sha}}, 200)
        return _StubUpdaterClient.behavior.get(host_name, default_success)

    async def update_self(self, tarball: bytes, sha: str = "") -> _FakeResp:
        """Record the call and always report a queued self-update."""
        self.calls.append("update_self")
        queued = {"status": "self_update_queued"}
        return _FakeResp(queued, 200)
@pytest.fixture
def stub_updater(monkeypatch: pytest.MonkeyPatch):
    """Swap UpdaterClient for ``_StubUpdaterClient`` and reset its registries.

    Returns the stub class so tests can inspect ``instances`` and ``calls``.
    """
    for registry in (_StubUpdaterClient.instances, _StubUpdaterClient.behavior):
        registry.clear()
    monkeypatch.setattr("decnet.swarm.updater_client.UpdaterClient", _StubUpdaterClient)
    # NOTE(review): this import only ensures decnet.cli is loaded; it does not
    # patch anything itself. Presumably the CLI resolves UpdaterClient from
    # decnet.swarm.updater_client at call time — confirm.
    import decnet.cli  # noqa: F401
    return _StubUpdaterClient
def _mk_source_tree(tmp_path: pathlib.Path) -> pathlib.Path:
root = tmp_path / "src"
root.mkdir()
(root / "decnet").mkdir()
(root / "decnet" / "a.py").write_text("x = 1")
return root
# ------------------------------------------------------------- arg validation
def test_update_requires_host_or_all(http_stub) -> None:
    """`swarm update` with neither --host nor --all exits with code 2."""
    outcome = runner.invoke(app, ["swarm", "update"])
    assert outcome.exit_code == 2
def test_update_host_and_all_are_mutex(http_stub) -> None:
    """Passing both --host and --all exits with code 2."""
    argv = ["swarm", "update", "--host", "w1", "--all"]
    outcome = runner.invoke(app, argv)
    assert outcome.exit_code == 2
def test_update_unknown_host_exits_1(http_stub) -> None:
    """Targeting a host name that is not enrolled exits 1 with a message."""
    enrolled_host = {"uuid": "u1", "name": "other", "address": "10.0.0.1", "status": "active"}
    http_stub["hosts"] = [enrolled_host]

    outcome = runner.invoke(app, ["swarm", "update", "--host", "nope"])

    assert outcome.exit_code == 1
    assert "No enrolled worker" in outcome.output
# ---------------------------------------------------------------- happy paths
def test_update_single_host(http_stub, stub_updater, tmp_path: pathlib.Path) -> None:
    """`--host w1` creates an updater client for w1 only."""
    http_stub["hosts"] = [
        {"uuid": "u1", "name": "w1", "address": "10.0.0.1", "status": "active"},
        {"uuid": "u2", "name": "w2", "address": "10.0.0.2", "status": "active"},
    ]
    source_root = _mk_source_tree(tmp_path)
    argv = ["swarm", "update", "--host", "w1", "--root", str(source_root)]

    outcome = runner.invoke(app, argv)

    assert outcome.exit_code == 0, outcome.output
    assert "w1" in outcome.output
    # Only w1 got a client; w2 is untouched.
    contacted = [client.host["name"] for client in stub_updater.instances]
    assert contacted == ["w1"]
def test_update_all_skips_decommissioned(http_stub, stub_updater, tmp_path: pathlib.Path) -> None:
    """`--all` contacts active and enrolled hosts but not decommissioned ones."""
    http_stub["hosts"] = [
        {"uuid": "u1", "name": "w1", "address": "10.0.0.1", "status": "active"},
        {"uuid": "u2", "name": "w2", "address": "10.0.0.2", "status": "decommissioned"},
        {"uuid": "u3", "name": "w3", "address": "10.0.0.3", "status": "enrolled"},
    ]
    source_root = _mk_source_tree(tmp_path)
    argv = ["swarm", "update", "--all", "--root", str(source_root)]

    outcome = runner.invoke(app, argv)

    assert outcome.exit_code == 0, outcome.output
    contacted = sorted(client.host["name"] for client in stub_updater.instances)
    assert contacted == ["w1", "w3"]
def test_update_include_self_calls_both(
    http_stub, stub_updater, tmp_path: pathlib.Path,
) -> None:
    """`--include-self` calls update and then update_self on the same client."""
    http_stub["hosts"] = [
        {"uuid": "u1", "name": "w1", "address": "10.0.0.1", "status": "active"},
    ]
    source_root = _mk_source_tree(tmp_path)
    argv = ["swarm", "update", "--all", "--root", str(source_root), "--include-self"]

    outcome = runner.invoke(app, argv)

    assert outcome.exit_code == 0
    first_client = stub_updater.instances[0]
    assert first_client.calls == ["update", "update_self"]
# ------------------------------------------------------------- failure modes
def test_update_rollback_status_409_flags_failure(
    http_stub, stub_updater, tmp_path: pathlib.Path,
) -> None:
    """A 409 rolled-back response makes the command exit 1 and mention it."""
    http_stub["hosts"] = [
        {"uuid": "u1", "name": "w1", "address": "10.0.0.1", "status": "active"},
    ]
    rollback_detail = {"detail": {"error": "probe failed", "rolled_back": True}}
    _StubUpdaterClient.behavior["w1"] = _FakeResp(rollback_detail, status=409)
    source_root = _mk_source_tree(tmp_path)

    outcome = runner.invoke(app, ["swarm", "update", "--all", "--root", str(source_root)])

    assert outcome.exit_code == 1
    assert "rolled-back" in outcome.output
def test_update_include_self_skipped_when_agent_update_failed(
    http_stub, stub_updater, tmp_path: pathlib.Path,
) -> None:
    """If the agent update returns 500, update_self is not attempted."""
    http_stub["hosts"] = [
        {"uuid": "u1", "name": "w1", "address": "10.0.0.1", "status": "active"},
    ]
    failure = _FakeResp({"detail": {"error": "pip failed"}}, status=500)
    _StubUpdaterClient.behavior["w1"] = failure
    source_root = _mk_source_tree(tmp_path)
    argv = ["swarm", "update", "--all", "--root", str(source_root), "--include-self"]

    outcome = runner.invoke(app, argv)

    assert outcome.exit_code == 1
    # update_self must NOT have been called — agent update failed.
    first_client = stub_updater.instances[0]
    assert first_client.calls == ["update"]
# --------------------------------------------------------------------- dry run
def test_update_dry_run_does_not_call_updater(
    http_stub, stub_updater, tmp_path: pathlib.Path,
) -> None:
    """`--dry-run` exits 0, creates no updater client and says "dry-run"."""
    http_stub["hosts"] = [
        {"uuid": "u1", "name": "w1", "address": "10.0.0.1", "status": "active"},
    ]
    source_root = _mk_source_tree(tmp_path)
    argv = ["swarm", "update", "--all", "--root", str(source_root), "--dry-run"]

    outcome = runner.invoke(app, argv)

    assert outcome.exit_code == 0
    assert stub_updater.instances == []
    assert "dry-run" in outcome.output.lower()

View File

@@ -0,0 +1,170 @@
"""End-to-end test: AgentClient talks to a live worker agent over mTLS.
Spins up uvicorn in-process on an ephemeral port with real cert files on
disk. Confirms:
1. The health endpoint works when the client presents a CA-signed cert.
2. An impostor client (cert signed by a different CA) is rejected at TLS
time.
"""
from __future__ import annotations
import asyncio
import pathlib
import socket
import threading
import time
import ssl
import httpx
import pytest
import uvicorn
from decnet.agent.app import app as agent_app
from decnet.swarm import client as swarm_client
from decnet.swarm import pki
def _free_port() -> int:
s = socket.socket()
s.bind(("127.0.0.1", 0))
port = s.getsockname()[1]
s.close()
return port
def _start_agent(
    tmp_path: pathlib.Path, port: int
) -> tuple[uvicorn.Server, threading.Thread, swarm_client.MasterIdentity]:
    """Provision a CA, sign a worker cert + a master cert, start uvicorn.

    The agent app is served on 127.0.0.1:``port`` from a daemon thread with
    client certificates required. Returns ``(server, thread, master_id)``;
    callers stop the server by setting ``server.should_exit = True`` and
    joining the thread.

    Raises:
        RuntimeError: if the server has not reported ``started`` within 5s.
            The server is asked to exit before the error is raised.
    """
    ca_dir = tmp_path / "ca"
    pki.ensure_ca(ca_dir)

    # Worker bundle
    worker_dir = tmp_path / "agent"
    pki.write_worker_bundle(
        pki.issue_worker_cert(pki.load_ca(ca_dir), "worker-test", ["127.0.0.1"]),
        worker_dir,
    )

    # Master identity (used by AgentClient as a client cert)
    master_id = swarm_client.ensure_master_identity(ca_dir)

    config = uvicorn.Config(
        agent_app,
        host="127.0.0.1",
        port=port,
        log_level="warning",
        ssl_keyfile=str(worker_dir / "worker.key"),
        ssl_certfile=str(worker_dir / "worker.crt"),
        ssl_ca_certs=str(worker_dir / "ca.crt"),
        # Named constant instead of the bare integer 2.
        ssl_cert_reqs=ssl.CERT_REQUIRED,
    )
    server = uvicorn.Server(config)

    def _run() -> None:
        # The thread gets its own event loop, separate from pytest-asyncio's.
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        loop.run_until_complete(server.serve())
        loop.close()

    thread = threading.Thread(target=_run, daemon=True)
    thread.start()

    # Wait for server to be listening. A monotonic clock keeps the deadline
    # immune to wall-clock adjustments.
    deadline = time.monotonic() + 5
    while time.monotonic() < deadline:
        if server.started:
            return server, thread, master_id
        time.sleep(0.05)

    # Don't leave a half-started server thread behind on failure.
    server.should_exit = True
    thread.join(timeout=5)
    raise RuntimeError("agent did not start within 5s")
@pytest.mark.asyncio
async def test_client_health_roundtrip(tmp_path: pathlib.Path) -> None:
    """AgentClient with the master identity can call health() and status()."""
    port = _free_port()
    server, thread, master_id = _start_agent(tmp_path, port)
    try:
        agent_client = swarm_client.AgentClient(
            address="127.0.0.1", agent_port=port, identity=master_id
        )
        async with agent_client as agent:
            health = await agent.health()
            assert health == {"status": "ok"}
            status_snapshot = await agent.status()
            assert "deployed" in status_snapshot
    finally:
        # Always stop the in-process uvicorn server.
        server.should_exit = True
        thread.join(timeout=5)
@pytest.mark.asyncio
async def test_fingerprint_pin_accepts_matching_cert(tmp_path: pathlib.Path) -> None:
    """AgentClient with the correct expected fingerprint connects normally."""
    port = _free_port()
    server, thread, master_id = _start_agent(tmp_path, port)
    try:
        # Pin against the fingerprint of the cert the agent actually serves.
        served_cert = tmp_path / "agent" / "worker.crt"
        pinned_fingerprint = pki.fingerprint(served_cert.read_bytes())
        host_record = {
            "uuid": "h1",
            "name": "worker-test",
            "address": "127.0.0.1",
            "agent_port": port,
            "client_cert_fingerprint": pinned_fingerprint,
        }
        async with swarm_client.AgentClient(host=host_record, identity=master_id) as agent:
            health = await agent.health()
            assert health == {"status": "ok"}
    finally:
        server.should_exit = True
        thread.join(timeout=5)
@pytest.mark.asyncio
async def test_fingerprint_pin_rejects_mismatch(tmp_path: pathlib.Path) -> None:
    """A wrong expected fingerprint must raise FingerprintMismatchError."""
    port = _free_port()
    server, thread, master_id = _start_agent(tmp_path, port)
    try:
        # 64 zeros: a fingerprint-shaped value that cannot match the real cert.
        bogus_fingerprint = "0" * 64
        host_record = {
            "uuid": "h1",
            "name": "worker-test",
            "address": "127.0.0.1",
            "agent_port": port,
            "client_cert_fingerprint": bogus_fingerprint,
        }
        with pytest.raises(swarm_client.FingerprintMismatchError):
            async with swarm_client.AgentClient(host=host_record, identity=master_id):
                pass
    finally:
        server.should_exit = True
        thread.join(timeout=5)
@pytest.mark.asyncio
async def test_impostor_client_cannot_connect(tmp_path: pathlib.Path) -> None:
    """A client whose cert was issued by a DIFFERENT CA must be rejected.

    The client trusts the agent's real CA, so the server certificate verifies
    and any failure comes from the server refusing the impostor's client
    certificate.
    """
    port = _free_port()
    server, thread, _master_id = _start_agent(tmp_path, port)
    try:
        evil_ca = pki.generate_ca("Evil CA")
        evil_dir = tmp_path / "evil"
        pki.write_worker_bundle(
            pki.issue_worker_cert(evil_ca, "evil-master", ["127.0.0.1"]), evil_dir
        )
        ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
        ctx.load_cert_chain(str(evil_dir / "worker.crt"), str(evil_dir / "worker.key"))
        # Trust the REAL CA (written by _start_agent into tmp_path/agent).
        # Trusting the evil CA here would make the client abort the handshake
        # itself, and the test would pass without the server ever judging the
        # client certificate.
        ctx.load_verify_locations(cafile=str(tmp_path / "agent" / "ca.crt"))
        ctx.verify_mode = ssl.CERT_REQUIRED
        ctx.check_hostname = False
        async with httpx.AsyncClient(
            base_url=f"https://127.0.0.1:{port}", verify=ctx, timeout=5.0
        ) as ac:
            # Depending on TLS version the rejection surfaces during the
            # handshake or on the first read/write after it.
            with pytest.raises(
                (
                    httpx.ConnectError,
                    httpx.ReadError,
                    httpx.WriteError,
                    httpx.RemoteProtocolError,
                    ssl.SSLError,
                )
            ):
                await ac.get("/health")
    finally:
        server.should_exit = True
        thread.join(timeout=5)

View File

@@ -0,0 +1,122 @@
"""AgentClient topology methods — unit tests with a mock httpx transport.
Avoids the full uvicorn+mTLS setup used by the roundtrip test; we just
need to prove the client emits the right verb/path/body and surfaces
HTTP errors the way the caller expects.
"""
from __future__ import annotations
import json
import httpx
import pytest
from decnet.swarm.client import AgentClient, MasterIdentity
class _StubIdentity:
    """Satisfies the MasterIdentity shape without requiring real files.

    NOTE(review): the class body is empty and the helpers visible in this
    module build a real ``MasterIdentity`` with placeholder paths instead —
    confirm whether this stub is still needed.
    """
def _client_with_transport(handler) -> AgentClient:
    """Return an AgentClient whose httpx client routes requests to ``handler``.

    The internal ``_client`` is replaced with an :class:`httpx.AsyncClient`
    backed by :class:`httpx.MockTransport`. Bypasses _build_client so no
    real cert IO happens; the identity paths are placeholders.
    """
    placeholder_identity = MasterIdentity(
        key_path="/nope/key",  # type: ignore[arg-type]
        cert_path="/nope/cert",  # type: ignore[arg-type]
        ca_cert_path="/nope/ca",  # type: ignore[arg-type]
    )
    agent = AgentClient(address="127.0.0.1", agent_port=8765, identity=placeholder_identity)
    mock_transport = httpx.MockTransport(handler)
    agent._client = httpx.AsyncClient(
        base_url="https://127.0.0.1:8765",
        transport=mock_transport,
    )
    return agent
@pytest.mark.anyio
async def test_apply_topology_sends_body() -> None:
    """apply_topology posts hydrated topology + version hash and returns the JSON reply."""
    seen: dict = {}
    server_reply = {"status": "applied", "version_hash": "h"}

    def handler(request: httpx.Request) -> httpx.Response:
        seen["url"] = str(request.url)
        seen["body"] = json.loads(request.content)
        return httpx.Response(200, json=server_reply)

    agent = _client_with_transport(handler)
    try:
        reply = await agent.apply_topology({"topology": {"id": "t1"}}, "h")
    finally:
        await agent._client.aclose()

    assert reply == {"status": "applied", "version_hash": "h"}
    assert seen["url"].endswith("/topology/apply")
    expected_body = {
        "hydrated": {"topology": {"id": "t1"}},
        "version_hash": "h",
    }
    assert seen["body"] == expected_body
@pytest.mark.anyio
async def test_apply_topology_raises_on_409() -> None:
    """A 409 from the agent surfaces as httpx.HTTPStatusError."""

    def handler(_req: httpx.Request) -> httpx.Response:
        conflict = {"detail": "already applied"}
        return httpx.Response(409, json=conflict)

    agent = _client_with_transport(handler)
    try:
        with pytest.raises(httpx.HTTPStatusError) as raised:
            await agent.apply_topology({"topology": {"id": "t2"}}, "h")
        status = raised.value.response.status_code
        assert status == 409
    finally:
        await agent._client.aclose()
@pytest.mark.anyio
async def test_teardown_topology_sends_body() -> None:
    """teardown_topology sends the topology id to the teardown endpoint."""
    seen: dict = {}

    def handler(request: httpx.Request) -> httpx.Response:
        seen["body"] = json.loads(request.content)
        seen["url"] = str(request.url)
        server_reply = {"status": "torn_down", "topology_id": "t1"}
        return httpx.Response(200, json=server_reply)

    agent = _client_with_transport(handler)
    try:
        reply = await agent.teardown_topology("t1")
    finally:
        await agent._client.aclose()

    assert reply["status"] == "torn_down"
    assert seen["body"] == {"topology_id": "t1"}
    assert seen["url"].endswith("/topology/teardown")
@pytest.mark.anyio
async def test_get_topology_state_returns_snapshot() -> None:
    """get_topology_state issues a GET and returns the decoded snapshot."""
    canned_snapshot = {
        "topology_id": "t1",
        "applied_version_hash": "h",
        "applied_at": 1,
        "last_error": None,
        "observed": {"bridges": [], "containers": []},
    }

    def handler(request: httpx.Request) -> httpx.Response:
        assert request.method == "GET"
        return httpx.Response(200, json=canned_snapshot)

    agent = _client_with_transport(handler)
    try:
        snapshot = await agent.get_topology_state()
    finally:
        await agent._client.aclose()

    assert snapshot["topology_id"] == "t1"
    assert snapshot["applied_version_hash"] == "h"

View File

@@ -0,0 +1,256 @@
"""Extra resilience tests for the syslog-over-TLS pipeline.
Covers failure modes the happy-path tests in test_log_forwarder.py don't
exercise:
* log rotation (st_size shrinks under the forwarder) resets offset to 0
and re-ships from the start;
* listener restart — forwarder reconnects and continues from the last
persisted offset, no duplicates;
* listener tolerates a client that connects with a valid cert and drops
mid-frame (IncompleteReadError path) without crashing the server task;
* peer_cn + fingerprint_from_ssl degrade gracefully on missing/invalid
peer certificates.
"""
from __future__ import annotations
import asyncio
import pathlib
import socket
import pytest
import ssl
from decnet.swarm import log_forwarder as fwd
from decnet.swarm import log_listener as lst
from decnet.swarm import pki
from decnet.swarm.client import ensure_master_identity
# Template for one syslog-style log line (newline-terminated) carrying a
# `decnet@53595` structured-data element; tests fill in the trailing
# `{msg}` placeholder via `SAMPLE.format(msg=...)`.
SAMPLE = (
'<13>1 2026-04-18T00:00:00Z decky01 svc 1 - '
'[decnet@53595 decky="decky01" service="ssh-service" '
'event_type="connect" attacker_ip="1.2.3.4" attacker_port="4242"] {msg}\n'
)
def _free_port() -> int:
s = socket.socket()
s.bind(("127.0.0.1", 0))
port = s.getsockname()[1]
s.close()
return port
@pytest.fixture
def _pki_env(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch):
    """Provision a throwaway CA, master identity and worker bundle under tmp_path.

    Points ``pki.DEFAULT_CA_DIR`` / ``pki.DEFAULT_AGENT_DIR`` at the new
    directories and returns them as ``{"ca_dir": ..., "worker_dir": ...}``.
    """
    ca_dir = tmp_path / "ca"
    worker_dir = tmp_path / "agent"

    pki.ensure_ca(ca_dir)
    ensure_master_identity(ca_dir)

    ca = pki.load_ca(ca_dir)
    worker_cert = pki.issue_worker_cert(ca, "worker-y", ["127.0.0.1"])
    pki.write_worker_bundle(worker_cert, worker_dir)

    defaults = (("DEFAULT_CA_DIR", ca_dir), ("DEFAULT_AGENT_DIR", worker_dir))
    for attr_name, directory in defaults:
        monkeypatch.setattr(pki, attr_name, directory)
    return {"ca_dir": ca_dir, "worker_dir": worker_dir}
async def _wait_for(pred, timeout: float = 5.0, interval: float = 0.1) -> bool:
steps = max(1, int(timeout / interval))
for _ in range(steps):
if pred():
return True
await asyncio.sleep(interval)
return False
# ----------------------------------------------------------- pure helpers
def test_peer_cn_returns_unknown_when_no_ssl_object() -> None:
    """peer_cn(None) yields the "unknown" placeholder."""
    common_name = lst.peer_cn(None)
    assert common_name == "unknown"
def test_fingerprint_from_ssl_handles_missing_peer_cert() -> None:
    """fingerprint_from_ssl(None) returns None."""
    fingerprint = lst.fingerprint_from_ssl(None)
    assert fingerprint is None
# ---------------------------------------------------- rotation / crash loops
@pytest.mark.asyncio
async def test_forwarder_reships_after_log_rotation(
    tmp_path: pathlib.Path, _pki_env: dict
) -> None:
    """If the log file shrinks (logrotate truncation), the forwarder must
    reset offset=0 and re-ship the new contents — never get stuck past EOF."""
    port = _free_port()
    worker_log = tmp_path / "decnet.log"
    master_log = tmp_path / "master.log"
    master_json = tmp_path / "master.json"
    listener_cfg = lst.ListenerConfig(
        log_path=master_log, json_path=master_json,
        bind_host="127.0.0.1", bind_port=port, ca_dir=_pki_env["ca_dir"],
    )
    fwd_cfg = fwd.ForwarderConfig(
        log_path=worker_log, master_host="127.0.0.1", master_port=port,
        agent_dir=_pki_env["worker_dir"], state_db=tmp_path / "fwd.db",
    )
    stop = asyncio.Event()
    lt = asyncio.create_task(lst.run_listener(listener_cfg, stop_event=stop))
    await asyncio.sleep(0.2)  # give the listener time to bind
    ft = asyncio.create_task(fwd.run_forwarder(fwd_cfg, poll_interval=0.05, stop_event=stop))

    # try/finally so both background tasks are stopped even when an
    # assertion fails.
    try:
        # Phase 1: write TWO pre-rotation lines so the offset is deep into the file.
        worker_log.write_text(SAMPLE.format(msg="rotate-A") + SAMPLE.format(msg="rotate-B"))
        ok = await _wait_for(lambda: master_log.exists() and b"rotate-B" in master_log.read_bytes())
        assert ok, "pre-rotation lines never reached master"
        size_before_rotate = master_log.stat().st_size

        # Phase 2: rotate (truncate to a strictly SHORTER content) so the
        # forwarder's offset tracker lands past EOF and must reset to 0.
        worker_log.write_text(SAMPLE.format(msg="P"))
        ok = await _wait_for(
            lambda: master_log.stat().st_size > size_before_rotate
            and master_log.read_text().rstrip().endswith("P"),
            timeout=5.0,
        )
        assert ok, "forwarder got stuck past EOF after rotation (expected reset → ship post-rotate 'P' line)"
    finally:
        stop.set()
        for t in (ft, lt):
            try:
                await asyncio.wait_for(t, timeout=5)
            except asyncio.TimeoutError:
                t.cancel()
@pytest.mark.asyncio
async def test_forwarder_resumes_after_listener_restart(
    tmp_path: pathlib.Path, _pki_env: dict
) -> None:
    """Listener goes down mid-session, forwarder retries with backoff; on
    restart, we must NOT re-ship lines that were already drained."""
    port = _free_port()
    worker_log = tmp_path / "decnet.log"
    master_log = tmp_path / "master.log"
    master_json = tmp_path / "master.json"
    state_db = tmp_path / "fwd.db"
    listener_cfg = lst.ListenerConfig(
        log_path=master_log, json_path=master_json,
        bind_host="127.0.0.1", bind_port=port, ca_dir=_pki_env["ca_dir"],
    )
    fwd_cfg = fwd.ForwarderConfig(
        log_path=worker_log, master_host="127.0.0.1", master_port=port,
        agent_dir=_pki_env["worker_dir"], state_db=state_db,
    )

    # --- phase 1 ----------------------------------------------------------
    stop1 = asyncio.Event()
    stop2 = asyncio.Event()
    stop_fwd = asyncio.Event()
    lt1 = asyncio.create_task(lst.run_listener(listener_cfg, stop_event=stop1))
    lt2 = None  # second listener; created after the simulated outage
    await asyncio.sleep(0.2)
    ft = asyncio.create_task(fwd.run_forwarder(fwd_cfg, poll_interval=0.05, stop_event=stop_fwd))

    # try/finally so every background task is stopped even when an
    # assertion fails part-way through.
    try:
        worker_log.write_text(SAMPLE.format(msg="before-outage"))
        ok = await _wait_for(lambda: master_log.exists() and b"before-outage" in master_log.read_bytes())
        assert ok, "phase-1 line never reached master"

        # --- outage -------------------------------------------------------
        stop1.set()
        try:
            await asyncio.wait_for(lt1, timeout=5)
        except asyncio.TimeoutError:
            lt1.cancel()

        # While listener is down, append another line. Forwarder will retry.
        with open(worker_log, "a", encoding="utf-8") as f:
            f.write(SAMPLE.format(msg="during-outage"))
        await asyncio.sleep(0.3)

        # --- phase 2: listener back ---------------------------------------
        lt2 = asyncio.create_task(lst.run_listener(listener_cfg, stop_event=stop2))
        ok = await _wait_for(lambda: b"during-outage" in master_log.read_bytes(), timeout=15.0)
        assert ok, "forwarder never reshipped the buffered line after listener restart"

        # Crucially, "before-outage" appears exactly once — not re-shipped.
        body = master_log.read_text()
        assert body.count("before-outage") == 1, "forwarder duplicated a line across reconnect"
        assert body.count("during-outage") == 1
    finally:
        # --- shutdown -----------------------------------------------------
        stop_fwd.set()
        stop1.set()
        stop2.set()
        pending = [ft]
        if not lt1.done():
            # Only reached when the test failed before the outage phase.
            pending.append(lt1)
        if lt2 is not None:
            pending.append(lt2)
        for t in pending:
            try:
                await asyncio.wait_for(t, timeout=5)
            except asyncio.TimeoutError:
                t.cancel()
@pytest.mark.asyncio
async def test_listener_tolerates_client_dropping_mid_stream(
    tmp_path: pathlib.Path, _pki_env: dict
) -> None:
    """A well-authenticated client that sends a partial frame and drops must
    not take the listener down or wedge subsequent connections."""
    port = _free_port()
    master_log = tmp_path / "master.log"
    master_json = tmp_path / "master.json"
    listener_cfg = lst.ListenerConfig(
        log_path=master_log, json_path=master_json,
        bind_host="127.0.0.1", bind_port=port, ca_dir=_pki_env["ca_dir"],
    )
    stop = asyncio.Event()
    listener_task = asyncio.create_task(lst.run_listener(listener_cfg, stop_event=stop))
    await asyncio.sleep(0.2)

    async def _send_and_close(writer, data: bytes) -> None:
        # Write, flush, close; errors while waiting for close are ignored.
        writer.write(data)
        await writer.drain()
        writer.close()
        try:
            await writer.wait_closed()
        except Exception:  # nosec B110
            pass

    try:
        worker_dir = _pki_env["worker_dir"]
        ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
        ctx.load_cert_chain(
            str(worker_dir / "worker.crt"),
            str(worker_dir / "worker.key"),
        )
        ctx.load_verify_locations(cafile=str(worker_dir / "ca.crt"))
        ctx.verify_mode = ssl.CERT_REQUIRED
        ctx.check_hostname = False

        # Client 1: send a truncated octet-count prefix ("99 ") but no payload
        # before closing — exercises IncompleteReadError in read_frame.
        _reader1, writer1 = await asyncio.open_connection("127.0.0.1", port, ssl=ctx)
        await _send_and_close(writer1, b"99 ")  # promise 99 bytes, send 0

        # Client 2: reconnect cleanly and actually ship a frame. If the
        # listener survived client-1's misbehavior, this must succeed.
        _reader2, writer2 = await asyncio.open_connection("127.0.0.1", port, ssl=ctx)
        payload = b'<13>1 2026-04-18T00:00:00Z decky01 svc - - - post-drop'
        await _send_and_close(writer2, f"{len(payload)} ".encode() + payload)

        delivered = await _wait_for(
            lambda: master_log.exists() and b"post-drop" in master_log.read_bytes()
        )
        assert delivered, "listener got wedged by a mid-frame client drop"
    finally:
        stop.set()
        try:
            await asyncio.wait_for(listener_task, timeout=5)
        except asyncio.TimeoutError:
            listener_task.cancel()

View File

@@ -0,0 +1,300 @@
"""Tests for POST /swarm/heartbeat — cert pinning + shard snapshot refresh."""
from __future__ import annotations
import asyncio
import hashlib
import pathlib
from typing import Any
from unittest.mock import MagicMock
import pytest
from fastapi.testclient import TestClient
from decnet.web.db.factory import get_repository
from decnet.web.dependencies import get_repo
from decnet.web.router.swarm import api_heartbeat as hb_mod
# ------------------------- shared fixtures (mirror test_swarm_api.py) ---
@pytest.fixture
def ca_dir(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> pathlib.Path:
    """Redirect ``pki.DEFAULT_CA_DIR`` to ``<tmp_path>/ca`` and return that path.

    Also re-binds the ``pki`` attribute on the swarm client and enroll
    endpoint modules to the same ``pki`` module object.
    """
    from decnet.swarm import client as swarm_client
    from decnet.swarm import pki
    from decnet.web.router.swarm import api_enroll_host as enroll_mod

    ca_path = tmp_path / "ca"
    monkeypatch.setattr(pki, "DEFAULT_CA_DIR", ca_path)
    for consumer in (swarm_client, enroll_mod):
        monkeypatch.setattr(consumer, "pki", pki)
    return ca_path
@pytest.fixture
def repo(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch):
    """Build a repository on ``<tmp_path>/hb.db`` and install it as the module-level ``repo``."""
    import decnet.web.dependencies as deps
    import decnet.web.swarm_api as swarm_api_mod

    database_file = tmp_path / "hb.db"
    repository = get_repository(db_path=str(database_file))
    for module in (deps, swarm_api_mod):
        monkeypatch.setattr(module, "repo", repository)
    return repository
@pytest.fixture
def client(repo, ca_dir: pathlib.Path):
    """Yield a TestClient for the swarm API with ``get_repo`` overridden.

    The override returns the per-test ``repo`` fixture. All dependency
    overrides are cleared on teardown.
    """
    from decnet.web.swarm_api import app

    async def _override() -> Any:
        return repo

    app.dependency_overrides[get_repo] = _override
    # try/finally so the override is removed even if TestClient startup or
    # shutdown raises; otherwise it would leak into later tests.
    try:
        with TestClient(app) as c:
            yield c
    finally:
        app.dependency_overrides.clear()
def _enroll(client: TestClient, name: str, address: str = "10.0.0.5") -> dict:
    """Enroll a worker via POST /swarm/enroll and return the decoded reply.

    Asserts the endpoint answers 201; the agent port is fixed at 8765.
    """
    enrollment = {"name": name, "address": address, "agent_port": 8765}
    response = client.post("/swarm/enroll", json=enrollment)
    assert response.status_code == 201, response.text
    return response.json()
def _pin_fingerprint(monkeypatch: pytest.MonkeyPatch, fp: str | None) -> None:
    """Force ``_extract_peer_fingerprint`` to return ``fp`` inside the
    endpoint module so we don't need a live TLS peer."""

    def _fixed_fingerprint(scope):
        # The ASGI scope is ignored; the pinned value is always returned.
        return fp

    monkeypatch.setattr(hb_mod, "_extract_peer_fingerprint", _fixed_fingerprint)
def _status_body(deckies: list[dict], runtime: dict[str, dict]) -> dict:
return {
"deployed": True,
"mode": "swarm",
"compose_path": "/run/decnet/compose.yml",
"deckies": deckies,
"runtime": runtime,
}
def _decky_payload(name: str = "decky-01", ip: str = "10.0.0.50") -> dict:
return {
"name": name,
"hostname": f"{name}.lan",
"distro": "debian-bookworm",
"ip": ip,
"services": ["ssh"],
"base_image": "debian:bookworm-slim",
"service_config": {"ssh": {"port": 22}},
"mutate_interval": 3600,
"last_mutated": 0.0,
"archetype": "generic",
"host_uuid": None,
}
# ------------------------- _extract_peer_fingerprint unit tests ---------
def test_extract_primary_path_returns_fingerprint() -> None:
    """A cert in ``scope["extensions"]["tls"]["client_cert_chain"]`` yields its SHA-256 hex digest."""
    der = b"\x30\x82test-cert-bytes"
    expected = hashlib.sha256(der).hexdigest()
    tls_extension = {"client_cert_chain": [der]}
    scope = {"extensions": {"tls": tls_extension}}
    assert hb_mod._extract_peer_fingerprint(scope) == expected
def test_extract_fallback_path_when_primary_absent() -> None:
    """Without the TLS extension, the fingerprint comes from the transport's ssl_object."""
    der = b"\x30\x82fallback-bytes"

    fake_ssl_object = MagicMock()
    fake_ssl_object.getpeercert.return_value = der
    fake_transport = MagicMock()
    fake_transport.get_extra_info.return_value = fake_ssl_object

    fingerprint = hb_mod._extract_peer_fingerprint({"transport": fake_transport})

    assert fingerprint == hashlib.sha256(der).hexdigest()
    # The helper must ask for the ssl object and the DER (binary) cert form.
    fake_transport.get_extra_info.assert_called_with("ssl_object")
    fake_ssl_object.getpeercert.assert_called_with(binary_form=True)
def test_extract_returns_none_when_both_paths_empty() -> None:
    """An empty scope produces no fingerprint."""
    # No extensions, no transport → fail-closed signal for the endpoint.
    empty_scope: dict = {}
    fingerprint = hb_mod._extract_peer_fingerprint(empty_scope)
    assert fingerprint is None
def test_extract_returns_none_when_transport_ssl_object_missing() -> None:
    """A transport whose ``get_extra_info`` returns None produces no fingerprint."""
    plain_transport = MagicMock()
    plain_transport.get_extra_info.return_value = None
    fingerprint = hb_mod._extract_peer_fingerprint({"transport": plain_transport})
    assert fingerprint is None
# ------------------------- endpoint behaviour --------------------------
def test_heartbeat_happy_path_primary_extraction(
    client: TestClient, repo, monkeypatch: pytest.MonkeyPatch
) -> None:
    """A valid heartbeat returns 204, marks the host active and stores one shard."""
    host = _enroll(client, "worker-a")
    host_uuid = host["host_uuid"]
    _pin_fingerprint(monkeypatch, host["fingerprint"])

    status = _status_body(
        [_decky_payload("decky-01")],
        {"decky-01": {"running": True}},
    )
    resp = client.post(
        "/swarm/heartbeat",
        json={"host_uuid": host_uuid, "agent_version": "1.2.3", "status": status},
    )
    assert resp.status_code == 204, resp.text

    async def _verify() -> None:
        host_row = await repo.get_swarm_host_by_uuid(host_uuid)
        assert host_row["last_heartbeat"] is not None
        assert host_row["status"] == "active"

        shards = await repo.list_decky_shards(host_uuid)
        assert len(shards) == 1
        shard = shards[0]
        for key, want in (
            ("decky_name", "decky-01"),
            ("decky_ip", "10.0.0.50"),
            ("state", "running"),
        ):
            assert shard[key] == want
        assert shard["last_seen"] is not None
        # Fields flattened out of the stored snapshot by list_decky_shards.
        for key, want in (
            ("hostname", "decky-01.lan"),
            ("archetype", "generic"),
            ("service_config", {"ssh": {"port": 22}}),
        ):
            assert shard[key] == want

    asyncio.run(_verify())
def test_heartbeat_fallback_extraction_path_also_accepted(
    client: TestClient, repo, monkeypatch: pytest.MonkeyPatch
) -> None:
    """The endpoint accepts any heartbeat whose extracted fingerprint matches.

    The extractor is pinned here, so the endpoint sees the same value no
    matter which scope path would have supplied it. This guards against
    uvicorn-version drift where only the fallback slot is populated.
    """
    host = _enroll(client, "worker-b", "10.0.0.6")
    _pin_fingerprint(monkeypatch, host["fingerprint"])

    heartbeat = {
        "host_uuid": host["host_uuid"],
        "status": {"deployed": False, "deckies": []},
    }
    resp = client.post("/swarm/heartbeat", json=heartbeat)

    assert resp.status_code == 204
def test_heartbeat_unknown_host_returns_404(
    client: TestClient, monkeypatch: pytest.MonkeyPatch
) -> None:
    """A heartbeat for a host_uuid that was never enrolled gets a 404."""
    _pin_fingerprint(monkeypatch, "a" * 64)

    heartbeat = {"host_uuid": "does-not-exist", "status": {"deployed": False}}
    resp = client.post("/swarm/heartbeat", json=heartbeat)

    assert resp.status_code == 404
def test_heartbeat_fingerprint_mismatch_returns_403(
    client: TestClient, monkeypatch: pytest.MonkeyPatch
) -> None:
    """A peer fingerprint different from the enrolled one is refused with 403."""
    host = _enroll(client, "worker-c", "10.0.0.7")
    wrong_fingerprint = "b" * 64  # not the host's fingerprint
    _pin_fingerprint(monkeypatch, wrong_fingerprint)

    heartbeat = {"host_uuid": host["host_uuid"], "status": {"deployed": False}}
    resp = client.post("/swarm/heartbeat", json=heartbeat)

    assert resp.status_code == 403
    assert "mismatch" in resp.json()["detail"]
def test_heartbeat_no_peer_cert_fails_closed(
    client: TestClient, monkeypatch: pytest.MonkeyPatch
) -> None:
    """When no fingerprint can be extracted the endpoint answers 403.

    Fail-closed: both extraction paths being unavailable must never
    result in a successful heartbeat.
    """
    host = _enroll(client, "worker-d", "10.0.0.8")
    _pin_fingerprint(monkeypatch, None)

    heartbeat = {"host_uuid": host["host_uuid"], "status": {"deployed": False}}
    resp = client.post("/swarm/heartbeat", json=heartbeat)

    assert resp.status_code == 403
    assert "unavailable" in resp.json()["detail"]
def test_heartbeat_decommissioned_host_returns_404(
    client: TestClient, repo, monkeypatch: pytest.MonkeyPatch
) -> None:
    """A heartbeat replayed after the host row was deleted gets a 404.

    The worker's certificate is still CA-signed, but decommissioning
    removed the host row, so the lookup finds nothing. This prevents
    ghost shards from a decommissioned worker.
    """
    host = _enroll(client, "worker-e", "10.0.0.9")
    host_uuid = host["host_uuid"]
    captured_fp = host["fingerprint"]

    async def _delete() -> None:
        deleted = await repo.delete_swarm_host(host_uuid)
        assert deleted

    asyncio.run(_delete())

    _pin_fingerprint(monkeypatch, captured_fp)
    heartbeat = {"host_uuid": host_uuid, "status": {"deployed": False}}
    resp = client.post("/swarm/heartbeat", json=heartbeat)

    assert resp.status_code == 404
def test_heartbeat_deployed_false_bumps_host_but_writes_no_shards(
    client: TestClient, repo, monkeypatch: pytest.MonkeyPatch
) -> None:
    """An undeployed heartbeat stamps ``last_heartbeat`` and stores no shards."""
    host = _enroll(client, "worker-f", "10.0.0.10")
    host_uuid = host["host_uuid"]
    _pin_fingerprint(monkeypatch, host["fingerprint"])

    heartbeat = {
        "host_uuid": host_uuid,
        "status": {"deployed": False, "deckies": []},
    }
    resp = client.post("/swarm/heartbeat", json=heartbeat)
    assert resp.status_code == 204

    async def _verify() -> None:
        host_row = await repo.get_swarm_host_by_uuid(host_uuid)
        assert host_row["last_heartbeat"] is not None
        stored = await repo.list_decky_shards(host_uuid)
        assert stored == []

    asyncio.run(_verify())
def test_heartbeat_decky_missing_from_runtime_is_degraded(
    client: TestClient, repo, monkeypatch: pytest.MonkeyPatch
) -> None:
    """A decky listed in the status but absent from ``runtime`` is degraded."""
    host = _enroll(client, "worker-g", "10.0.0.11")
    host_uuid = host["host_uuid"]
    _pin_fingerprint(monkeypatch, host["fingerprint"])

    deckies = [_decky_payload("decky-01"), _decky_payload("decky-02", "10.0.0.51")]
    runtime = {"decky-01": {"running": True}}  # decky-02 absent
    resp = client.post(
        "/swarm/heartbeat",
        json={"host_uuid": host_uuid, "status": _status_body(deckies, runtime)},
    )
    assert resp.status_code == 204

    async def _verify() -> None:
        states = {
            shard["decky_name"]: shard["state"]
            for shard in await repo.list_decky_shards(host_uuid)
        }
        assert states["decky-01"] == "running"
        assert states["decky-02"] == "degraded"

    asyncio.run(_verify())

View File

@@ -0,0 +1,224 @@
"""Heartbeat-driven topology resync: master flags divergent agents.
When an agent reports an applied_version_hash that differs from what
master computed for the topology pinned to that host (or reports no
topology at all while master expects one), the heartbeat handler must
set ``needs_resync=True`` on the topology row. The mutator reconcile
loop picks it up later — tested separately.
"""
from __future__ import annotations
import pathlib
from typing import Any
import pytest
from fastapi.testclient import TestClient
from decnet.topology.config import TopologyConfig
from decnet.topology.generator import generate
from decnet.topology.hashing import canonical_hash
from decnet.topology.persistence import hydrate, persist, transition_status
from decnet.topology.status import TopologyStatus
from decnet.web.db.factory import get_repository
from decnet.web.dependencies import get_repo
from decnet.web.router.swarm import api_heartbeat as hb_mod
@pytest.fixture
def ca_dir(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> pathlib.Path:
    """Point the PKI default CA directory at a per-test temp path.

    Also re-binds the ``pki`` attribute on the swarm client and the
    enroll router modules. Returns the temp CA path (not created here).
    """
    from decnet.swarm import client as swarm_client
    from decnet.swarm import pki
    from decnet.web.router.swarm import api_enroll_host as enroll_mod

    test_ca = tmp_path / "ca"
    monkeypatch.setattr(pki, "DEFAULT_CA_DIR", test_ca)
    for module in (swarm_client, enroll_mod):
        monkeypatch.setattr(module, "pki", pki)
    return test_ca
@pytest.fixture
def repo(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch):
    """Create a repository on a temp DB file and install it as the app's repo.

    The module-level ``repo`` in both ``decnet.web.dependencies`` and
    ``decnet.web.swarm_api`` is swapped for the duration of the test.
    """
    import decnet.web.dependencies as deps
    import decnet.web.swarm_api as swarm_api_mod

    test_repo = get_repository(db_path=str(tmp_path / "hb-resync.db"))
    for module in (deps, swarm_api_mod):
        monkeypatch.setattr(module, "repo", test_repo)
    return test_repo
@pytest.fixture
def client(repo, ca_dir):
    """Yield a ``TestClient`` for the swarm API backed by the test ``repo``.

    ``get_repo`` is overridden to return the fixture repository. The
    override table is cleared on teardown; ``ca_dir`` is requested only
    so the PKI redirection is active before the app starts.
    """
    from decnet.web.swarm_api import app

    async def _override() -> Any:
        return repo

    app.dependency_overrides[get_repo] = _override
    try:
        with TestClient(app) as c:
            yield c
    finally:
        # Runs even when leaving the ``with`` raises (e.g. a failing app
        # shutdown), so the override cannot leak into later tests.
        app.dependency_overrides.clear()
def _enroll(c: TestClient, name: str, address: str = "10.0.0.5") -> dict:
    """Enroll a worker through ``POST /swarm/enroll`` and return the JSON body.

    Args:
        c: Client used to reach the swarm API.
        name: Worker name to enroll.
        address: Worker address; defaults to the value this helper has
            always sent, so existing two-argument calls are unchanged.

    Returns:
        The decoded enrollment response.

    Raises:
        AssertionError: If the endpoint does not answer 201.
    """
    r = c.post(
        "/swarm/enroll",
        json={"name": name, "address": address, "agent_port": 8765},
    )
    assert r.status_code == 201, r.text
    return r.json()
def _cfg(**kw) -> TopologyConfig:
    """Return a small fixed-seed ``TopologyConfig``; ``kw`` overrides any field."""
    defaults = {
        "name": "hb-resync",
        "mode": "agent",
        "depth": 1,
        "branching_factor": 1,
        "deckies_per_lan_min": 1,
        "deckies_per_lan_max": 1,
        "cross_edge_probability": 0.0,
        "randomize_services": False,
        "services_explicit": ["ssh"],
        "seed": 3,
    }
    return TopologyConfig(**{**defaults, **kw})
async def _persist_active(repo, host_uuid: str) -> tuple[str, str]:
    """Persist a generated topology for ``host_uuid`` and drive it to ACTIVE.

    Returns ``(topology_id, hash)`` where the hash is ``canonical_hash``
    of the topology as re-hydrated from the repository.
    """
    topology_id = await persist(repo, generate(_cfg()), target_host_uuid=host_uuid)
    for next_status in (TopologyStatus.DEPLOYING, TopologyStatus.ACTIVE):
        await transition_status(repo, topology_id, next_status)
    stored = await hydrate(repo, topology_id)
    return topology_id, canonical_hash(stored)
@pytest.mark.anyio
async def test_heartbeat_matching_hash_does_not_flag(
    client: TestClient, repo, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Reporting the hash master computed leaves ``needs_resync`` false."""
    host = _enroll(client, "worker-match")
    fingerprint = host["fingerprint"]
    monkeypatch.setattr(hb_mod, "_extract_peer_fingerprint", lambda s: fingerprint)
    tid, expected = await _persist_active(repo, host["host_uuid"])

    topology_report = {
        "topology_id": tid,
        "applied_version_hash": expected,
        "observed": {"bridges": [], "containers": []},
    }
    heartbeat = {
        "host_uuid": host["host_uuid"],
        "status": {"deployed": False},
        "topology": topology_report,
    }
    resp = client.post("/swarm/heartbeat", json=heartbeat)

    assert resp.status_code == 204, resp.text
    topology_row = await repo.get_topology(tid)
    assert topology_row["needs_resync"] is False
@pytest.mark.anyio
async def test_heartbeat_hash_mismatch_flags_resync(
    client: TestClient, repo, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Reporting a hash other than master's sets ``needs_resync``."""
    host = _enroll(client, "worker-drift")
    fingerprint = host["fingerprint"]
    monkeypatch.setattr(hb_mod, "_extract_peer_fingerprint", lambda s: fingerprint)
    tid, _ = await _persist_active(repo, host["host_uuid"])

    topology_report = {
        "topology_id": tid,
        "applied_version_hash": "stale-hash-" + "0" * 40,
        "observed": {"bridges": [], "containers": []},
    }
    heartbeat = {
        "host_uuid": host["host_uuid"],
        "status": {"deployed": False},
        "topology": topology_report,
    }
    resp = client.post("/swarm/heartbeat", json=heartbeat)

    assert resp.status_code == 204, resp.text
    topology_row = await repo.get_topology(tid)
    assert topology_row["needs_resync"] is True
@pytest.mark.anyio
async def test_heartbeat_agent_reports_no_topology_flags_resync(
    client: TestClient, repo, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Fresh-boot / wiped-cache case.

    The agent reports ``null`` for both topology id and hash while master
    holds an ACTIVE topology pinned to it, so the topology is flagged for
    re-push.
    """
    host = _enroll(client, "worker-fresh")
    fingerprint = host["fingerprint"]
    monkeypatch.setattr(hb_mod, "_extract_peer_fingerprint", lambda s: fingerprint)
    tid, _ = await _persist_active(repo, host["host_uuid"])

    empty_report = {
        "topology_id": None,
        "applied_version_hash": None,
        "observed": {"bridges": [], "containers": []},
    }
    heartbeat = {
        "host_uuid": host["host_uuid"],
        "status": {"deployed": False},
        "topology": empty_report,
    }
    resp = client.post("/swarm/heartbeat", json=heartbeat)

    assert resp.status_code == 204, resp.text
    topology_row = await repo.get_topology(tid)
    assert topology_row["needs_resync"] is True
@pytest.mark.anyio
async def test_heartbeat_without_topology_block_is_noop_for_resync(
    client: TestClient, repo, monkeypatch: pytest.MonkeyPatch
) -> None:
    """A heartbeat with no ``topology`` key still flags a pinned topology.

    Legacy agents that omit the topology block remain valid (204), but
    the omission is treated like "no topology reported": the ACTIVE
    topology pinned to the host gets ``needs_resync=True``. Despite the
    "noop" in this test's name, that flag being set is what is asserted.
    """
    host = _enroll(client, "worker-legacy")
    fingerprint = host["fingerprint"]
    monkeypatch.setattr(hb_mod, "_extract_peer_fingerprint", lambda s: fingerprint)
    tid, _ = await _persist_active(repo, host["host_uuid"])

    heartbeat = {"host_uuid": host["host_uuid"], "status": {"deployed": False}}
    resp = client.post("/swarm/heartbeat", json=heartbeat)

    assert resp.status_code == 204, resp.text
    topology_row = await repo.get_topology(tid)
    # No topology block == agent reported nothing == flag for resync.
    assert topology_row["needs_resync"] is True
@pytest.mark.anyio
async def test_heartbeat_other_host_topology_unaffected(
    client: TestClient, repo, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Reports from one host must not flip resync flags on another
    host's topologies."""
    host_a = _enroll(client, "worker-a")
    # Second host needs its own address, so it is enrolled by hand. Check
    # the status here so a failed enrollment is reported as such rather
    # than as a KeyError on the response body further down.
    enroll_b = client.post(
        "/swarm/enroll",
        json={"name": "worker-b", "address": "10.0.0.6", "agent_port": 8765},
    )
    assert enroll_b.status_code == 201, enroll_b.text
    host_b = enroll_b.json()
    monkeypatch.setattr(hb_mod, "_extract_peer_fingerprint", lambda s: host_b["fingerprint"])

    # The topology is pinned to host A; the heartbeat below comes from B.
    tid_a, _ = await _persist_active(repo, host_a["host_uuid"])

    resp = client.post(
        "/swarm/heartbeat",
        json={
            "host_uuid": host_b["host_uuid"],
            "status": {"deployed": False},
            "topology": {
                "topology_id": None,
                "applied_version_hash": None,
                "observed": {},
            },
        },
    )
    assert resp.status_code == 204, resp.text
    row = await repo.get_topology(tid_a)
    assert row["needs_resync"] is False

View File

@@ -0,0 +1,282 @@
"""Tests for the syslog-over-TLS pipeline.
Covers:
* octet-counted framing encode/decode (pure functions);
* offset persistence across reopens;
* end-to-end mTLS roundtrip forwarder → listener;
* impostor-CA worker is rejected at TLS handshake.
"""
from __future__ import annotations
import asyncio
import pathlib
import socket
import pytest
import ssl
from decnet.swarm import log_forwarder as fwd
from decnet.swarm import log_listener as lst
from decnet.swarm import pki
from decnet.swarm.client import ensure_master_identity
def _free_port() -> int:
    """Return a TCP port number that was free on 127.0.0.1 a moment ago.

    The port is obtained by binding to port 0 and reading back the
    kernel's choice. The probe socket is closed before returning (also
    when ``bind`` fails), so another process could in principle claim
    the port before the caller binds it.
    """
    with socket.socket() as probe:
        probe.bind(("127.0.0.1", 0))
        return probe.getsockname()[1]
# ------------------------------------------------------------ pure framing
def test_encode_frame_matches_rfc5425_shape() -> None:
    """``encode_frame`` emits ``<len> <msg>``: ASCII digits, a space, the payload."""
    frame = fwd.encode_frame("<13>1 2026-04-18T00:00:00Z decky01 svc - - - hi")

    assert frame.startswith(b"47 ")
    assert frame.endswith(b"hi")
    # The numeric prefix must equal the byte length of what follows it.
    prefix, _, payload = frame.partition(b" ")
    assert int(prefix) == len(payload)
@pytest.mark.asyncio
async def test_read_frame_roundtrip() -> None:
    """``read_frame`` returns the bytes that ``encode_frame`` wrapped."""
    message = b"<13>1 2026-04-18T00:00:00Z host app - - - msg"

    stream = asyncio.StreamReader()
    stream.feed_data(fwd.encode_frame(message.decode()))
    stream.feed_eof()

    assert await fwd.read_frame(stream) == message
@pytest.mark.asyncio
async def test_read_frame_rejects_bad_prefix() -> None:
    """A non-numeric length prefix makes ``read_frame`` raise ``ValueError``."""
    stream = asyncio.StreamReader()
    stream.feed_data(b"NOTANUMBER msg")
    stream.feed_eof()

    with pytest.raises(ValueError):
        await fwd.read_frame(stream)
# ------------------------------------------------------------- offset store
def test_offset_store_persists_across_reopen(tmp_path: pathlib.Path) -> None:
    """An offset written by one ``_OffsetStore`` is read back by the next."""
    db_path = tmp_path / "fwd.db"

    writer = fwd._OffsetStore(db_path)
    assert writer.get() == 0  # a fresh store starts at zero
    writer.set(4242)
    writer.close()

    reader = fwd._OffsetStore(db_path)
    assert reader.get() == 4242
    reader.close()
# ------------------------------------------------------------ TLS roundtrip
@pytest.fixture
def _pki_env(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch):
    """Provision a temp CA, a master identity and one worker bundle.

    Returns a dict with ``ca_dir`` and ``worker_dir``; the PKI module's
    default CA and agent directories are pointed at the same paths.
    """
    ca_dir = tmp_path / "ca"
    worker_dir = tmp_path / "agent"

    pki.ensure_ca(ca_dir)
    # Master identity (also used as the listener's server cert). Per the
    # original note, the default cert already carries a 127.0.0.1 SAN, so
    # nothing extra is needed even if a worker enabled check_hostname.
    ensure_master_identity(ca_dir)

    # Worker bundle — enrolled with a 127.0.0.1 SAN.
    bundle = pki.issue_worker_cert(pki.load_ca(ca_dir), "worker-x", ["127.0.0.1"])
    pki.write_worker_bundle(bundle, worker_dir)

    monkeypatch.setattr(pki, "DEFAULT_CA_DIR", ca_dir)
    monkeypatch.setattr(pki, "DEFAULT_AGENT_DIR", worker_dir)
    return {"ca_dir": ca_dir, "worker_dir": worker_dir}
@pytest.mark.asyncio
async def test_forwarder_to_listener_roundtrip(
    tmp_path: pathlib.Path, _pki_env: dict
) -> None:
    """Ship three lines forwarder → listener and check both master sinks."""
    port = _free_port()
    worker_log = tmp_path / "decnet.log"
    master_log = tmp_path / "master.log"
    master_json = tmp_path / "master.json"
    worker_log.write_text("")  # start from an existing, empty file

    listener_cfg = lst.ListenerConfig(
        log_path=master_log,
        json_path=master_json,
        bind_host="127.0.0.1",
        bind_port=port,
        ca_dir=_pki_env["ca_dir"],
    )
    fwd_cfg = fwd.ForwarderConfig(
        log_path=worker_log,
        master_host="127.0.0.1",
        master_port=port,
        agent_dir=_pki_env["worker_dir"],
        state_db=tmp_path / "fwd.db",
    )

    shutdown = asyncio.Event()
    listener_task = asyncio.create_task(
        lst.run_listener(listener_cfg, stop_event=shutdown)
    )
    await asyncio.sleep(0.2)  # wait for bind
    forwarder_task = asyncio.create_task(
        fwd.run_forwarder(fwd_cfg, poll_interval=0.05, stop_event=shutdown)
    )

    # Three identical RFC 5424-ish lines appended to the worker log.
    sample = (
        '<13>1 2026-04-18T00:00:00Z decky01 ssh-service 1 - '
        '[decnet@53595 decky="decky01" service="ssh-service" event_type="connect" '
        'attacker_ip="1.2.3.4" attacker_port="4242"] ssh connect\n'
    )
    with open(worker_log, "a", encoding="utf-8") as handle:
        handle.writelines([sample] * 3)

    # Poll (up to 50 × 0.1 s) until something lands on the master side.
    polls = 0
    while polls < 50 and not (
        master_log.exists() and master_log.stat().st_size > 0
    ):
        polls += 1
        await asyncio.sleep(0.1)

    shutdown.set()
    for task in (forwarder_task, listener_task):
        try:
            await asyncio.wait_for(task, timeout=5)
        except asyncio.TimeoutError:
            task.cancel()

    assert master_log.exists()
    delivered = master_log.read_text()
    assert delivered.count("ssh connect") == 3
    # Worker provenance tagged in the JSON sink.
    assert master_json.exists()
    assert "worker-x" in master_json.read_text()
@pytest.mark.asyncio
async def test_forwarder_resumes_from_persisted_offset(
    tmp_path: pathlib.Path, _pki_env: dict
) -> None:
    """A forwarder started with a stored offset ships only later lines.

    Simulates a restart: two lines are already in the worker log and the
    offset store says they were delivered, so only the line appended
    after startup may reach the master.
    """
    port = _free_port()
    worker_log = tmp_path / "decnet.log"
    master_log = tmp_path / "master.log"
    master_json = tmp_path / "master.json"
    state_db = tmp_path / "fwd.db"

    # Two pre-existing lines plus an offset pointing just past them.
    old_line = (
        '<13>1 2026-04-18T00:00:00Z decky01 svc 1 - [x] old\n'
    )
    worker_log.write_text(old_line * 2)
    offsets = fwd._OffsetStore(state_db)
    offsets.set(len(old_line) * 2)
    offsets.close()

    listener_cfg = lst.ListenerConfig(
        log_path=master_log,
        json_path=master_json,
        bind_host="127.0.0.1",
        bind_port=port,
        ca_dir=_pki_env["ca_dir"],
    )
    fwd_cfg = fwd.ForwarderConfig(
        log_path=worker_log,
        master_host="127.0.0.1",
        master_port=port,
        agent_dir=_pki_env["worker_dir"],
        state_db=state_db,
    )

    shutdown = asyncio.Event()
    listener_task = asyncio.create_task(
        lst.run_listener(listener_cfg, stop_event=shutdown)
    )
    await asyncio.sleep(0.2)
    forwarder_task = asyncio.create_task(
        fwd.run_forwarder(fwd_cfg, poll_interval=0.05, stop_event=shutdown)
    )

    # Append a NEW line after startup — only this should reach the master.
    new_line = (
        '<13>1 2026-04-18T00:00:01Z decky01 svc 1 - [x] fresh\n'
    )
    with open(worker_log, "a", encoding="utf-8") as handle:
        handle.write(new_line)

    polls = 0
    while polls < 50 and not (
        master_log.exists() and b"fresh" in master_log.read_bytes()
    ):
        polls += 1
        await asyncio.sleep(0.1)

    shutdown.set()
    for task in (forwarder_task, listener_task):
        try:
            await asyncio.wait_for(task, timeout=5)
        except asyncio.TimeoutError:
            task.cancel()

    delivered = master_log.read_text()
    assert "fresh" in delivered
    assert "old" not in delivered, "forwarder re-shipped lines already acked before restart"
@pytest.mark.asyncio
async def test_impostor_worker_rejected_at_tls(
    tmp_path: pathlib.Path, _pki_env: dict
) -> None:
    """A worker cert issued by a foreign CA must not get log lines through.

    The impostor presents an "Evil CA" client certificate but trusts the
    REAL CA for the server certificate. That way the client's own
    verification of the listener succeeds and any rejection has to come
    from the listener checking the client certificate — which is the
    behaviour under test. (If the client trusted only the evil CA it
    would abort the handshake itself and the test would pass without the
    listener ever being exercised.)

    The rejection may surface as a handshake/IO error on the client, or —
    when the client side of the handshake completes before the server has
    validated the client certificate — only as the absence of any
    delivered log data. Both outcomes are accepted.
    """
    port = _free_port()
    master_log = tmp_path / "master.log"
    master_json = tmp_path / "master.json"
    listener_cfg = lst.ListenerConfig(
        log_path=master_log,
        json_path=master_json,
        bind_host="127.0.0.1",
        bind_port=port,
        ca_dir=_pki_env["ca_dir"],
    )
    stop = asyncio.Event()
    listener_task = asyncio.create_task(lst.run_listener(listener_cfg, stop_event=stop))
    await asyncio.sleep(0.2)

    try:
        # Client identity signed by a DIFFERENT CA — should be rejected.
        evil_ca = pki.generate_ca("Evil CA")
        evil_dir = tmp_path / "evil"
        pki.write_worker_bundle(
            pki.issue_worker_cert(evil_ca, "evil-worker", ["127.0.0.1"]), evil_dir
        )

        ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
        ctx.load_cert_chain(str(evil_dir / "worker.crt"), str(evil_dir / "worker.key"))
        # Trust the genuine CA (shipped in the legitimate worker bundle) so
        # the server certificate verifies and the client does not bail out
        # on its own.
        ctx.load_verify_locations(cafile=str(_pki_env["worker_dir"] / "ca.crt"))
        ctx.verify_mode = ssl.CERT_REQUIRED
        ctx.check_hostname = False

        rejected = False
        try:
            r, w = await asyncio.open_connection("127.0.0.1", port, ssl=ctx)
            # If TLS somehow succeeded, push a frame and expect the server to drop it.
            w.write(b"5 hello")
            await w.drain()
            # Give the listener a moment to (wrongly) persist the frame.
            await asyncio.sleep(0.2)
            w.close()
            try:
                await w.wait_closed()
            except Exception:
                # Best effort: the server may already have torn the
                # connection down, which is the outcome we want.
                pass
        except (ssl.SSLError, OSError, ConnectionError):
            rejected = True

        # A log file that was never created counts as "no log lines";
        # calling stat() on it unguarded would raise FileNotFoundError.
        nothing_logged = not master_log.exists() or master_log.stat().st_size == 0
        assert rejected or nothing_logged, (
            "impostor connection must be rejected or produce no log lines"
        )
    finally:
        stop.set()
        try:
            await asyncio.wait_for(listener_task, timeout=5)
        except asyncio.TimeoutError:
            listener_task.cancel()

213
tests/swarm/test_pki.py Normal file
View File

@@ -0,0 +1,213 @@
"""PKI roundtrip tests for the DECNET swarm CA."""
from __future__ import annotations
import pathlib
import ssl
import threading
import socket
import time
import pytest
from cryptography import x509
from decnet.swarm import pki
def test_ensure_ca_is_idempotent(tmp_path: pathlib.Path) -> None:
    """Two ``ensure_ca`` calls on one directory return the same key and cert."""
    ca_dir = tmp_path / "ca"
    created, reloaded = (pki.ensure_ca(ca_dir) for _ in range(2))

    assert created.key_pem == reloaded.key_pem
    assert created.cert_pem == reloaded.cert_pem
def test_issue_worker_cert_signed_by_ca(tmp_path: pathlib.Path) -> None:
    """An issued worker cert names the CA as issuer and carries both SANs."""
    ca = pki.ensure_ca(tmp_path / "ca")
    issued = pki.issue_worker_cert(ca, "worker-01", ["127.0.0.1", "worker-01"])

    worker_cert = x509.load_pem_x509_certificate(issued.cert_pem)
    issuer_cert = x509.load_pem_x509_certificate(ca.cert_pem)
    assert worker_cert.issuer == issuer_cert.subject

    # The SAN must list the hostname as a DNS name AND the IP as an IP entry.
    san = worker_cert.extensions.get_extension_for_class(
        x509.SubjectAlternativeName
    ).value
    assert "worker-01" in set(san.get_values_for_type(x509.DNSName))
    assert "127.0.0.1" in {str(ip) for ip in san.get_values_for_type(x509.IPAddress)}
def test_worker_bundle_roundtrip(tmp_path: pathlib.Path) -> None:
    """A written bundle loads back with the same fingerprint and a 0600 key."""
    agent_dir = tmp_path / "agent"
    issued = pki.issue_worker_cert(
        pki.ensure_ca(tmp_path / "ca"), "worker-02", ["127.0.0.1"]
    )
    pki.write_worker_bundle(issued, agent_dir)

    # worker.key must be readable by its owner only.
    key_mode = (agent_dir / "worker.key").stat().st_mode & 0o777
    assert key_mode == 0o600

    reloaded = pki.load_worker_bundle(agent_dir)
    assert reloaded is not None
    assert reloaded.fingerprint_sha256 == issued.fingerprint_sha256
def test_load_worker_bundle_returns_none_if_missing(tmp_path: pathlib.Path) -> None:
    """Loading from a directory that was never written returns ``None``."""
    missing_dir = tmp_path / "empty"
    assert pki.load_worker_bundle(missing_dir) is None
def test_ensure_swarmctl_cert_issues_from_same_ca(tmp_path: pathlib.Path) -> None:
    """The auto-issued swarmctl server cert chains to the returned CA cert."""
    cert_path, key_path, ca_path = pki.ensure_swarmctl_cert(
        "0.0.0.0",
        ca_dir=tmp_path / "ca",
        swarmctl_dir=tmp_path / "swarmctl",
    )
    assert cert_path.exists() and key_path.exists() and ca_path.exists()

    # Server cert is signed by the same CA that workers will ship — that's
    # the whole point of the auto-issue path.
    server_cert = x509.load_pem_x509_certificate(cert_path.read_bytes())
    issuer_cert = x509.load_pem_x509_certificate(ca_path.read_bytes())
    assert server_cert.issuer == issuer_cert.subject

    san = server_cert.extensions.get_extension_for_class(
        x509.SubjectAlternativeName
    ).value
    ip_sans = {str(ip) for ip in san.get_values_for_type(x509.IPAddress)}
    dns_sans = set(san.get_values_for_type(x509.DNSName))
    assert "0.0.0.0" in ip_sans
    assert "localhost" in dns_sans

    # Same 0600 key permission as enforced on worker.key.
    assert (key_path.stat().st_mode & 0o777) == 0o600
def test_ensure_swarmctl_cert_is_idempotent(tmp_path: pathlib.Path) -> None:
    """A second ``ensure_swarmctl_cert`` call leaves the cert file unchanged.

    Re-issuing on every call would rotate the server cert whenever
    swarmctl restarts and break any worker mid-TLS-session.
    """
    dirs = {"ca_dir": tmp_path / "ca", "swarmctl_dir": tmp_path / "swarmctl"}

    cert_path, *_ = pki.ensure_swarmctl_cert("0.0.0.0", **dirs)
    original_pem = cert_path.read_bytes()

    cert_path_again, *_ = pki.ensure_swarmctl_cert("0.0.0.0", **dirs)
    assert cert_path_again.read_bytes() == original_pem
def test_fingerprint_stable_across_calls(tmp_path: pathlib.Path) -> None:
    """``pki.fingerprint`` of the cert PEM equals the value stored at issuance."""
    issued = pki.issue_worker_cert(
        pki.ensure_ca(tmp_path / "ca"), "worker-03", ["127.0.0.1"]
    )
    recomputed = pki.fingerprint(issued.cert_pem)
    assert recomputed == issued.fingerprint_sha256
def test_mtls_handshake_round_trip(tmp_path: pathlib.Path) -> None:
    """End-to-end: issue two worker certs from the same CA, have one act as
    TLS server and the other as TLS client, and confirm the handshake
    succeeds with mutual auth.
    """
    ca = pki.ensure_ca(tmp_path / "ca")
    srv_dir = tmp_path / "srv"
    cli_dir = tmp_path / "cli"
    pki.write_worker_bundle(
        pki.issue_worker_cert(ca, "srv", ["127.0.0.1"]), srv_dir
    )
    pki.write_worker_bundle(
        pki.issue_worker_cert(ca, "cli", ["127.0.0.1"]), cli_dir
    )

    server_ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
    server_ctx.load_cert_chain(str(srv_dir / "worker.crt"), str(srv_dir / "worker.key"))
    server_ctx.load_verify_locations(cafile=str(srv_dir / "ca.crt"))
    server_ctx.verify_mode = ssl.CERT_REQUIRED  # demand a client cert

    client_ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    client_ctx.load_cert_chain(str(cli_dir / "worker.crt"), str(cli_dir / "worker.key"))
    client_ctx.load_verify_locations(cafile=str(cli_dir / "ca.crt"))
    client_ctx.check_hostname = False  # SAN matches IP, not hostname
    client_ctx.verify_mode = ssl.CERT_REQUIRED

    result: dict[str, object] = {}
    sock = socket.socket()
    try:
        sock.bind(("127.0.0.1", 0))
        sock.listen(1)
        port = sock.getsockname()[1]

        def _serve() -> None:
            # Accept one connection, record the peer cert, reply "ok".
            try:
                conn, _ = sock.accept()
                with server_ctx.wrap_socket(conn, server_side=True) as tls:
                    result["peer_cert"] = tls.getpeercert()
                    tls.sendall(b"ok")
            except Exception as exc:  # noqa: BLE001
                result["error"] = repr(exc)

        t = threading.Thread(target=_serve, daemon=True)
        t.start()
        time.sleep(0.05)

        try:
            with socket.create_connection(("127.0.0.1", port)) as raw:
                with client_ctx.wrap_socket(raw, server_hostname="127.0.0.1") as tls:
                    assert tls.recv(2) == b"ok"
        finally:
            # Let the server thread finish even if the client side failed.
            t.join(timeout=2)
    finally:
        # Always release the listening socket, whatever happened above.
        sock.close()

    assert "error" not in result, result.get("error")
    assert result.get("peer_cert"), "server did not receive client cert"
def test_unauthenticated_client_rejected(tmp_path: pathlib.Path) -> None:
    """A client presenting a cert from a DIFFERENT CA must be rejected."""
    good_ca = pki.ensure_ca(tmp_path / "good-ca")
    evil_ca = pki.generate_ca("Evil CA")

    srv_dir = tmp_path / "srv"
    pki.write_worker_bundle(
        pki.issue_worker_cert(good_ca, "srv", ["127.0.0.1"]), srv_dir
    )
    evil_dir = tmp_path / "evil"
    pki.write_worker_bundle(
        pki.issue_worker_cert(evil_ca, "evil", ["127.0.0.1"]), evil_dir
    )

    server_ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
    server_ctx.load_cert_chain(str(srv_dir / "worker.crt"), str(srv_dir / "worker.key"))
    server_ctx.load_verify_locations(cafile=str(srv_dir / "ca.crt"))
    server_ctx.verify_mode = ssl.CERT_REQUIRED

    client_ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    client_ctx.load_cert_chain(str(evil_dir / "worker.crt"), str(evil_dir / "worker.key"))
    # The evil client trusts the GOOD CA (taken from the server's bundle)
    # for the server cert, so the server chain verifies from the client's
    # side; the server-side rejection of the client cert is what we are
    # asserting.
    client_ctx.load_verify_locations(cafile=str(srv_dir / "ca.crt"))
    client_ctx.check_hostname = False
    client_ctx.verify_mode = ssl.CERT_REQUIRED

    errors: list[str] = []
    handshake_failed = False
    sock = socket.socket()
    try:
        sock.bind(("127.0.0.1", 0))
        sock.listen(1)
        port = sock.getsockname()[1]

        def _serve() -> None:
            # Record any server-side failure (an ssl.SSLError is expected).
            try:
                conn, _ = sock.accept()
                with server_ctx.wrap_socket(conn, server_side=True):
                    pass
            except Exception as exc:  # noqa: BLE001
                errors.append(repr(exc))

        t = threading.Thread(target=_serve, daemon=True)
        t.start()
        time.sleep(0.05)

        # Rejection may surface on either side (SSL alert on the server
        # closes the socket — client may see SSLError,
        # ConnectionResetError, or EOF).
        try:
            with socket.create_connection(("127.0.0.1", port)) as raw:
                with client_ctx.wrap_socket(raw, server_hostname="127.0.0.1") as tls:
                    tls.do_handshake()
        except (ssl.SSLError, OSError):
            handshake_failed = True
        finally:
            t.join(timeout=2)
    finally:
        # Always release the listening socket.
        sock.close()

    assert handshake_failed or errors, (
        "server should have rejected the evil-CA-signed client cert"
    )

View File

@@ -0,0 +1,60 @@
"""Backward-compatibility tests for the SWARM state-schema extension.
DeckyConfig gained an optional ``host_uuid`` field in swarm mode. Existing
state files (unihost) must continue to deserialize without change.
"""
from __future__ import annotations
from decnet.models import DeckyConfig, DecnetConfig
def _minimal_decky(name: str = "decky-01") -> dict:
    """Return the fields of a decky state blob with no ``host_uuid`` key.

    Only ``name`` varies; every other value is a fixed fixture.
    """
    blob = dict(
        ip="192.168.1.10",
        services=["ssh"],
        distro="debian",
        base_image="debian:bookworm-slim",
        hostname="decky01",
    )
    return {"name": name, **blob}
def test_decky_config_host_uuid_defaults_to_none() -> None:
    """A decky built from a blob lacking ``host_uuid`` gets ``host_uuid=None``."""
    pre_swarm_fields = _minimal_decky()
    decky = DeckyConfig(**pre_swarm_fields)
    assert decky.host_uuid is None
def test_decky_config_accepts_host_uuid() -> None:
    """An explicit ``host_uuid`` is stored on the config unchanged."""
    decky = DeckyConfig(host_uuid="host-uuid-abc", **_minimal_decky())
    assert decky.host_uuid == "host-uuid-abc"
def test_decnet_config_mode_swarm_with_host_assignments() -> None:
    """Full swarm-mode config: every decky carries a host_uuid."""
    assignments = (("decky-01", "host-A"), ("decky-02", "host-B"))
    deckies = [
        DeckyConfig(**_minimal_decky(decky_name), host_uuid=host)
        for decky_name, host in assignments
    ]

    config = DecnetConfig(
        mode="swarm",
        interface="eth0",
        subnet="192.168.1.0/24",
        gateway="192.168.1.1",
        deckies=deckies,
    )

    assert config.mode == "swarm"
    assert {decky.host_uuid for decky in config.deckies} == {"host-A", "host-B"}
def test_legacy_unihost_state_still_parses() -> None:
    """A unihost dict with no ``host_uuid`` anywhere validates as before."""
    legacy_blob = dict(
        mode="unihost",
        interface="eth0",
        subnet="192.168.1.0/24",
        gateway="192.168.1.1",
        deckies=[_minimal_decky()],
    )

    parsed = DecnetConfig.model_validate(legacy_blob)

    assert parsed.mode == "unihost"
    first_decky = parsed.deckies[0]
    assert first_decky.host_uuid is None

View File

@@ -0,0 +1,493 @@
"""Unit tests for the SWARM controller FastAPI app.
Covers the enrollment, host-management, and deployment dispatch routes.
The AgentClient is stubbed so we exercise the controller's logic without
a live mTLS peer (that path has its own roundtrip test).
"""
from __future__ import annotations
import pathlib
from typing import Any
import pytest
from fastapi.testclient import TestClient
from decnet.web.db.factory import get_repository
from decnet.web.dependencies import get_repo
@pytest.fixture
def ca_dir(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> pathlib.Path:
    """Redirect the PKI default CA path into ``tmp_path``.

    Keeps the test CA from ever touching ``~/.decnet/ca``. Returns the
    temp CA path.
    """
    from decnet.swarm import client as swarm_client
    from decnet.swarm import pki
    from decnet.web.router.swarm import api_enroll_host as enroll_mod

    test_ca = tmp_path / "ca"
    monkeypatch.setattr(pki, "DEFAULT_CA_DIR", test_ca)
    # Also patch the already-imported references inside client.py / routers.
    for module in (swarm_client, enroll_mod):
        monkeypatch.setattr(module, "pki", pki)
    return test_ca
@pytest.fixture
def repo(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch):
    """Create a repository on a temp DB and install it as the module singleton.

    The controller's lifespan initialises the module-level ``repo`` in
    ``decnet.web.dependencies``; swapping it (and the copy held by
    ``decnet.web.swarm_api``) makes schema creation target the temp DB.
    """
    import decnet.web.dependencies as deps
    import decnet.web.swarm_api as swarm_api_mod

    test_repo = get_repository(db_path=str(tmp_path / "swarm.db"))
    for module in (deps, swarm_api_mod):
        monkeypatch.setattr(module, "repo", test_repo)
    return test_repo
@pytest.fixture
def client(repo, ca_dir: pathlib.Path):
    """Yield a ``TestClient`` for the swarm controller backed by the test ``repo``.

    ``get_repo`` is overridden to return the fixture repository. The
    override table is cleared on teardown; ``ca_dir`` is requested only
    so the PKI redirection is active before the app starts.
    """
    from decnet.web.swarm_api import app

    async def _override() -> Any:
        return repo

    app.dependency_overrides[get_repo] = _override
    try:
        with TestClient(app) as c:
            yield c
    finally:
        # Runs even when leaving the ``with`` raises (e.g. a failing app
        # shutdown), so the override cannot leak into later tests.
        app.dependency_overrides.clear()
# ---------------------------------------------------------------- /enroll
def test_enroll_creates_host_and_returns_bundle(client: TestClient) -> None:
    """Enrollment answers 201 with the host identity and a PEM bundle."""
    request = {"name": "worker-a", "address": "10.0.0.5", "agent_port": 8765}
    resp = client.post("/swarm/enroll", json=request)
    assert resp.status_code == 201, resp.text

    body = resp.json()
    assert body["name"] == "worker-a"
    assert body["address"] == "10.0.0.5"
    for field, pem_header in (
        ("worker_cert_pem", "-----BEGIN CERTIFICATE-----"),
        ("worker_key_pem", "-----BEGIN PRIVATE KEY-----"),
        ("ca_cert_pem", "-----BEGIN CERTIFICATE-----"),
    ):
        assert pem_header in body[field]
    assert len(body["fingerprint"]) == 64  # sha256 hex
def test_enroll_with_updater_issues_second_cert(client: TestClient, ca_dir) -> None:
    """``issue_updater_bundle`` yields a second, distinct cert that is persisted."""
    request = {
        "name": "worker-upd",
        "address": "10.0.0.99",
        "agent_port": 8765,
        "issue_updater_bundle": True,
    }
    resp = client.post("/swarm/enroll", json=request)
    assert resp.status_code == 201, resp.text

    body = resp.json()
    assert body["updater"] is not None
    updater = body["updater"]
    assert updater["fingerprint"] != body["fingerprint"]
    assert "-----BEGIN CERTIFICATE-----" in updater["updater_cert_pem"]
    assert "-----BEGIN PRIVATE KEY-----" in updater["updater_key_pem"]

    # Cert bundle persisted on master.
    bundle_dir = ca_dir / "workers" / "worker-upd" / "updater"
    for filename in ("updater.crt", "updater.key"):
        assert (bundle_dir / filename).is_file()

    # DB row carries the updater fingerprint.
    host_row = client.get(f"/swarm/hosts/{body['host_uuid']}").json()
    assert host_row.get("updater_cert_fingerprint") == updater["fingerprint"]
def test_enroll_without_updater_omits_bundle(client: TestClient) -> None:
    """A plain enrollment reports ``updater`` as ``None``."""
    enroll_req = {"name": "worker-no-upd", "address": "10.0.0.98", "agent_port": 8765}
    resp = client.post("/swarm/enroll", json=enroll_req)
    assert resp.status_code == 201
    body = resp.json()
    assert body["updater"] is None
def test_enroll_rejects_duplicate_name(client: TestClient) -> None:
    """Posting the same enrollment twice gives 201 and then 409."""
    enroll_req = {"name": "worker-dup", "address": "10.0.0.6", "agent_port": 8765}

    first = client.post("/swarm/enroll", json=enroll_req)
    assert first.status_code == 201

    second = client.post("/swarm/enroll", json=enroll_req)
    assert second.status_code == 409
# ---------------------------------------------------------------- /hosts
def test_list_hosts_empty(client: TestClient) -> None:
    """With nothing enrolled, the host listing is an empty JSON array."""
    listing = client.get("/swarm/hosts")
    assert listing.status_code == 200
    assert listing.json() == []
def test_list_and_get_host_after_enroll(client: TestClient) -> None:
    """An enrolled host shows up in the listing and in the detail endpoint."""
    enroll_req = {"name": "worker-b", "address": "10.0.0.7", "agent_port": 8765}
    enrolled = client.post("/swarm/enroll", json=enroll_req).json()
    host_id = enrolled["host_uuid"]

    hosts = client.get("/swarm/hosts").json()
    assert len(hosts) == 1
    assert hosts[0]["name"] == "worker-b"

    detail = client.get(f"/swarm/hosts/{host_id}").json()
    assert detail["uuid"] == host_id
    assert detail["status"] == "enrolled"
def test_decommission_removes_host_and_bundle(
    client: TestClient, ca_dir: pathlib.Path
) -> None:
    """Deleting a host removes its record and its bundle directory."""
    enroll_req = {"name": "worker-c", "address": "10.0.0.8", "agent_port": 8765}
    enrolled = client.post("/swarm/enroll", json=enroll_req).json()
    host_id = enrolled["host_uuid"]

    worker_bundle = ca_dir / "workers" / "worker-c"
    assert worker_bundle.is_dir()

    deleted = client.delete(f"/swarm/hosts/{host_id}")
    assert deleted.status_code == 204

    lookup = client.get(f"/swarm/hosts/{host_id}")
    assert lookup.status_code == 404
    assert not worker_bundle.exists()
def test_decommission_dispatches_self_destruct_to_agent(
    client: TestClient, ca_dir: pathlib.Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Deleting a host must call ``self_destruct`` on that host's agent.

    Without it the worker would keep running after the dashboard has
    forgotten about it.
    """
    destructed: list[str] = []

    class _SelfDestructAgent:
        """``AgentClient`` stand-in that records which host was told to wipe."""

        def __init__(self, host=None, **_):
            self._host = host or {}

        async def __aenter__(self):
            return self

        async def __aexit__(self, *exc):
            return None

        async def self_destruct(self):
            destructed.append(self._host.get("name") or "?")
            return {"status": "self_destruct_scheduled"}

    from decnet.web.router.swarm import api_decommission_host as decom_mod

    monkeypatch.setattr(decom_mod, "AgentClient", _SelfDestructAgent)

    enroll_req = {"name": "worker-nuke", "address": "10.0.0.8", "agent_port": 8765}
    enrolled = client.post("/swarm/enroll", json=enroll_req).json()

    deleted = client.delete(f"/swarm/hosts/{enrolled['host_uuid']}")
    assert deleted.status_code == 204
    assert destructed == ["worker-nuke"]
def test_decommission_proceeds_when_agent_unreachable(
    client: TestClient, ca_dir: pathlib.Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """A failing ``self_destruct`` call must not stop the decommission.

    The operator still has to be able to clean up a worker that is dead, so
    the DELETE is expected to succeed and the host to be gone afterwards.
    """

    class _DeadAgent:
        """``AgentClient`` stand-in whose self-destruct always raises."""

        def __init__(self, host=None, **_):
            pass

        async def __aenter__(self):
            return self

        async def __aexit__(self, *exc):
            return None

        async def self_destruct(self):
            raise RuntimeError("connection refused")

    from decnet.web.router.swarm import api_decommission_host as decom_mod

    monkeypatch.setattr(decom_mod, "AgentClient", _DeadAgent)

    enroll_req = {"name": "worker-dead", "address": "10.0.0.8", "agent_port": 8765}
    enrolled = client.post("/swarm/enroll", json=enroll_req).json()
    host_url = f"/swarm/hosts/{enrolled['host_uuid']}"

    deleted = client.delete(host_url)
    assert deleted.status_code == 204
    assert client.get(host_url).status_code == 404
# ---------------------------------------------------------------- /deploy
class _StubAgentClient:
"""Minimal async-context-manager stub mirroring ``AgentClient``."""
deployed: list[dict[str, Any]] = []
torn_down: list[dict[str, Any]] = []
def __init__(self, host: dict[str, Any] | None = None, **_: Any) -> None:
self._host = host or {}
async def __aenter__(self) -> "_StubAgentClient":
return self
async def __aexit__(self, *exc: Any) -> None:
return None
async def health(self) -> dict[str, Any]:
return {"status": "ok"}
async def deploy(self, config: Any, **kw: Any) -> dict[str, Any]:
_StubAgentClient.deployed.append(
{"host": self._host.get("name"), "deckies": [d.name for d in config.deckies]}
)
return {"status": "deployed", "deckies": len(config.deckies)}
async def teardown(self, decky_id: str | None = None) -> dict[str, Any]:
_StubAgentClient.torn_down.append(
{"host": self._host.get("name"), "decky_id": decky_id}
)
return {"status": "torn_down"}
@pytest.fixture
def stub_agent(monkeypatch: pytest.MonkeyPatch):
    """Patch ``AgentClient`` in the deploy, teardown and check routers.

    The call logs on ``_StubAgentClient`` are emptied first so each test
    starts from a clean slate.  Returns the stub class for inspection.
    """
    for call_log in (_StubAgentClient.deployed, _StubAgentClient.torn_down):
        call_log.clear()

    from decnet.web.router.swarm import api_deploy_swarm as deploy_mod
    from decnet.web.router.swarm import api_teardown_swarm as teardown_mod
    from decnet.web.router.swarm import api_check_hosts as check_mod

    for router_mod in (deploy_mod, teardown_mod, check_mod):
        monkeypatch.setattr(router_mod, "AgentClient", _StubAgentClient)
    return _StubAgentClient
def _decky_dict(name: str, host_uuid: str, ip: str) -> dict[str, Any]:
    """Build a decky config entry with fixed ssh/debian defaults.

    ``name`` is used for both the ``name`` and ``hostname`` keys.
    """
    return dict(
        name=name,
        ip=ip,
        services=["ssh"],
        distro="debian",
        base_image="debian:bookworm-slim",
        hostname=name,
        host_uuid=host_uuid,
    )
def test_deploy_shards_across_hosts(client: TestClient, stub_agent) -> None:
    """Deckies are dispatched to the agent of the host named by ``host_uuid``.

    Two hosts are enrolled; two deckies target the first and one the second.
    """
    enrolled = {}
    for host_name, host_addr in (("w1", "10.0.0.1"), ("w2", "10.0.0.2")):
        enrolled[host_name] = client.post(
            "/swarm/enroll",
            json={"name": host_name, "address": host_addr, "agent_port": 8765},
        ).json()
    first_uuid = enrolled["w1"]["host_uuid"]
    second_uuid = enrolled["w2"]["host_uuid"]

    deckies = [
        _decky_dict("decky-01", first_uuid, "192.168.1.10"),
        _decky_dict("decky-02", first_uuid, "192.168.1.11"),
        _decky_dict("decky-03", second_uuid, "192.168.1.12"),
    ]
    cfg = {
        "mode": "swarm",
        "interface": "eth0",
        "subnet": "192.168.1.0/24",
        "gateway": "192.168.1.1",
        "deckies": deckies,
    }

    resp = client.post("/swarm/deploy", json={"config": cfg})
    assert resp.status_code == 200, resp.text

    results = resp.json()["results"]
    assert len(results) == 2
    assert all(entry["ok"] for entry in results)

    dispatched = {call["host"]: call["deckies"] for call in stub_agent.deployed}
    assert dispatched["w1"] == ["decky-01", "decky-02"]
    assert dispatched["w2"] == ["decky-03"]
def test_deploy_rejects_missing_host_uuid(client: TestClient, stub_agent) -> None:
    """A decky without ``host_uuid`` yields 400 and the detail names the field."""
    # host_uuid deliberately omitted
    orphan_decky = {
        "name": "decky-01",
        "ip": "192.168.1.10",
        "services": ["ssh"],
        "distro": "debian",
        "base_image": "debian:bookworm-slim",
        "hostname": "decky-01",
    }
    cfg = {
        "mode": "swarm",
        "interface": "eth0",
        "subnet": "192.168.1.0/24",
        "gateway": "192.168.1.1",
        "deckies": [orphan_decky],
    }

    resp = client.post("/swarm/deploy", json={"config": cfg})
    assert resp.status_code == 400
    detail = resp.json()["detail"]
    assert "host_uuid" in detail
def test_deploy_partial_failure_only_marks_actually_failed_decky(
    client: TestClient, repo, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Only deckies that are really down get marked failed after a deploy error.

    ``docker compose up`` can partially succeed: one broken service does not
    roll back the ones already running.  When the dispatch raises, the master
    is expected to consult ``/status`` so healthy deckies in the same shard
    are not flagged just because a sibling failed.
    """

    def _runtime_entry(running, ssh_state):
        return {"running": running, "services": {"ssh": ssh_state}}

    class _PartialFailAgent:
        """Agent double: ``deploy`` raises, ``status`` shows 2 of 3 running."""

        def __init__(self, host=None, **_):
            self._host = host or {}

        async def __aenter__(self):
            return self

        async def __aexit__(self, *exc):
            return None

        async def deploy(self, config, **kw):
            raise RuntimeError("Server error '500 Internal Server Error'")

        async def status(self):
            return {
                "deployed": True,
                "runtime": {
                    "decky1": _runtime_entry(True, "running"),
                    "decky2": _runtime_entry(True, "running"),
                    "decky3": _runtime_entry(False, "absent"),
                },
            }

    from decnet.web.router.swarm import api_deploy_swarm as deploy_mod

    monkeypatch.setattr(deploy_mod, "AgentClient", _PartialFailAgent)

    host = client.post(
        "/swarm/enroll",
        json={"name": "decktest", "address": "192.168.1.47", "agent_port": 8765},
    ).json()
    host_id = host["host_uuid"]

    cfg = {
        "mode": "swarm",
        "interface": "eth0",
        "subnet": "192.168.1.0/24",
        "gateway": "192.168.1.1",
        "deckies": [
            _decky_dict(decky_name, host_id, decky_ip)
            for decky_name, decky_ip in (
                ("decky1", "192.168.1.2"),
                ("decky2", "192.168.1.3"),
                ("decky3", "192.168.1.4"),
            )
        ],
    }

    resp = client.post("/swarm/deploy", json={"config": cfg})
    assert resp.status_code == 200
    assert resp.json()["results"][0]["ok"] is False

    listing = client.get("/swarm/deckies").json()
    by_decky = {row["decky_name"]: row for row in listing}
    assert by_decky["decky1"]["state"] == "running"
    assert by_decky["decky1"]["last_error"] is None
    assert by_decky["decky2"]["state"] == "running"
    assert by_decky["decky3"]["state"] == "failed"
    assert "500" in (by_decky["decky3"]["last_error"] or "")
def test_deploy_rejects_non_swarm_mode(client: TestClient, stub_agent) -> None:
    """A config whose ``mode`` is ``unihost`` is answered with 400."""
    lone_decky = _decky_dict("decky-01", "fake-uuid", "192.168.1.10")
    cfg = {
        "mode": "unihost",
        "interface": "eth0",
        "subnet": "192.168.1.0/24",
        "gateway": "192.168.1.1",
        "deckies": [lone_decky],
    }
    rejected = client.post("/swarm/deploy", json={"config": cfg})
    assert rejected.status_code == 400
def test_teardown_all_hosts(client: TestClient, stub_agent) -> None:
    """An empty teardown request reaches the agent of every enrolled host."""
    addresses = ("10.0.0.1", "10.0.0.2")
    for idx, addr in enumerate(addresses, start=1):
        enroll_req = {"name": f"td{idx}", "address": addr, "agent_port": 8765}
        client.post("/swarm/enroll", json=enroll_req)

    resp = client.post("/swarm/teardown", json={})
    assert resp.status_code == 200
    assert len(resp.json()["results"]) == 2

    hosts_torn_down = {call["host"] for call in stub_agent.torn_down}
    assert hosts_torn_down == {"td1", "td2"}
# ---------------------------------------------------------------- /check
def test_check_marks_hosts_active(client: TestClient, stub_agent) -> None:
    """After ``/swarm/check`` a reachable host is ``active`` with a heartbeat."""
    enroll_req = {"name": "probe-w", "address": "10.0.0.9", "agent_port": 8765}
    enrolled = client.post("/swarm/enroll", json=enroll_req).json()

    resp = client.post("/swarm/check")
    assert resp.status_code == 200

    probes = resp.json()["results"]
    assert len(probes) == 1
    assert probes[0]["reachable"] is True

    detail = client.get(f"/swarm/hosts/{enrolled['host_uuid']}").json()
    assert detail["status"] == "active"
    assert detail["last_heartbeat"] is not None
# ---------------------------------------------------------------- /deckies
def test_list_deckies_empty(client: TestClient) -> None:
    """With no shards stored, the decky listing is an empty JSON array."""
    listing = client.get("/swarm/deckies")
    assert listing.status_code == 200
    assert listing.json() == []
def test_list_deckies_joins_host_identity(client: TestClient, repo) -> None:
    """Decky rows carry their host's name/address and honour the filters.

    Two hosts are enrolled through the API, one shard per host is written
    straight into the repo, and the listing is checked unfiltered, filtered
    by ``host_uuid`` and filtered by ``state``.
    """
    import asyncio

    first_host = client.post(
        "/swarm/enroll",
        json={"name": "deck-host-1", "address": "10.0.0.11", "agent_port": 8765},
    ).json()
    second_host = client.post(
        "/swarm/enroll",
        json={"name": "deck-host-2", "address": "10.0.0.12", "agent_port": 8765},
    ).json()

    async def _seed() -> None:
        shards = (
            {
                "decky_name": "decky-01", "host_uuid": first_host["host_uuid"],
                "services": ["ssh"], "state": "running",
            },
            {
                "decky_name": "decky-02", "host_uuid": second_host["host_uuid"],
                "services": ["smb", "ssh"], "state": "failed", "last_error": "boom",
            },
        )
        for shard in shards:
            await repo.upsert_decky_shard(shard)

    asyncio.run(_seed())

    listing = client.get("/swarm/deckies").json()
    assert len(listing) == 2
    by_decky = {row["decky_name"]: row for row in listing}

    running_row = by_decky["decky-01"]
    assert running_row["host_name"] == "deck-host-1"
    assert running_row["host_address"] == "10.0.0.11"
    assert running_row["state"] == "running"

    failed_row = by_decky["decky-02"]
    assert failed_row["services"] == ["smb", "ssh"]
    assert failed_row["last_error"] == "boom"

    # host_uuid filter
    on_first = client.get(f"/swarm/deckies?host_uuid={first_host['host_uuid']}").json()
    assert [row["decky_name"] for row in on_first] == ["decky-01"]

    # state filter
    in_failed_state = client.get("/swarm/deckies?state=failed").json()
    assert [row["decky_name"] for row in in_failed_state] == ["decky-02"]
# ---------------------------------------------------------------- /health (root)
def test_root_health(client: TestClient) -> None:
    """``/health`` answers 200 and reports the ``swarm-controller`` role."""
    health = client.get("/health")
    assert health.status_code == 200
    payload = health.json()
    assert payload["role"] == "swarm-controller"

View File

@@ -0,0 +1,75 @@
"""tar_working_tree: exclude filter, tarball validity, git SHA detection."""
from __future__ import annotations
import io
import pathlib
import tarfile
from decnet.swarm.tar_tree import detect_git_sha, tar_working_tree
def _tree_names(data: bytes) -> set[str]:
    """Return the set of member names in a gzip-compressed tarball."""
    archive = io.BytesIO(data)
    with tarfile.open(fileobj=archive, mode="r:gz") as tar:
        return set(tar.getnames())
def test_tar_excludes_default_patterns(tmp_path: pathlib.Path) -> None:
    """Only ``decnet/keep.py`` from the sample tree ends up in the tarball.

    The tree also holds a virtualenv, a ``.git`` dir, bytecode cache, a wiki
    checkout, a ``.db`` file and a ``.log`` file, none of which may appear.
    """
    # (path parts relative to tmp_path, file content)
    layout = (
        (("decnet", "keep.py"), "x = 1"),
        ((".venv", "pyvenv.cfg"), "junk"),
        ((".git", "HEAD"), "ref: refs/heads/main\n"),
        (("decnet", "__pycache__", "keep.cpython-311.pyc"), "bytecode"),
        (("wiki-checkout", "Home.md"), "# wiki"),
        (("run.db",), "sqlite"),
        (("master.log",), "log"),
    )
    for parts, content in layout:
        target = tmp_path.joinpath(*parts)
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(content)

    names = _tree_names(tar_working_tree(tmp_path))

    assert "decnet/keep.py" in names
    for marker in (".venv", ".git", "__pycache__", "wiki-checkout"):
        assert all(marker not in n for n in names)
    assert "run.db" not in names
    assert "master.log" not in names
def test_tar_accepts_extra_excludes(tmp_path: pathlib.Path) -> None:
    """A name passed via ``extra_excludes`` is left out of the tarball."""
    for filename, content in (("a.py", "x"), ("secret.env", "TOKEN=abc")):
        (tmp_path / filename).write_text(content)

    tarball = tar_working_tree(tmp_path, extra_excludes=["secret.env"])
    names = _tree_names(tarball)

    assert "a.py" in names
    assert "secret.env" not in names
def test_tar_skips_symlinks(tmp_path: pathlib.Path) -> None:
    """A symlink in the tree is left out of the tarball; its target is kept.

    On platforms where the symlink cannot be created the test is reported
    as skipped rather than silently passing.
    """
    # Function-scope import: this module has no top-level pytest import.
    import pytest

    (tmp_path / "real.txt").write_text("hi")
    try:
        (tmp_path / "link.txt").symlink_to(tmp_path / "real.txt")
    except (OSError, NotImplementedError) as exc:
        # A bare ``return`` here would show up as a pass in the report.
        pytest.skip(f"platform doesn't support symlinks: {exc}")

    names = _tree_names(tar_working_tree(tmp_path))
    assert "real.txt" in names
    assert "link.txt" not in names
def test_detect_git_sha_from_ref(tmp_path: pathlib.Path) -> None:
    """When ``HEAD`` points at a branch ref, the SHA is read from that ref file."""
    git_dir = tmp_path / ".git"
    heads_dir = git_dir / "refs" / "heads"
    heads_dir.mkdir(parents=True)
    (heads_dir / "main").write_text("deadbeef" * 5 + "\n")
    (git_dir / "HEAD").write_text("ref: refs/heads/main\n")

    sha = detect_git_sha(tmp_path)
    assert sha.startswith("deadbeef")
def test_detect_git_sha_detached(tmp_path: pathlib.Path) -> None:
    """When ``HEAD`` holds a raw SHA, that value is returned."""
    git_dir = tmp_path / ".git"
    git_dir.mkdir()
    (git_dir / "HEAD").write_text("f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0\n")

    sha = detect_git_sha(tmp_path)
    assert sha.startswith("f0f0")
def test_detect_git_sha_none_when_not_repo(tmp_path: pathlib.Path) -> None:
    """A directory without ``.git`` yields the empty string."""
    sha = detect_git_sha(tmp_path)
    assert sha == ""

View File

@@ -0,0 +1,77 @@
"""Regression tests for the uvicorn TLS scope monkey-patch."""
from __future__ import annotations
from typing import Any
import pytest
class _FakeSSLObject:
    """Minimal SSL-object double that hands back a fixed DER blob."""

    def __init__(self, der: bytes) -> None:
        # Bytes returned verbatim by getpeercert().
        self._der = der

    def getpeercert(self, binary_form: bool = False) -> bytes:
        """Return the stored bytes; callers must pass ``binary_form=True``."""
        assert binary_form is True
        return self._der
class _FakeTransport:
    """Transport double exposing only ``get_extra_info``."""

    def __init__(self, ssl_obj: Any = None) -> None:
        self._ssl = ssl_obj

    def get_extra_info(self, key: str) -> Any:
        """Return the SSL object for ``"ssl_object"``; ``None`` for any other key."""
        return self._ssl if key == "ssl_object" else None
def _make_cycle_cls():
    """Return a fresh ``Cycle`` class on every call.

    A new class per test keeps a wrapper installed on one class from
    affecting another test's class.
    """

    class Cycle:
        """Bare object that just stores ``scope`` and ``transport``."""

        def __init__(self, scope: dict, transport: Any = None) -> None:
            self.scope = scope
            self.transport = transport

    return Cycle
def test_wrap_cycle_injects_cert_into_scope() -> None:
    """After wrapping, construction puts the peer cert DER into the scope."""
    from decnet.web._uvicorn_tls_scope import _wrap_cycle_init

    cycle_cls = _make_cycle_cls()
    _wrap_cycle_init(cycle_cls)

    der = b"\x30\x82der"
    scope: dict = {"type": "http"}
    cycle_cls(scope, transport=_FakeTransport(_FakeSSLObject(der)))

    tls_ext = scope["extensions"]["tls"]
    assert tls_ext["client_cert_chain"] == [b"\x30\x82der"]
def test_wrap_cycle_noop_when_no_ssl() -> None:
    """Without an SSL object on the transport, no ``tls`` extension is added."""
    from decnet.web._uvicorn_tls_scope import _wrap_cycle_init

    cycle_cls = _make_cycle_cls()
    _wrap_cycle_init(cycle_cls)

    scope: dict = {"type": "http"}
    plain_transport = _FakeTransport(ssl_obj=None)
    cycle_cls(scope, transport=plain_transport)

    assert not ("extensions" in scope and "tls" in scope["extensions"])
def test_wrap_cycle_noop_when_empty_der() -> None:
    """An empty DER blob from the SSL object adds no ``tls`` extension."""
    from decnet.web._uvicorn_tls_scope import _wrap_cycle_init

    cycle_cls = _make_cycle_cls()
    _wrap_cycle_init(cycle_cls)

    scope: dict = {"type": "http"}
    empty_cert_transport = _FakeTransport(_FakeSSLObject(b""))
    cycle_cls(scope, transport=empty_cert_transport)

    assert not ("extensions" in scope and "tls" in scope["extensions"])
def test_install_is_idempotent() -> None:
from decnet.web import _uvicorn_tls_scope as mod
mod.install()
mod.install() # second call must not double-wrap