Files
DECNET/tests/swarm/test_agent_app.py
anti d1ca96b2f4 feat(agent): /deploy and /mutate become 202 fire-and-forget
The wizard API used to hang because /deckies/deploy ran docker compose
build && up -d synchronously, holding the request thread for minutes.
The worker side of that pipeline now returns 202 Accepted immediately
and runs the deploy in a background task (asyncio.create_task).

On task completion (success or failure) the worker pushes a one-off
heartbeat carrying a lifecycle delta per decky:
  {decky_name, operation, status: succeeded|failed, error?, completed_at}

Master pivots these onto open DeckyLifecycle rows in the heartbeat
handler (next commit).  The scheduled 30s heartbeat tick is the
fallback if the immediate push drops.

- decnet/agent/app.py: /deploy and /mutate return 202; dry_run mutate
  still validates synchronously and returns 200.
- decnet/agent/executor.py: deploy_async + mutate_async wrap the work
  and push the completion delta.
- decnet/agent/heartbeat.py: push_lifecycle_delta() helper builds a
  one-off body and POSTs with the same mTLS context as the loop.
- decnet/swarm/client.py: revert deploy/mutate to control timeout
  (master no longer holds the HTTP request open for compose work).

Worker state.json gains no lifecycle field -- master DeckyLifecycle is
the source of truth; the master sweep handles crashed-mid-deploy
recovery.
2026-05-22 16:31:23 -04:00

230 lines
7.5 KiB
Python

"""Agent FastAPI app — static/contract checks only.
We deliberately do NOT spin uvicorn up in-process here: the mTLS layer is
enforced by uvicorn itself (via --ssl-cert-reqs 2) and is validated in the
VM integration suite. What we CAN assert in unit scope is the route
surface + request/response schema.
"""
from __future__ import annotations
from fastapi.testclient import TestClient
from decnet.agent.app import app
def test_health_endpoint() -> None:
    """GET /health answers 200 with the fixed ``{"status": "ok"}`` body."""
    response = TestClient(app).get("/health")
    assert response.status_code == 200
    assert response.json() == {"status": "ok"}
def test_status_when_not_deployed() -> None:
    """GET /status answers 200 and exposes ``deployed`` and ``deckies``.

    Only the presence of the two keys is checked; their values are not
    asserted.
    """
    response = TestClient(app).get("/status")
    assert response.status_code == 200
    payload = response.json()
    for required_key in ("deployed", "deckies"):
        assert required_key in payload
def _seed_state(monkeypatch, tmp_path):
    """Install a fake load_state/save_state pair backed by a dict cell.

    The fakes replace ``decnet.config.load_state`` and
    ``decnet.config.save_state`` so tests can both seed state and re-read
    whatever the handler wrote.

    Args:
        monkeypatch: pytest ``monkeypatch`` fixture used to install the fakes.
        tmp_path: pytest ``tmp_path`` fixture; the seeded compose path points
            at ``tmp_path / "decnet-compose.yml"`` (the file is not created).

    Returns:
        The backing dict. Its ``"cfg"`` and ``"compose_path"`` entries hold
        the current state; setting ``cell["cfg"] = None`` makes the fake
        ``load_state`` return ``None``.
    """
    from decnet.config import DecnetConfig, DeckyConfig

    cfg = DecnetConfig(
        mode="swarm",
        interface="eth0",
        subnet="10.66.0.0/24",
        gateway="10.66.0.1",
        deckies=[
            DeckyConfig(
                name="decky-01",
                ip="10.66.0.10",
                services=["ssh"],
                distro="debian",
                base_image="debian:bookworm-slim",
                hostname="d01",
            ),
        ],
    )
    compose_path = tmp_path / "decnet-compose.yml"
    cell = {"cfg": cfg, "compose_path": compose_path}

    def _fake_load_state():
        # Mirror the "no state" contract: None when nothing is seeded.
        if cell["cfg"] is None:
            return None
        return (cell["cfg"], cell["compose_path"])

    def _fake_save_state(c, p):
        cell["cfg"] = c
        cell["compose_path"] = p

    monkeypatch.setattr("decnet.config.load_state", _fake_load_state)
    monkeypatch.setattr("decnet.config.save_state", _fake_save_state)
    return cell
def test_mutate_returns_202_and_spawns_task(monkeypatch, tmp_path) -> None:
    """POST /mutate answers 202 and hands the work to ``asyncio.create_task``.

    ``create_task`` is intercepted so the real mutate coroutine never runs;
    the test only checks the response body and that a task whose name starts
    with ``"mutate-"`` was requested.
    """
    import asyncio

    _seed_state(monkeypatch, tmp_path)
    spawned: list[str] = []
    # Grab the genuine function before it is patched below, so the fake can
    # still hand a real Task back to the handler.
    real_create_task = asyncio.create_task

    def _capture_create_task(coro, **kw):
        spawned.append(kw.get("name", ""))
        # Close the coroutine WITHOUT running it: the real work must not
        # execute, and closing avoids a "never awaited" warning.
        coro.close()

        # Return something task-like for the handler.
        async def _noop():
            return None

        return real_create_task(_noop())

    monkeypatch.setattr("decnet.agent.app.asyncio.create_task", _capture_create_task)

    client = TestClient(app)
    resp = client.post(
        "/mutate",
        json={"decky_id": "decky-01", "services": ["http", "ftp"]},
    )
    assert resp.status_code == 202, resp.text
    body = resp.json()
    assert body == {
        "status": "accepted",
        "decky_id": "decky-01",
        "services": ["http", "ftp"],
    }
    assert spawned and spawned[0].startswith("mutate-")
def test_mutate_dry_run_404_when_no_state(monkeypatch) -> None:
    """A dry-run /mutate answers 404 when ``load_state`` yields ``None``."""

    def _no_state():
        return None

    monkeypatch.setattr("decnet.config.load_state", _no_state)

    payload = {"decky_id": "decky-01", "services": ["ssh"], "dry_run": True}
    response = TestClient(app).post("/mutate", json=payload)
    assert response.status_code == 404
def test_mutate_dry_run_404_for_unknown_decky(monkeypatch, tmp_path) -> None:
    """A dry-run /mutate for a decky id absent from the seeded state is a 404."""
    _seed_state(monkeypatch, tmp_path)

    payload = {"decky_id": "ghost", "services": ["ssh"], "dry_run": True}
    response = TestClient(app).post("/mutate", json=payload)
    assert response.status_code == 404
def test_mutate_dry_run_returns_services_without_touching_docker(
    monkeypatch, tmp_path,
) -> None:
    """A dry-run /mutate answers 200 with status ``dry_run`` and never calls
    ``decnet.engine._compose_with_retry``."""
    _seed_state(monkeypatch, tmp_path)

    composed: list = []

    def _record_compose(*a, **kw):
        # Any invocation is recorded so the final assertion can prove
        # that none happened.
        composed.append((a, kw))

    monkeypatch.setattr("decnet.engine._compose_with_retry", _record_compose)

    payload = {"decky_id": "decky-01", "services": ["http"], "dry_run": True}
    response = TestClient(app).post("/mutate", json=payload)

    assert response.status_code == 200
    assert response.json()["status"] == "dry_run"
    assert composed == []
def test_deploy_returns_202_and_spawns_task(monkeypatch) -> None:
    """POST /deploy answers 202 and hands the work to ``asyncio.create_task``.

    ``create_task`` is intercepted so the real deploy coroutine never runs;
    the test checks the accepted response and that a task whose name starts
    with ``"deploy-"`` was requested.
    """
    import asyncio

    from decnet.config import DecnetConfig, DeckyConfig

    cfg = DecnetConfig(
        mode="unihost", interface="eth0",
        subnet="10.66.0.0/24", gateway="10.66.0.1",
        deckies=[DeckyConfig(
            name="decky-01", ip="10.66.0.10",
            services=["ssh"], distro="debian",
            base_image="debian:bookworm-slim", hostname="d01",
        )],
    )
    spawned: list[str] = []
    # Grab the genuine function before it is patched below, so the fake can
    # still hand a real Task back to the handler.
    real_create_task = asyncio.create_task

    def _capture_create_task(coro, **kw):
        spawned.append(kw.get("name", ""))
        # Close the coroutine without running it: no real deploy work, and
        # no "never awaited" warning.
        coro.close()

        async def _noop():
            return None

        return real_create_task(_noop())

    monkeypatch.setattr("decnet.agent.app.asyncio.create_task", _capture_create_task)

    client = TestClient(app)
    resp = client.post("/deploy", json={"config": cfg.model_dump(mode="json")})
    assert resp.status_code == 202, resp.text
    body = resp.json()
    assert body["status"] == "accepted"
    assert body["deckies"] == ["decky-01"]
    assert spawned and spawned[0].startswith("deploy-")
def test_deploy_rejects_malformed_body() -> None:
    """POST /deploy with a body lacking a valid config is rejected with 422."""
    bad_payload = {"not": "a config"}
    response = TestClient(app).post("/deploy", json=bad_payload)
    # 422 is the pydantic validation failure status.
    assert response.status_code == 422
def test_route_set() -> None:
    """Every expected agent route path is registered on the app."""
    required = {
        "/health",
        "/status",
        "/deploy",
        "/teardown",
        "/mutate",
        "/self-destruct",
    }
    # Some route objects may not carry a ``path`` attribute; skip those.
    registered = {route.path for route in app.routes if hasattr(route, "path")}
    assert required.issubset(registered)
def test_self_destruct_spawns_reaper_and_returns_fast(monkeypatch, tmp_path) -> None:
    """/self-destruct must write the reaper script and spawn it detached
    (start_new_session=True). We intercept Popen so the test doesn't
    actually nuke anything.

    The reaper script written under /tmp is removed in a ``finally`` block,
    so a failing assertion does not leave it behind.
    """
    import pathlib
    import subprocess as _sp

    from decnet.agent import executor as _exec

    reaper_prefix = "/tmp/decnet-reaper-"
    spawned: list[dict] = []

    class _FakePopen:
        """Records constructor arguments instead of starting a process."""

        def __init__(self, args, **kw):
            spawned.append({"args": args, "kw": kw})

    # Stub out teardown and state clearing so nothing real is touched.
    monkeypatch.setattr(_exec, "_deployer", type("X", (), {
        "teardown": staticmethod(lambda _id: None),
    })())
    monkeypatch.setattr(_exec, "clear_state", lambda: None)
    monkeypatch.setattr(_sp, "Popen", _FakePopen)

    client = TestClient(app)
    resp = client.post("/self-destruct")
    try:
        assert resp.status_code == 200
        assert resp.json()["status"] == "self_destruct_scheduled"
        assert len(spawned) == 1
        assert spawned[0]["kw"].get("start_new_session") is True

        script_candidates = [
            a for a in spawned[0]["args"]
            if isinstance(a, str) and a.startswith(reaper_prefix)
        ]
        assert len(script_candidates) == 1, spawned[0]["args"]
        script_path = script_candidates[0]

        # Reaper content sanity check — covers the paths the operator asked for.
        body = pathlib.Path(script_path).read_text()
        assert "/opt/decnet*" in body
        assert "/etc/systemd/system/decnet-" in body
        assert "/var/lib/decnet/*" in body
        assert "/usr/local/bin/decnet*" in body
        assert "/etc/decnet" in body

        # Logs must be preserved — no `rm` line should touch /var/log.
        for line in body.splitlines():
            stripped = line.strip()
            if stripped.startswith("#") or not stripped:
                continue
            if stripped.startswith("rm "):
                assert "/var/log" not in stripped
    finally:
        # Remove every reaper script any intercepted Popen call referenced,
        # whether or not the assertions above passed.
        for call in spawned:
            for arg in call["args"]:
                if isinstance(arg, str) and arg.startswith(reaper_prefix):
                    pathlib.Path(arg).unlink(missing_ok=True)