feat(canary): seed baseline canaries on MazeNET deckies

Topology deploys now plant the configured canary baseline set on every
decky in the topology, mirroring the fleet-deploy hook. Containers are
resolved via resolve_topology_container — <decky>-ssh when the decky
exposes an ssh service, else the topology base container
decnet_t_<id8>_<decky>.

The planter's plant/revoke/seed_baseline each gain an optional container=
kwarg; when it is omitted they keep the fleet <name>-ssh resolution.
This commit is contained in:
2026-04-28 22:30:11 -04:00
parent 04b0637c24
commit 5802de1f86
3 changed files with 176 additions and 2 deletions

View File

@@ -52,6 +52,21 @@ def _container_for(decky_name: str) -> str:
return f"{decky_name}{_SSH_CONTAINER_SUFFIX}"
def resolve_topology_container(
    topology_id: str, decky_name: str, services: Iterable[str],
) -> str:
    """Return the container name to ``docker exec`` into for a MazeNET decky.

    The ssh service container (when present) wins because it carries the
    most realistic filesystem layout — same rationale as the fleet path.
    Otherwise the base container is targeted, whose name is set by
    :func:`decnet.topology.compose._container_name`.

    Args:
        topology_id: Topology identifier; only its first 8 characters
            end up in the base container name.
        decky_name: Name of the decky inside the topology.
        services: Names of the services the decky exposes.

    Returns:
        The fleet-style ssh container name (see :func:`_container_for`)
        when ``"ssh"`` is one of *services*, otherwise
        ``decnet_t_<id8>_<decky_name>``.
    """
    # A plain scan works for any iterable and stops at the first match;
    # no need to build a throwaway set for a single membership test.
    if any(service == "ssh" for service in services):
        # Delegate to the fleet resolver so both deploy paths share one
        # definition of the ssh container name and cannot drift apart.
        return _container_for(decky_name)
    return f"decnet_t_{topology_id[:8]}_{decky_name}"
def _dirname(path: str) -> str:
idx = path.rfind("/")
if idx <= 0:
@@ -139,6 +154,7 @@ async def plant(
repo: Optional[BaseRepository] = None,
publish: bool = True,
bus: Optional[BaseBus] = None,
container: Optional[str] = None,
) -> tuple[bool, Optional[str]]:
"""Write *artifact* into the decky's ssh container.
@@ -158,9 +174,10 @@ async def plant(
return False, err
sh_cmd, stdin_payload = _build_plant_command(artifact)
target_container = container or _container_for(decky_name)
# ``-i`` keeps stdin attached so base64 -d inside the container can
# consume the encoded payload streamed from the host.
argv = [_DOCKER, "exec", "-i", _container_for(decky_name), "sh", "-c", sh_cmd]
argv = [_DOCKER, "exec", "-i", target_container, "sh", "-c", sh_cmd]
rc, _stdout, stderr = await _run(argv, stdin_bytes=stdin_payload)
success = rc == 0
error = None if success else (stderr.strip()[:256] or f"rc={rc}")
@@ -196,6 +213,7 @@ async def revoke(
repo: Optional[BaseRepository] = None,
publish: bool = True,
bus: Optional[BaseBus] = None,
container: Optional[str] = None,
) -> tuple[bool, Optional[str]]:
"""Best-effort unlink + state transition + bus publish.
@@ -204,7 +222,8 @@ async def revoke(
already missing); only docker / container-down errors return False.
"""
sh_cmd = f"rm -f {shlex.quote(placement_path)}"
argv = [_DOCKER, "exec", _container_for(decky_name), "sh", "-c", sh_cmd]
target_container = container or _container_for(decky_name)
argv = [_DOCKER, "exec", target_container, "sh", "-c", sh_cmd]
rc, _stdout, stderr = await _run(argv)
success = rc == 0
error = None if success else (stderr.strip()[:256] or f"rc={rc}")
@@ -250,6 +269,7 @@ async def seed_baseline(
persona: str = "linux",
created_by: str = "system",
bus: Optional[BaseBus] = None,
container: Optional[str] = None,
) -> list[dict[str, Any]]:
"""Plant the configured baseline canary set on one decky.
@@ -293,9 +313,59 @@ async def seed_baseline(
await plant(
decky_name, artifact,
token_uuid=token_uuid, repo=repo, publish=True, bus=bus,
container=container,
)
out.append({
"token_uuid": token_uuid, "generator": gen_name, "kind": kind,
"callback_token": slug, "placement_path": artifact.path,
})
return out
async def seed_baseline_topology(
    repo: BaseRepository,
    topology_id: str,
    *,
    created_by: str = "system",
    bus: Optional[BaseBus] = None,
) -> list[dict[str, Any]]:
    """Plant baseline canaries on every decky in a MazeNET topology.

    Mirrors :func:`seed_baseline` for the topology path. Container name
    resolution uses :func:`resolve_topology_container` since topology
    deckies may not have an ssh service — in that case the base
    container is targeted instead.

    Best-effort per decky: if seeding one decky raises, the error is
    logged and the remaining deckies are still seeded. The deploy hook
    treats the return value as informational.

    Args:
        repo: Repository handed to ``hydrate`` and :func:`seed_baseline`.
        topology_id: Identifier of the topology to seed.
        created_by: Forwarded unchanged to :func:`seed_baseline`.
        bus: Optional bus forwarded unchanged to :func:`seed_baseline`.

    Returns:
        A flat list of the per-token dicts returned by
        :func:`seed_baseline`, each with an added ``decky_name`` key,
        across all deckies. Empty when the topology cannot be hydrated.
    """
    # Resolved at call time, so the lookup always sees the current
    # ``decnet.topology.persistence.hydrate`` attribute.
    # NOTE(review): presumably also avoids an import cycle — confirm.
    from decnet.topology.persistence import hydrate

    hydrated = await hydrate(repo, topology_id)
    if hydrated is None:
        log.warning(
            "canary.seed_baseline_topology: topology %s not found", topology_id,
        )
        return []

    out: list[dict[str, Any]] = []
    for decky in hydrated["deckies"]:
        cfg = decky.get("decky_config") or {}
        # Prefer the name from the decky config; fall back to the row's own.
        decky_name = cfg.get("name") or decky.get("name")
        if not decky_name:
            continue
        services = decky.get("services") or []
        container = resolve_topology_container(topology_id, decky_name, services)
        # MazeNET deckies don't carry an OS persona today; default to
        # linux (every base image we ship is Linux).
        try:
            rows = await seed_baseline(
                decky_name, repo,
                persona="linux", created_by=created_by, bus=bus,
                container=container,
            )
        except Exception as exc:  # noqa: BLE001
            # One broken decky must not leave the rest of the topology
            # unseeded; record it and carry on with the next decky.
            log.warning(
                "canary.seed_baseline_topology: seed failed "
                "topology=%s decky=%s err=%s",
                topology_id, decky_name, exc,
            )
            continue
        for row in rows:
            row["decky_name"] = decky_name
            out.append(row)
    return out

View File

@@ -954,6 +954,18 @@ async def deploy_topology(repo, topology_id: str, *, dry_run: bool = False) -> N
await transition_status(repo, topology_id, TopologyStatus.ACTIVE)
log.info("topology %s deployed n_lans=%d", topology_id, len(lans))
# Best-effort canary baseline seed across every decky in the
# topology. Same resilience contract as the fleet path: failures
# surface as state=failed token rows, never abort the deploy.
try:
from decnet.canary import planter as _canary_planter
await _canary_planter.seed_baseline_topology(repo, topology_id)
except Exception as exc: # noqa: BLE001
log.warning(
"canary baseline seed failed (best-effort) topology=%s err=%s",
topology_id, exc,
)
@_traced("engine.teardown_topology")
async def teardown_topology(repo, topology_id: str) -> None:

View File

@@ -233,6 +233,98 @@ async def test_seed_baseline_skips_unknown_generator(repo: SQLiteRepository, mon
assert {r["generator"] for r in rows} == {"env_file"}
@pytest.mark.asyncio
async def test_plant_honours_explicit_container_override(repo: SQLiteRepository) -> None:
    """An explicit ``container=`` overrides the default ``<name>-ssh`` target."""
    override = "decnet_t_abc12345_web1"
    token_row = {
        "uuid": "tok-c", "kind": "http", "decky_name": "web1",
        "generator": "env_file", "placement_path": "/x",
        "callback_token": "slugC", "secret_seed": "s", "created_by": "u1",
    }
    await repo.create_canary_token(token_row)
    artifact = CanaryArtifact(path="/x", content=b"y", generator="env_file")

    patcher, captured, _stdin = _patch_subprocess(rc=0)
    with patcher:
        ok, _err = await planter.plant(
            "web1", artifact, token_uuid="tok-c", repo=repo,
            container=override,
        )

    assert ok is True
    # The captured argv is ``<docker> exec -i <container> ...``, so slot 3
    # holds the container that was actually targeted.
    first_argv = captured[0]
    assert first_argv[3] == override
def test_resolve_topology_container_prefers_ssh_service() -> None:
    """A decky exposing ``ssh`` resolves to its ``<name>-ssh`` container."""
    exposed = ["ssh", "http"]
    resolved = planter.resolve_topology_container(
        "abc123def456", "web1", services=exposed,
    )
    assert resolved == "web1-ssh"
def test_resolve_topology_container_falls_back_to_base() -> None:
    """Without an ``ssh`` service the topology base container is chosen."""
    exposed = ["dns"]
    resolved = planter.resolve_topology_container(
        "abc123def456789", "router", services=exposed,
    )
    # Expected shape is decnet_t_<id8>_<decky_name>, i.e. only the first
    # eight characters of the topology id survive; this matches
    # topology.compose._container_name.
    assert resolved == "decnet_t_abc123de_router"
@pytest.mark.asyncio
async def test_seed_baseline_topology_iterates_deckies_and_resolves_container(
    repo: SQLiteRepository, monkeypatch
) -> None:
    """Every decky is seeded: ssh deckies via ``<name>-ssh``, others via base."""
    monkeypatch.setenv("DECNET_CANARY_BASELINE", "env_file")
    topo_id = "abcdef0123456789"

    # One decky with an ssh service and one without, to hit both branches
    # of the container resolution.
    ssh_decky = {
        "uuid": "u1", "name": "web1",
        "decky_config": {"name": "web1"},
        "services": ["ssh", "http"],
    }
    bare_decky = {
        "uuid": "u2", "name": "router",
        "decky_config": {"name": "router"},
        "services": ["dns"],
    }

    async def _fake_hydrate(_repo, _topo_id):
        assert _topo_id == topo_id
        return {
            "topology": {"id": topo_id},
            "lans": [],
            "deckies": [ssh_decky, bare_decky],
            "edges": [],
        }

    import decnet.canary.planter as _planter_mod

    # Patch the attribute on the persistence module itself: the function
    # under test imports ``hydrate`` from there at call time.
    monkeypatch.setattr(
        "decnet.topology.persistence.hydrate", _fake_hydrate,
    )

    patcher, captured, _stdin = _patch_subprocess(rc=0)
    with patcher:
        rows = await _planter_mod.seed_baseline_topology(repo, topo_id)

    # One token per decky × one generator in the baseline.
    seeded_names = {row["decky_name"] for row in rows}
    assert seeded_names == {"web1", "router"}

    # Captured argv is ``<docker> exec -i <container> ...``; slot 3 is the
    # container each exec was aimed at.
    targeted = sorted([call[3] for call in captured])
    assert targeted == ["decnet_t_abcdef01_router", "web1-ssh"]
@pytest.mark.asyncio
async def test_seed_baseline_topology_returns_empty_for_missing_topology(
    repo: SQLiteRepository, monkeypatch
) -> None:
    """When ``hydrate`` yields ``None`` the seeder returns an empty list."""

    async def _none_hydrate(_repo, _topo_id):
        # Stand-in for a topology id the persistence layer cannot find.
        return None

    monkeypatch.setattr(
        "decnet.topology.persistence.hydrate", _none_hydrate,
    )

    result = await planter.seed_baseline_topology(repo, "missing-id")
    assert result == []
@pytest.mark.asyncio
async def test_seed_baseline_marks_failed_when_docker_errors(
repo: SQLiteRepository, monkeypatch