def resolve_topology_container(
    topology_id: str, decky_name: str, services: Optional[Iterable[str]],
) -> str:
    """Return the container name to ``docker exec`` into for a MazeNET decky.

    The ssh service container wins when the decky exposes an ``ssh``
    service; its name is ``<decky_name>`` plus the module's ssh container
    suffix, i.e. the same name the fleet path resolves. Otherwise the
    topology base container is targeted, named
    ``decnet_t_<first 8 chars of topology_id>_<decky_name>``. That format
    is expected to stay in sync with
    :func:`decnet.topology.compose._container_name` (not enforced here).

    Args:
        topology_id: Topology identifier; only its first 8 characters are
            used in the base container name.
        decky_name: Name of the decky inside the topology.
        services: Service names exposed by the decky. ``None`` is treated
            as "no services". A bare string is treated as a single
            service name rather than being iterated character by
            character.

    Returns:
        The container name to pass to ``docker exec``.
    """
    if services is None:
        names: Iterable[str] = ()
    elif isinstance(services, str):
        # A lone "ssh" must not be exploded into {"s", "h"}, which would
        # silently select the base container.
        names = (services,)
    else:
        names = services

    if any(name == "ssh" for name in names):
        return f"{decky_name}{_SSH_CONTAINER_SUFFIX}"
    return f"decnet_t_{topology_id[:8]}_{decky_name}"
async def seed_baseline_topology(
    repo: BaseRepository,
    topology_id: str,
    *,
    created_by: str = "system",
    bus: Optional[BaseBus] = None,
) -> list[dict[str, Any]]:
    """Plant baseline canaries on every decky in a MazeNET topology.

    Topology counterpart of :func:`seed_baseline`. The target container
    for each decky comes from :func:`resolve_topology_container`, because
    a topology decky may have no ssh service and must then be reached
    through its base container.

    Best-effort: an exception while seeding one decky is logged and that
    decky is skipped, so the remaining deckies are still seeded. Callers
    should treat the return value as informational.

    Args:
        repo: Repository handed to ``hydrate`` and :func:`seed_baseline`.
        topology_id: Identifier of the topology to seed.
        created_by: Forwarded to :func:`seed_baseline`.
        bus: Optional bus forwarded to :func:`seed_baseline`.

    Returns:
        A flat list of the per-token dicts produced by
        :func:`seed_baseline` across all deckies, each with an added
        ``decky_name`` key. Empty when the topology cannot be hydrated.
    """
    # Kept function-local: the name is looked up at call time, which the
    # test-suite relies on when it patches
    # ``decnet.topology.persistence.hydrate``.
    from decnet.topology.persistence import hydrate

    hydrated = await hydrate(repo, topology_id)
    if hydrated is None:
        log.warning(
            "canary.seed_baseline_topology: topology %s not found", topology_id,
        )
        return []

    out: list[dict[str, Any]] = []
    for decky in hydrated["deckies"]:
        cfg = decky.get("decky_config") or {}
        decky_name = cfg.get("name") or decky.get("name")
        if not decky_name:
            # Without a name there is no container to address.
            continue
        services = decky.get("services") or []
        container = resolve_topology_container(topology_id, decky_name, services)
        try:
            # MazeNET deckies don't carry an OS persona today; default to
            # linux (every base image we ship is Linux).
            rows = await seed_baseline(
                decky_name, repo,
                persona="linux", created_by=created_by, bus=bus,
                container=container,
            )
        except Exception as exc:  # noqa: BLE001
            # One broken decky must not stop the rest of the topology
            # from being seeded.
            log.warning(
                "canary.seed_baseline_topology: seed failed (best-effort) "
                "topology=%s decky=%s err=%s",
                topology_id, decky_name, exc,
            )
            continue
        for row in rows:
            row["decky_name"] = decky_name
        out.extend(rows)
    return out
@pytest.mark.asyncio
async def test_plant_honours_explicit_container_override(repo: SQLiteRepository) -> None:
    """``container=`` lets MazeNET callers target a non-``-ssh`` container."""
    await repo.create_canary_token({
        "uuid": "tok-c", "kind": "http", "decky_name": "web1",
        "generator": "env_file", "placement_path": "/x",
        "callback_token": "slugC", "secret_seed": "s", "created_by": "u1",
    })
    art = CanaryArtifact(path="/x", content=b"y", generator="env_file")
    patcher, captured, _stdin = _patch_subprocess(rc=0)
    with patcher:
        ok, _err = await planter.plant(
            "web1", art, token_uuid="tok-c", repo=repo,
            container="decnet_t_abc12345_web1",
        )
    assert ok is True
    # argv is ``docker exec -i <container> ...`` — index 3 is the container.
    assert captured[0][3] == "decnet_t_abc12345_web1"


def test_resolve_topology_container_prefers_ssh_service() -> None:
    """A decky exposing ``ssh`` resolves to its ``-ssh`` container."""
    name = planter.resolve_topology_container(
        "abc123def456", "web1", services=["ssh", "http"],
    )
    assert name == "web1-ssh"


def test_resolve_topology_container_falls_back_to_base() -> None:
    """Without an ``ssh`` service the topology base container is used."""
    name = planter.resolve_topology_container(
        "abc123def456789", "router", services=["dns"],
    )
    # decnet_t_<topology_id[:8]>_<decky_name>; matches
    # topology.compose._container_name.
    assert name == "decnet_t_abc123de_router"


@pytest.mark.asyncio
async def test_seed_baseline_topology_iterates_deckies_and_resolves_container(
    repo: SQLiteRepository, monkeypatch
) -> None:
    """Topology seed: ssh-bearing decky → ``-ssh``; bare decky → base."""
    monkeypatch.setenv("DECNET_CANARY_BASELINE", "env_file")
    topo_id = "abcdef0123456789"

    async def _fake_hydrate(_repo, _topo_id):
        assert _topo_id == topo_id
        return {
            "topology": {"id": topo_id},
            "lans": [],
            "deckies": [
                {
                    "uuid": "u1", "name": "web1",
                    "decky_config": {"name": "web1"},
                    "services": ["ssh", "http"],
                },
                {
                    "uuid": "u2", "name": "router",
                    "decky_config": {"name": "router"},
                    "services": ["dns"],
                },
            ],
            "edges": [],
        }

    monkeypatch.setattr(
        "decnet.topology.persistence.hydrate", _fake_hydrate,
    )

    patcher, captured, _stdin = _patch_subprocess(rc=0)
    with patcher:
        # Use the module-level ``planter`` like the other tests in this
        # file instead of re-importing it under a second alias.
        rows = await planter.seed_baseline_topology(repo, topo_id)

    # One token per decky × one generator in the baseline.
    assert {r["decky_name"] for r in rows} == {"web1", "router"}
    # argv is ``docker exec -i <container> ...`` — index 3 is the container.
    containers = sorted(argv[3] for argv in captured)
    assert containers == ["decnet_t_abcdef01_router", "web1-ssh"]


@pytest.mark.asyncio
async def test_seed_baseline_topology_returns_empty_for_missing_topology(
    repo: SQLiteRepository, monkeypatch
) -> None:
    """A topology that ``hydrate`` cannot find yields an empty result."""
    async def _none_hydrate(_repo, _topo_id):
        return None
    monkeypatch.setattr(
        "decnet.topology.persistence.hydrate", _none_hydrate,
    )
    rows = await planter.seed_baseline_topology(repo, "missing-id")
    assert rows == []