feat(canary): seed baseline canaries on MazeNET deckies
Topology deploys now plant the configured canary baseline set on every decky in the topology, mirroring the fleet-deploy hook. Containers are resolved via resolve_topology_container — <decky>-ssh when the decky exposes an ssh service, else the topology base container decnet_t_<id8>_<decky>. The planter's plant/revoke/seed_baseline grow an optional container= kwarg; default preserves the fleet <name>-ssh resolution.
This commit is contained in:
@@ -52,6 +52,21 @@ def _container_for(decky_name: str) -> str:
|
||||
return f"{decky_name}{_SSH_CONTAINER_SUFFIX}"
|
||||
|
||||
|
||||
def resolve_topology_container(
    topology_id: str, decky_name: str, services: Iterable[str],
) -> str:
    """Pick the container to ``docker exec`` into for a MazeNET decky.

    When ``"ssh"`` is among *services*, the decky's ssh service container
    (``decky_name`` followed by ``_SSH_CONTAINER_SUFFIX``) is chosen; it
    carries the most realistic filesystem layout, which is the same
    rationale the fleet path uses.

    Otherwise the topology base container is chosen. Its name is built
    from the first eight characters of *topology_id* and *decky_name*,
    following the naming set by
    :func:`decnet.topology.compose._container_name`.
    """
    exposes_ssh = "ssh" in frozenset(services)
    if not exposes_ssh:
        # No ssh service on this decky: fall back to the base container.
        return f"decnet_t_{topology_id[:8]}_{decky_name}"
    return f"{decky_name}{_SSH_CONTAINER_SUFFIX}"
|
||||
|
||||
|
||||
def _dirname(path: str) -> str:
|
||||
idx = path.rfind("/")
|
||||
if idx <= 0:
|
||||
@@ -139,6 +154,7 @@ async def plant(
|
||||
repo: Optional[BaseRepository] = None,
|
||||
publish: bool = True,
|
||||
bus: Optional[BaseBus] = None,
|
||||
container: Optional[str] = None,
|
||||
) -> tuple[bool, Optional[str]]:
|
||||
"""Write *artifact* into the decky's ssh container.
|
||||
|
||||
@@ -158,9 +174,10 @@ async def plant(
|
||||
return False, err
|
||||
|
||||
sh_cmd, stdin_payload = _build_plant_command(artifact)
|
||||
target_container = container or _container_for(decky_name)
|
||||
# ``-i`` keeps stdin attached so base64 -d inside the container can
|
||||
# consume the encoded payload streamed from the host.
|
||||
argv = [_DOCKER, "exec", "-i", _container_for(decky_name), "sh", "-c", sh_cmd]
|
||||
argv = [_DOCKER, "exec", "-i", target_container, "sh", "-c", sh_cmd]
|
||||
rc, _stdout, stderr = await _run(argv, stdin_bytes=stdin_payload)
|
||||
success = rc == 0
|
||||
error = None if success else (stderr.strip()[:256] or f"rc={rc}")
|
||||
@@ -196,6 +213,7 @@ async def revoke(
|
||||
repo: Optional[BaseRepository] = None,
|
||||
publish: bool = True,
|
||||
bus: Optional[BaseBus] = None,
|
||||
container: Optional[str] = None,
|
||||
) -> tuple[bool, Optional[str]]:
|
||||
"""Best-effort unlink + state transition + bus publish.
|
||||
|
||||
@@ -204,7 +222,8 @@ async def revoke(
|
||||
already missing); only docker / container-down errors return False.
|
||||
"""
|
||||
sh_cmd = f"rm -f {shlex.quote(placement_path)}"
|
||||
argv = [_DOCKER, "exec", _container_for(decky_name), "sh", "-c", sh_cmd]
|
||||
target_container = container or _container_for(decky_name)
|
||||
argv = [_DOCKER, "exec", target_container, "sh", "-c", sh_cmd]
|
||||
rc, _stdout, stderr = await _run(argv)
|
||||
success = rc == 0
|
||||
error = None if success else (stderr.strip()[:256] or f"rc={rc}")
|
||||
@@ -250,6 +269,7 @@ async def seed_baseline(
|
||||
persona: str = "linux",
|
||||
created_by: str = "system",
|
||||
bus: Optional[BaseBus] = None,
|
||||
container: Optional[str] = None,
|
||||
) -> list[dict[str, Any]]:
|
||||
"""Plant the configured baseline canary set on one decky.
|
||||
|
||||
@@ -293,9 +313,59 @@ async def seed_baseline(
|
||||
await plant(
|
||||
decky_name, artifact,
|
||||
token_uuid=token_uuid, repo=repo, publish=True, bus=bus,
|
||||
container=container,
|
||||
)
|
||||
out.append({
|
||||
"token_uuid": token_uuid, "generator": gen_name, "kind": kind,
|
||||
"callback_token": slug, "placement_path": artifact.path,
|
||||
})
|
||||
return out
|
||||
|
||||
|
||||
async def seed_baseline_topology(
    repo: BaseRepository,
    topology_id: str,
    *,
    created_by: str = "system",
    bus: Optional[BaseBus] = None,
) -> list[dict[str, Any]]:
    """Seed the baseline canary set on each decky of a MazeNET topology.

    This is the topology counterpart of :func:`seed_baseline`. The
    topology is loaded with ``hydrate``; when it cannot be found a
    warning is logged and an empty list is returned. Deckies without a
    resolvable name are skipped.

    The docker target for each decky comes from
    :func:`resolve_topology_container`, because a topology decky may
    have no ssh service and must then be reached through its base
    container.

    Planting itself is delegated to :func:`seed_baseline`; no extra
    error handling is added here, and the deploy hook treats the return
    value as informational.

    Returns:
        One flat list holding every per-token dict produced for every
        decky, each extended with a ``decky_name`` key.
    """
    from decnet.topology.persistence import hydrate

    snapshot = await hydrate(repo, topology_id)
    if snapshot is None:
        log.warning(
            "canary.seed_baseline_topology: topology %s not found", topology_id,
        )
        return []

    planted: list[dict[str, Any]] = []
    for entry in snapshot["deckies"]:
        # Prefer the name stored in the decky config, then the row name.
        name = (entry.get("decky_config") or {}).get("name") or entry.get("name")
        if not name:
            continue
        target = resolve_topology_container(
            topology_id, name, entry.get("services") or [],
        )
        # MazeNET deckies don't carry an OS persona today; default to
        # linux (every base image we ship is Linux).
        tokens = await seed_baseline(
            name, repo,
            persona="linux", created_by=created_by, bus=bus,
            container=target,
        )
        for token in tokens:
            token["decky_name"] = name
            planted.append(token)
    return planted
|
||||
|
||||
@@ -954,6 +954,18 @@ async def deploy_topology(repo, topology_id: str, *, dry_run: bool = False) -> N
|
||||
await transition_status(repo, topology_id, TopologyStatus.ACTIVE)
|
||||
log.info("topology %s deployed n_lans=%d", topology_id, len(lans))
|
||||
|
||||
# Best-effort canary baseline seed across every decky in the
|
||||
# topology. Same resilience contract as the fleet path: failures
|
||||
# surface as state=failed token rows, never abort the deploy.
|
||||
try:
|
||||
from decnet.canary import planter as _canary_planter
|
||||
await _canary_planter.seed_baseline_topology(repo, topology_id)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
log.warning(
|
||||
"canary baseline seed failed (best-effort) topology=%s err=%s",
|
||||
topology_id, exc,
|
||||
)
|
||||
|
||||
|
||||
@_traced("engine.teardown_topology")
|
||||
async def teardown_topology(repo, topology_id: str) -> None:
|
||||
|
||||
@@ -233,6 +233,98 @@ async def test_seed_baseline_skips_unknown_generator(repo: SQLiteRepository, mon
|
||||
assert {r["generator"] for r in rows} == {"env_file"}
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_plant_honours_explicit_container_override(repo: SQLiteRepository) -> None:
    """An explicit ``container=`` replaces the default ``<name>-ssh`` docker target."""
    override = "decnet_t_abc12345_web1"
    token_row = {
        "uuid": "tok-c", "kind": "http", "decky_name": "web1",
        "generator": "env_file", "placement_path": "/x",
        "callback_token": "slugC", "secret_seed": "s", "created_by": "u1",
    }
    await repo.create_canary_token(token_row)
    artifact = CanaryArtifact(path="/x", content=b"y", generator="env_file")

    patcher, captured, _stdin = _patch_subprocess(rc=0)
    with patcher:
        planted, _err = await planter.plant(
            "web1", artifact, token_uuid="tok-c", repo=repo,
            container=override,
        )

    assert planted is True
    # argv layout: docker exec -i <override-container> ...
    first_call = captured[0]
    assert first_call[3] == override
|
||||
|
||||
|
||||
def test_resolve_topology_container_prefers_ssh_service() -> None:
    """A decky that lists ``ssh`` resolves to its ``<name>-ssh`` container."""
    expected = "web1-ssh"
    resolved = planter.resolve_topology_container(
        "abc123def456", "web1", services=["ssh", "http"],
    )
    assert resolved == expected
|
||||
|
||||
|
||||
def test_resolve_topology_container_falls_back_to_base() -> None:
    """A decky without ``ssh`` resolves to the topology base container."""
    # Expected shape is decnet_t_<id8>_<decky_name>, matching
    # topology.compose._container_name.
    expected = "decnet_t_abc123de_router"
    resolved = planter.resolve_topology_container(
        "abc123def456789", "router", services=["dns"],
    )
    assert resolved == expected
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_seed_baseline_topology_iterates_deckies_and_resolves_container(
    repo: SQLiteRepository, monkeypatch
) -> None:
    """Topology seed targets ``<name>-ssh`` for ssh deckies and the base container otherwise."""
    monkeypatch.setenv("DECNET_CANARY_BASELINE", "env_file")
    topo_id = "abcdef0123456789"

    def _decky(uuid, name, services):
        # Minimal hydrated decky row: just the keys the seeder reads.
        return {
            "uuid": uuid, "name": name,
            "decky_config": {"name": name},
            "services": services,
        }

    async def _fake_hydrate(_repo, _topo_id):
        assert _topo_id == topo_id
        return {
            "topology": {"id": topo_id},
            "lans": [],
            "deckies": [
                _decky("u1", "web1", ["ssh", "http"]),
                _decky("u2", "router", ["dns"]),
            ],
            "edges": [],
        }

    import decnet.canary.planter as _planter_mod
    monkeypatch.setattr(
        "decnet.topology.persistence.hydrate", _fake_hydrate,
    )

    patcher, captured, _stdin = _patch_subprocess(rc=0)
    with patcher:
        rows = await _planter_mod.seed_baseline_topology(repo, topo_id)

    # One token per decky, since the baseline holds a single generator.
    seeded_names = {row["decky_name"] for row in rows}
    assert seeded_names == {"web1", "router"}
    # argv layout: docker exec -i <container> ... so index 3 is the container.
    targets = sorted(call[3] for call in captured)
    assert targets == ["decnet_t_abcdef01_router", "web1-ssh"]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_seed_baseline_topology_returns_empty_for_missing_topology(
    repo: SQLiteRepository, monkeypatch
) -> None:
    """When ``hydrate`` yields ``None`` the topology seed returns no rows."""
    async def _none_hydrate(_repo, _topo_id):
        # Simulate a topology id that persistence cannot find.
        return None

    monkeypatch.setattr(
        "decnet.topology.persistence.hydrate", _none_hydrate,
    )

    seeded = await planter.seed_baseline_topology(repo, "missing-id")
    assert seeded == []
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_seed_baseline_marks_failed_when_docker_errors(
|
||||
repo: SQLiteRepository, monkeypatch
|
||||
|
||||
Reference in New Issue
Block a user