# SPDX-License-Identifier: AGPL-3.0-or-later
"""Plant / revoke canary artifacts inside running decky containers.

Single entry point per operation:

* :func:`plant` writes a :class:`CanaryArtifact` into one decky's
  filesystem via ``docker exec`` (mirroring the SSH driver's
  ``_run_file`` pattern), backdates the mtime, sets the requested
  mode, and publishes ``canary.{token_id}.placed`` on the bus.
* :func:`revoke` unlinks the file (best-effort) and publishes
  ``canary.{token_id}.revoked``.
* :func:`seed_baseline` is the deploy-hook helper: synthesises the
  configured baseline set for one decky, persists rows, plants each.
  Failures are logged but do **not** abort the deploy (the deployer
  hook calls this best-effort).

We don't reuse :class:`SSHDriver` directly because the orchestrator
driver is tied to its action types (``FileAction`` carries str content;
canary content is bytes). The planter takes the same shape but speaks
bytes-via-base64 over the wire.
"""
from __future__ import annotations

import os
from datetime import datetime, timedelta, timezone
from secrets import token_urlsafe
from typing import Any, Iterable, Optional
from uuid import uuid4

from decnet.bus import topics
from decnet.bus.base import BaseBus
from decnet.bus.factory import get_bus
from decnet.canary.base import CanaryArtifact, CanaryContext
from decnet.canary.factory import get_generator
from decnet.canary.paths import default_path_for
from decnet.decky_io import (
    delete_file_from_container,
    resolve_topology_container,
    write_file_to_container,
)
from decnet.logging import get_logger
from decnet.web.db.repository import BaseRepository

log = get_logger("canary.planter")

# Container suffix — matches the orchestrator SSH driver's convention
# (``-ssh``). Canary placement always happens through the
# ssh container because every decky has one and it carries the most
# realistic filesystem layout.
_SSH_CONTAINER_SUFFIX = "-ssh"


def _container_for(decky_name: str) -> str:
    """Return the default ssh container name for *decky_name*."""
    return f"{decky_name}{_SSH_CONTAINER_SUFFIX}"


# resolve_topology_container is re-exported from decky_io for back-compat
# with callers (tests, deploy hook) that imported it from this module
# before the decky_io extraction.
__all__ = [
    "plant",
    "revoke",
    "resolve_topology_container",
    "seed_baseline",
    "seed_baseline_topology",
]


async def _publish(
    bus: Optional[BaseBus],
    topic: str,
    payload: dict[str, Any],
) -> None:
    """Best-effort publish — never raises on bus errors.

    When ``bus`` is None we resolve one via :func:`get_bus`, connect it,
    publish, and close it again; a caller-supplied bus is only published
    to and its lifecycle is left to the caller. Either way bus-side
    failures are logged and swallowed (delivery is at-most-once by
    contract; the DB row is source of truth).
    """
    owns_bus = bus is None
    try:
        target = get_bus() if owns_bus else bus
        if owns_bus:
            await target.connect()
        try:
            await target.publish(topic, payload)
        finally:
            # A bus we opened ourselves must be closed even when publish
            # fails or the task is cancelled; otherwise the connection
            # leaks. A close failure is reported on its own so it cannot
            # mask the publish error.
            if owns_bus:
                try:
                    await target.close()
                except Exception as close_err:  # noqa: BLE001
                    log.warning(
                        "canary bus close failed topic=%s err=%s",
                        topic, close_err,
                    )
    except Exception as e:  # noqa: BLE001
        log.warning("canary bus publish failed topic=%s err=%s", topic, e)


async def plant(
    decky_name: str,
    artifact: CanaryArtifact,
    *,
    token_uuid: str,
    repo: Optional[BaseRepository] = None,
    publish: bool = True,
    bus: Optional[BaseBus] = None,
    container: Optional[str] = None,
) -> tuple[bool, Optional[str]]:
    """Write *artifact* into the decky's ssh container.

    Returns ``(success, error_or_none)``. When ``repo`` is provided the
    token row's state is updated to ``planted`` / ``failed`` accordingly.
    When ``publish`` is True a ``canary.{token_id}.placed`` event is
    published on the bus on success. ``container`` overrides the default
    ``<decky_name>-ssh`` target.

    The function never raises on docker errors — callers (the API, the
    deploy hook) treat the result as data.
    """
    if not artifact.path:
        # Nothing can be written without a destination; record the failure
        # on the token row (when we have a repo) and bail out early.
        err = "planter requires a non-empty artifact.path"
        log.warning(
            "canary.plant skipped: %s decky=%s token=%s",
            err, decky_name, token_uuid,
        )
        if repo is not None:
            await repo.update_canary_token_state(token_uuid, "failed", err)
        return False, err

    target_container = container or _container_for(decky_name)
    # The artifact's offset (in seconds) is added to "now" to produce the
    # mtime handed to the writer.
    mtime = datetime.now(timezone.utc) + timedelta(seconds=artifact.mtime_offset)
    success, error = await write_file_to_container(
        target_container,
        artifact.path,
        artifact.content,
        mode=artifact.mode,
        mtime=mtime,
    )

    if repo is not None:
        if success:
            await repo.update_canary_token_state(token_uuid, "planted", None)
        else:
            await repo.update_canary_token_state(token_uuid, "failed", error)

    if success and publish:
        await _publish(bus, topics.canary(token_uuid, topics.CANARY_PLACED), {
            "token_id": token_uuid,
            "decky_name": decky_name,
            "placement_path": artifact.path,
            "instrumenter": artifact.instrumenter,
            "generator": artifact.generator,
        })

    if not success:
        log.warning(
            "canary.plant failed decky=%s token=%s container=%s err=%r",
            decky_name, token_uuid, target_container, error,
        )
    return success, error


async def revoke(
    decky_name: str,
    placement_path: str,
    *,
    token_uuid: str,
    repo: Optional[BaseRepository] = None,
    publish: bool = True,
    bus: Optional[BaseBus] = None,
    container: Optional[str] = None,
) -> tuple[bool, Optional[str]]:
    """Best-effort unlink + state transition + bus publish.

    Returns ``(success, error_or_none)``. ``success`` is True when the
    file is gone after the call (whether we deleted it or it was already
    missing); only docker / container-down errors return False.

    The token row (when ``repo`` is given) is moved to ``revoked`` and the
    ``canary.{token_id}.revoked`` event (when ``publish`` is True) is sent
    regardless of whether the unlink succeeded; on failure the error text
    is stored alongside the ``revoked`` state.
    """
    target_container = container or _container_for(decky_name)
    success, error = await delete_file_from_container(
        target_container, placement_path,
    )

    if repo is not None:
        await repo.update_canary_token_state(
            token_uuid, "revoked", error if not success else None,
        )

    if publish:
        await _publish(bus, topics.canary(token_uuid, topics.CANARY_REVOKED), {
            "token_id": token_uuid,
            "decky_name": decky_name,
            "placement_path": placement_path,
        })
    return success, error


def _baseline_set() -> Iterable[str]:
    """Return the configured baseline generator names.

    Honors ``DECNET_CANARY_BASELINE`` (comma-separated). Default is a
    sensible mix that exercises every callback-bearing generator plus a
    passive aws_creds drop for realism. Blank entries and surrounding
    whitespace are dropped.
    """
    raw = os.environ.get(
        "DECNET_CANARY_BASELINE",
        "git_config,env_file,honeydoc,aws_creds",
    )
    return [n.strip() for n in raw.split(",") if n.strip()]


def _ctx_for(slug: str) -> CanaryContext:
    """Build a :class:`CanaryContext` from the canary worker config.

    Reads ``DECNET_CANARY_HTTP_BASE`` (default ``http://localhost:8088``)
    and ``DECNET_CANARY_DNS_ZONE`` (default empty) from the environment;
    *slug* becomes the context's callback token.
    """
    base = os.environ.get("DECNET_CANARY_HTTP_BASE", "http://localhost:8088")
    zone = os.environ.get("DECNET_CANARY_DNS_ZONE", "")
    return CanaryContext(callback_token=slug, http_base=base, dns_zone=zone)


async def seed_baseline(
    decky_name: str,
    repo: BaseRepository,
    *,
    persona: str = "linux",
    created_by: str = "system",
    bus: Optional[BaseBus] = None,
    container: Optional[str] = None,
) -> list[dict[str, Any]]:
    """Plant the configured baseline canary set on one decky.

    Best-effort: any individual placement that fails is logged and the
    row is left in ``state=failed``; the deployer hook treats the return
    value as informational, not authoritative. Unknown generator names
    are logged and skipped.

    Returns the list of token rows created (whether their planting
    ultimately succeeded or not), so the caller can surface them in the
    deploy report.
    """
    out: list[dict[str, Any]] = []
    for gen_name in _baseline_set():
        try:
            generator = get_generator(gen_name)
        except ValueError:
            log.warning(
                "canary.seed_baseline: unknown generator %r — skipping",
                gen_name,
            )
            continue

        # One fresh random slug per token; it is stored as both the
        # callback token and the secret seed of the row below.
        slug = token_urlsafe(16)
        ctx = _ctx_for(slug)
        artifact = generator.generate(ctx)
        artifact.path = default_path_for(gen_name, persona)
        kind = "aws_passive" if gen_name == "aws_creds" else "http"

        # Persist first so the planter has a row to update.
        # NOTE(review): the row is inserted as ``planted``, so a crash
        # between this insert and plant() leaves it marked planted, not
        # failed — confirm something reconciles that case.
        token_uuid = str(uuid4())
        await repo.create_canary_token({
            "uuid": token_uuid,
            "kind": kind,
            "decky_name": decky_name,
            "blob_uuid": None,
            "instrumenter": None,
            "generator": gen_name,
            "placement_path": artifact.path,
            "callback_token": slug,
            "secret_seed": slug,
            "created_by": created_by,
            "state": "planted",  # optimistic — plant() flips to failed on error
        })
        await plant(
            decky_name, artifact,
            token_uuid=token_uuid, repo=repo, publish=True, bus=bus,
            container=container,
        )
        out.append({
            "token_uuid": token_uuid,
            "generator": gen_name,
            "kind": kind,
            "callback_token": slug,
            "placement_path": artifact.path,
        })
    return out


async def seed_baseline_topology(
    repo: BaseRepository,
    topology_id: str,
    *,
    created_by: str = "system",
    bus: Optional[BaseBus] = None,
) -> list[dict[str, Any]]:
    """Plant baseline canaries on every decky in a MazeNET topology.

    Mirrors :func:`seed_baseline` for the topology path. Container name
    resolution uses :func:`resolve_topology_container` since topology
    deckies may not have an ssh service — in that case we target the
    base container instead.

    Best-effort: failures on any single decky are logged inside
    :func:`plant`; the deploy hook treats the return value as
    informational. An unknown topology is logged and yields ``[]``;
    deckies without a resolvable name are skipped.

    Returns a flat list of per-token dicts (with an added ``decky_name``
    key) across all deckies.
    """
    # Function-scope import kept from the original — presumably to avoid
    # an import cycle with the topology package; confirm before hoisting.
    from decnet.topology.persistence import hydrate

    hydrated = await hydrate(repo, topology_id)
    if hydrated is None:
        log.warning(
            "canary.seed_baseline_topology: topology %s not found",
            topology_id,
        )
        return []

    out: list[dict[str, Any]] = []
    for decky in hydrated["deckies"]:
        cfg = decky.get("decky_config") or {}
        # Prefer the name in the decky's config, fall back to the
        # top-level name on the hydrated record.
        decky_name = cfg.get("name") or decky.get("name")
        if not decky_name:
            continue
        services = decky.get("services") or []
        container = resolve_topology_container(topology_id, decky_name, services)
        # MazeNET deckies don't carry an OS persona today; default to
        # linux (every base image we ship is Linux).
        rows = await seed_baseline(
            decky_name, repo,
            persona="linux", created_by=created_by, bus=bus,
            container=container,
        )
        for r in rows:
            r["decky_name"] = decky_name
            out.append(r)
    return out