feat(canary): allow custom canaries on MazeNET deckies via API

POST /api/v1/canary/tokens grows an optional topology_id field.  When
present, the server hydrates the topology, validates the named decky is
in it, and resolves the docker container via
planter.resolve_topology_container — <name>-ssh if the decky exposes ssh,
else the topology base container.  Absent ⇒ fleet semantics, unchanged.

The token row gets a nullable topology_id column (no migration helper
per pre-v1 policy).  GET /api/v1/canary/tokens accepts ?topology_id= as
a filter.  DELETE re-resolves the container at revoke time so a
redeployed topology is still reachable; if resolution fails, the revoke
falls back to the planter's fleet default and proceeds best-effort.

422 when the named decky isn't in the topology; 404 when the topology
itself doesn't exist.
This commit is contained in:
2026-04-28 22:34:45 -04:00
parent 5802de1f86
commit 3fe999d706
5 changed files with 296 additions and 2 deletions

View File

@@ -100,6 +100,12 @@ class CanaryToken(SQLModel, table=True):
uuid: str = Field(default_factory=lambda: str(uuid4()), primary_key=True)
kind: str = Field(index=True) # CanaryKind literal at the API layer
decky_name: str = Field(index=True) # FleetDecky.name; no FK (composite PK)
# When NULL, the token is on a fleet decky (decky_name resolves to
# ``<name>-ssh``). When set, it points at a MazeNET topology — the
# planter resolves the container via :func:`resolve_topology_container`.
# No FK: topologies are mutable and we don't want a row to vanish on
# cascade; the row is the historical record of placement.
topology_id: Optional[str] = Field(default=None, index=True)
blob_uuid: Optional[str] = Field(
default=None, foreign_key="canary_blobs.uuid", index=True,
)
@@ -188,6 +194,10 @@ class CanaryTokenCreateRequest(BaseModel):
router so the 400 carries a clear detail message.
"""
decky_name: str = PydanticField(..., min_length=1)
# When set, ``decky_name`` is interpreted as a MazeNET topology decky
# name; the server validates membership and resolves the container
# accordingly. Absent ⇒ fleet semantics (today's behavior).
topology_id: Optional[str] = None
kind: CanaryKind
placement_path: str = PydanticField(..., min_length=1)
blob_uuid: Optional[str] = None
@@ -202,6 +212,7 @@ class CanaryTokenResponse(BaseModel):
uuid: str
kind: CanaryKind
decky_name: str
topology_id: Optional[str] = None
blob_uuid: Optional[str]
instrumenter: Optional[str]
generator: Optional[str]

View File

@@ -936,6 +936,7 @@ class BaseRepository(ABC):
decky_name: Optional[str] = None,
state: Optional[str] = None,
kind: Optional[str] = None,
topology_id: Optional[str] = None,
) -> list[dict[str, Any]]:
raise NotImplementedError

View File

@@ -122,6 +122,7 @@ class CanaryMixin:
decky_name: Optional[str] = None,
state: Optional[str] = None,
kind: Optional[str] = None,
topology_id: Optional[str] = None,
) -> list[dict[str, Any]]:
async with self._session() as session:
stmt = select(CanaryToken)
@@ -131,6 +132,8 @@ class CanaryMixin:
stmt = stmt.where(CanaryToken.state == state)
if kind is not None:
stmt = stmt.where(CanaryToken.kind == kind)
if topology_id is not None:
stmt = stmt.where(CanaryToken.topology_id == topology_id)
stmt = stmt.order_by(desc(CanaryToken.placed_at))
result = await session.execute(stmt)
return [r.model_dump(mode="json") for r in result.scalars().all()]

View File

@@ -61,6 +61,33 @@ def _row_to_response(row: dict[str, Any]) -> CanaryTokenResponse:
return CanaryTokenResponse(**row)
async def _resolve_topology_target(
    topology_id: str, decky_name: str,
) -> str:
    """Map a (topology_id, decky_name) pair to its docker container.

    The topology is hydrated from the repository and scanned for the
    first decky whose name equals ``decky_name``; that decky's services
    are handed to ``planter.resolve_topology_container``, whose result
    is returned.

    Raises:
        HTTPException: 404 when ``hydrate`` returns ``None`` for the
            topology; 422 when no decky in the topology carries the
            requested name.

    Planned to be hoisted into ``decky_io/resolve.py`` in workstream 2
    so the file-drop endpoint can share it; for now it's local to the
    canary router.
    """
    from decnet.topology.persistence import hydrate

    topology = await hydrate(repo, topology_id)
    if topology is None:
        raise HTTPException(status_code=404, detail="topology not found")

    def _declared_name(entry: dict[str, Any]) -> Any:
        # The name under ``decky_config`` wins; the top-level ``name``
        # key is only consulted when that one is missing or empty.
        config = entry.get("decky_config") or {}
        return config.get("name") or entry.get("name")

    target = next(
        (
            entry
            for entry in topology["deckies"]
            if _declared_name(entry) == decky_name
        ),
        None,
    )
    if target is None:
        raise HTTPException(
            status_code=422,
            detail=f"decky {decky_name!r} is not in topology {topology_id!r}",
        )

    # A missing or empty services list is normalised to [] before it
    # reaches the planter.
    return planter.resolve_topology_container(
        topology_id, decky_name, target.get("services") or [],
    )
def _trigger_row_to_response(row: dict[str, Any]) -> CanaryTriggerResponse:
# Decode raw_headers JSON for the response shape.
headers = row.get("raw_headers") or "{}"
@@ -105,6 +132,14 @@ async def api_create_token(
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e)) from e
# Resolve the docker container before any expensive work — surfacing
# 404/422 here keeps a typo from minting a half-baked token row.
container: str | None = None
if req.topology_id:
container = await _resolve_topology_target(
req.topology_id, req.decky_name,
)
slug = token_urlsafe(16)
ctx = CanaryContext(
callback_token=slug, http_base=_http_base(), dns_zone=_dns_zone(),
@@ -145,6 +180,7 @@ async def api_create_token(
"uuid": token_uuid,
"kind": kind,
"decky_name": req.decky_name,
"topology_id": req.topology_id,
"blob_uuid": req.blob_uuid,
"instrumenter": instrumenter_name,
"generator": req.generator,
@@ -154,7 +190,10 @@ async def api_create_token(
"created_by": admin.get("uuid", "unknown"),
"state": "planted",
})
await planter.plant(req.decky_name, artifact, token_uuid=token_uuid, repo=repo)
await planter.plant(
req.decky_name, artifact,
token_uuid=token_uuid, repo=repo, container=container,
)
row = await repo.get_canary_token(token_uuid)
return _row_to_response(row)
@@ -173,10 +212,12 @@ async def api_list_tokens(
decky_name: str | None = Query(default=None),
state: str | None = Query(default=None),
kind: str | None = Query(default=None),
topology_id: str | None = Query(default=None),
viewer: dict = Depends(require_viewer),
) -> CanaryTokensResponse:
rows = await repo.list_canary_tokens(
decky_name=decky_name, state=state, kind=kind,
topology_id=topology_id,
)
return CanaryTokensResponse(
tokens=[_row_to_response(r) for r in rows],
@@ -311,8 +352,21 @@ async def api_revoke_token(
row = await repo.get_canary_token(uuid)
if row is None:
raise HTTPException(status_code=404, detail="token not found")
# Re-resolve the container at revoke time: the topology may have
# been redeployed since placement.  If resolution fails (the topology
# is gone, or the decky is no longer in it) we fall through to the
# planter's fleet default — the revoke is best-effort and the row
# still flips to revoked.
container: str | None = None
topology_id = row.get("topology_id")
if topology_id:
try:
container = await _resolve_topology_target(
topology_id, row["decky_name"],
)
except HTTPException:
container = None
await planter.revoke(
row["decky_name"], row["placement_path"],
token_uuid=uuid, repo=repo,
token_uuid=uuid, repo=repo, container=container,
)
return MessageResponse(message="ok")