feat(web): DELETE /deckies/{name} single-decky teardown endpoint

The Fleet module had no delete — neither UI nor API — though the engine
capability existed (engine.teardown(decky_id=...), exposed only via
`decnet teardown --id`). Wire it to HTTP.

DELETE /deckies/{decky_name} (admin-gated, returns 204). Synchronous: a single
decky's compose stop/rm is quick, so the handler awaits it off-thread instead
of taking the 202+lifecycle path that deploy/mutate use for slow builds. The
single-decky teardown never touches the host macvlan interface, so it needs no
extra CAP_NET_ADMIN.

State consistency: engine.teardown removes the containers and the
fleet_deckies row but leaves the decky in decnet-state.json. Left as is, the
reconciler would see "present in JSON, absent from DB" and re-INSERT the row,
resurrecting the decky. So the handler prunes it from both decnet-state.json
and the DB deployment key after teardown; deleting the last decky clears
state entirely (DecnetConfig.deckies has min_length=1).

Route ordering: the dynamic DELETE /deckies/{decky_name} is registered AFTER
the fixed /deckies/* routes (Starlette matches in registration order), so it
no longer shadows DELETE /deckies/files (file-drop).

Tests cover 401/403/404/422, single-delete pruning, and last-decky clear.
This commit is contained in:
2026-06-16 12:07:10 -04:00
parent 8db593a544
commit 0c10869e26
3 changed files with 211 additions and 0 deletions

View File

@@ -15,6 +15,7 @@ from .fleet.api_get_deckies import router as get_deckies_router
from .fleet.api_mutate_decky import router as mutate_decky_router
from .fleet.api_mutate_interval import router as mutate_interval_router
from .fleet.api_deploy_deckies import router as deploy_deckies_router
from .fleet.api_teardown_decky import router as teardown_decky_router
from .fleet.api_lifecycle import router as lifecycle_router
from .stream.api_stream_events import router as stream_router
from .attackers.api_get_attackers import router as attackers_router
@@ -196,6 +197,12 @@ api_router.include_router(topology_router)
api_router.include_router(canary_router)
api_router.include_router(deckies_router)
# Ordering matters here. The single-decky teardown router is registered LAST
# among the /deckies/* routers: Starlette matches routes in registration
# order, so its dynamic DELETE /deckies/{decky_name} would otherwise shadow
# the fixed paths (e.g. DELETE /deckies/files). Fixed paths must be declared
# before the variable path — keep any new /deckies/* router above this line.
api_router.include_router(teardown_decky_router)
# External webhook subscriptions (SIEM/SOAR egress)
api_router.include_router(webhooks_router)

View File

@@ -0,0 +1,92 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""DELETE /deckies/{name} — operator-triggered single-decky teardown.
Exposes the engine's per-decky teardown (previously CLI-only via
``decnet teardown --id <name>``). Synchronous: the compose stop/rm of one
decky's services is quick, so we await it off-thread and return 204 rather
than the 202+lifecycle dance that deploy/mutate use for slow image builds.
The single-decky teardown path does NOT touch the host macvlan interface
(that's only the teardown-all branch), so it needs no CAP_NET_ADMIN beyond
what the web service already holds.
State consistency is the subtle part. ``engine.teardown`` removes the
containers and the decky's ``fleet_deckies`` row, but it does NOT prune the
decky from ``decnet-state.json``. If we left it there, the reconciler would
see "present in JSON, absent from DB" and re-INSERT the row — resurrecting
the decky in the UI. So we prune it from both decnet-state.json (load/save)
and the DB ``deployment`` key (the mutate plane's store) after teardown.
"""
import asyncio
import os
import anyio
from fastapi import APIRouter, Depends, HTTPException, Path as PathParam, Response, status
from decnet.config import clear_state, load_state, save_state
from decnet.logging import get_logger
from decnet.telemetry import traced as _traced
from decnet.web.db.models import LOCAL_HOST_SENTINEL
from decnet.web.dependencies import require_admin, repo
# Module-level logger for this endpoint, registered under the "api.teardown" name.
log = get_logger("api.teardown")
# Router that carries the single-decky DELETE route defined below.
router = APIRouter()
# DELETE /deckies/{decky_name}: tear down one decky and remove it from every
# store that tracks the fleet.
#
#   decky_name  path parameter; must match ^[a-z0-9\-]{1,64}$ (422 otherwise).
#   admin       resolved by require_admin; not read in the body, it only gates
#               the route.
#
# Returns an empty 204 response. Raises HTTPException(404) when load_state()
# yields nothing, or when no decky in the loaded config carries that name.
#
# Kept as comments rather than a docstring: FastAPI publishes a handler's
# docstring as the operation description in the generated OpenAPI schema.
@router.delete(
    "/deckies/{decky_name}",
    tags=["Fleet Management"],
    status_code=status.HTTP_204_NO_CONTENT,
    responses={
        204: {"description": "Decky torn down and removed from the fleet"},
        401: {"description": "Could not validate credentials"},
        403: {"description": "Insufficient permissions"},
        404: {"description": "No active deployment, or decky not found"},
        422: {"description": "Path parameter validation error (decky_name must match ^[a-z0-9\\-]{1,64}$)"},
    },
)
@_traced("api.teardown_decky")
async def api_teardown_decky(
    decky_name: str = PathParam(..., pattern=r"^[a-z0-9\-]{1,64}$"),
    admin: dict = Depends(require_admin),
) -> Response:
    # load_state does file I/O, so it runs off the event loop.
    state = await asyncio.to_thread(load_state)
    if state is None:
        raise HTTPException(status_code=404, detail="No active deployment")
    config, compose_path = state

    # First decky with a matching name wins; the for/else raises when the
    # loop finishes without finding one.
    for candidate in config.deckies:
        if candidate.name == decky_name:
            target = candidate
            break
    else:
        raise HTTPException(status_code=404, detail=f"Decky {decky_name} not found")

    contract_mode = os.environ.get("DECNET_CONTRACT_TEST") == "true"
    if contract_mode:
        # Contract tests run without docker, so the engine call is skipped;
        # the fleet_deckies row is still dropped so the inventory reflects
        # the deletion.
        await repo.delete_fleet_decky(
            host_uuid=target.host_uuid or LOCAL_HOST_SENTINEL,
            name=decky_name,
        )
    else:
        # The engine call stops/removes the decky's containers, emits a
        # retirement lifecycle event and drops its fleet_deckies row. It is
        # synchronous, hence the worker thread.
        from decnet.engine import teardown as engine_teardown

        await anyio.to_thread.run_sync(engine_teardown, decky_name)

    # Prune the decky from persisted state as well; otherwise the reconciler
    # would find it in the JSON but not in the DB and re-INSERT the row.
    survivors = [d for d in config.deckies if d.name != decky_name]
    if not survivors:
        # DecnetConfig.deckies has min_length=1, so an empty fleet cannot be
        # persisted as a config: clear both stores instead.
        await asyncio.to_thread(clear_state)
        await repo.set_state("deployment", None)
    else:
        config.deckies = survivors
        await asyncio.to_thread(save_state, config, compose_path)
        snapshot = {"config": config.model_dump(), "compose_path": str(compose_path)}
        await repo.set_state("deployment", snapshot)

    log.info("decky torn down via API decky=%s remaining=%d", decky_name, len(survivors))
    return Response(status_code=status.HTTP_204_NO_CONTENT)

View File

@@ -0,0 +1,112 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""DELETE /deckies/{name} — single-decky teardown.
The handler must:
1. Reject anonymous callers (401) and non-admins (403).
2. 404 when no active deployment exists, or the named decky isn't in it.
3. 422 when decky_name fails the path pattern.
4. On the happy path: drop the decky's fleet_deckies row AND prune it from
decnet-state.json (so the reconciler can't resurrect it), leaving the rest
of the fleet intact; deleting the last decky clears state entirely.
Under DECNET_CONTRACT_TEST the engine teardown (docker) is skipped; the
handler still removes the fleet_deckies row and prunes state, which is what
these tests assert.
"""
from __future__ import annotations
import httpx
import pytest
from decnet.config import load_state
from decnet.web.dependencies import repo
@pytest.fixture(autouse=True)
def contract_test_mode(monkeypatch):
    """Set DECNET_CONTRACT_TEST=true for every test in this module.

    With the flag set, the handler skips the engine (docker) teardown and
    deletes the fleet_deckies row itself.
    """
    monkeypatch.setenv(name="DECNET_CONTRACT_TEST", value="true")
@pytest.mark.anyio
async def test_unauthenticated_returns_401(client: httpx.AsyncClient):
    """A DELETE sent without an Authorization header comes back as 401."""
    response = await client.delete("/api/v1/deckies/test-decky-1")
    assert response.status_code == 401
@pytest.mark.anyio
async def test_viewer_forbidden_403(client, viewer_token, mock_state_file, mock_fleet_deckies):
    """A viewer-role token is authenticated but not allowed to delete: 403."""
    viewer_headers = {"Authorization": f"Bearer {viewer_token}"}
    response = await client.delete("/api/v1/deckies/test-decky-1", headers=viewer_headers)
    assert response.status_code == 403
@pytest.mark.anyio
async def test_no_deployment_returns_404(client, auth_token):
    """With no state file on disk the endpoint reports a missing deployment."""
    # patch_state_file (autouse) points STATE_FILE at an empty tmp path with no
    # file written, so load_state() returns None.
    admin_headers = {"Authorization": f"Bearer {auth_token}"}
    response = await client.delete("/api/v1/deckies/test-decky-1", headers=admin_headers)
    assert response.status_code == 404
    detail = response.json()["detail"]
    assert "deployment" in detail.lower()
@pytest.mark.anyio
async def test_unknown_decky_returns_404(client, auth_token, mock_state_file):
    """A name absent from the active deployment yields a 404 that echoes it."""
    admin_headers = {"Authorization": f"Bearer {auth_token}"}
    response = await client.delete("/api/v1/deckies/does-not-exist", headers=admin_headers)
    assert response.status_code == 404
    detail = response.json()["detail"]
    assert "does-not-exist" in detail
@pytest.mark.anyio
async def test_invalid_name_returns_422(client, auth_token, mock_state_file):
    """A decky_name that fails the path pattern is rejected with 422."""
    admin_headers = {"Authorization": f"Bearer {auth_token}"}
    # Uppercase + underscore violate the pattern.
    bad_url = "/api/v1/deckies/Bad_Name"
    response = await client.delete(bad_url, headers=admin_headers)
    assert response.status_code == 422
@pytest.mark.anyio
async def test_delete_removes_decky_and_prunes_state(
    client, auth_token, mock_state_file, mock_fleet_deckies,
):
    """Removing one decky deletes its fleet_deckies row and its entry in
    decnet-state.json, while the sibling decky survives in both stores."""
    admin_headers = {"Authorization": f"Bearer {auth_token}"}
    response = await client.delete("/api/v1/deckies/test-decky-1", headers=admin_headers)
    assert response.status_code == 204, response.text

    # fleet_deckies row gone (the store the UI reads), sibling untouched.
    rows = await repo.list_fleet_deckies()
    assert {row["name"] for row in rows} == {"test-decky-2"}

    # decnet-state.json pruned so the reconciler can't resurrect it.
    state = load_state()
    assert state is not None
    persisted_config = state[0]
    assert {decky.name for decky in persisted_config.deckies} == {"test-decky-2"}
@pytest.mark.anyio
async def test_delete_last_decky_clears_state(
    client, auth_token, mock_state_file, mock_fleet_deckies,
):
    """Deleting every decky leaves no state behind: an empty-fleet config is
    invalid (DecnetConfig.deckies min_length=1), so state is cleared instead."""
    admin_headers = {"Authorization": f"Bearer {auth_token}"}
    for decky_name in ("test-decky-1", "test-decky-2"):
        response = await client.delete(
            f"/api/v1/deckies/{decky_name}",
            headers=admin_headers,
        )
        assert response.status_code == 204, response.text

    leftover_rows = await repo.list_fleet_deckies()
    assert leftover_rows == []
    assert load_state() is None