From 0c10869e2657a2723784f6c7236f723aea7da0f2 Mon Sep 17 00:00:00 2001 From: anti Date: Tue, 16 Jun 2026 12:07:10 -0400 Subject: [PATCH] feat(web): DELETE /deckies/{name} single-decky teardown endpoint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Fleet module had no delete — neither UI nor API — though the engine capability existed (engine.teardown(decky_id=...), exposed only via `decnet teardown --id`). Wire it to HTTP. DELETE /deckies/{name} (admin-gated, 204). Synchronous: a single decky's compose stop/rm is quick, so it's awaited off-thread rather than routed through the 202+lifecycle path that deploy/mutate use for slow builds. The single-decky teardown never touches the host macvlan interface, so it needs no extra CAP_NET_ADMIN. State consistency: engine.teardown removes the containers and the fleet_deckies row but leaves the decky in decnet-state.json. Left as is, the reconciler would see "present in JSON, absent from DB" and re-INSERT the row, resurrecting the decky. So the handler prunes it from both decnet-state.json and the DB deployment key after teardown; deleting the last decky clears state entirely (DecnetConfig.deckies has min_length=1, so an empty fleet cannot be persisted as a valid config). Route ordering: the dynamic DELETE /deckies/{decky_name} is registered AFTER the fixed /deckies/* routes (Starlette matches in registration order), so it does not shadow DELETE /deckies/files (file-drop). Tests cover 401/403/404/422, single-delete pruning, and last-decky clear. 
--- decnet/web/router/__init__.py | 7 ++ decnet/web/router/fleet/api_teardown_decky.py | 92 ++++++++++++++ tests/api/fleet/test_teardown_decky.py | 112 ++++++++++++++++++ 3 files changed, 211 insertions(+) create mode 100644 decnet/web/router/fleet/api_teardown_decky.py create mode 100644 tests/api/fleet/test_teardown_decky.py diff --git a/decnet/web/router/__init__.py b/decnet/web/router/__init__.py index ed58b687..2ae228b9 100644 --- a/decnet/web/router/__init__.py +++ b/decnet/web/router/__init__.py @@ -15,6 +15,7 @@ from .fleet.api_get_deckies import router as get_deckies_router from .fleet.api_mutate_decky import router as mutate_decky_router from .fleet.api_mutate_interval import router as mutate_interval_router from .fleet.api_deploy_deckies import router as deploy_deckies_router +from .fleet.api_teardown_decky import router as teardown_decky_router from .fleet.api_lifecycle import router as lifecycle_router from .stream.api_stream_events import router as stream_router from .attackers.api_get_attackers import router as attackers_router @@ -196,6 +197,12 @@ api_router.include_router(topology_router) api_router.include_router(canary_router) api_router.include_router(deckies_router) +# Single-decky teardown LAST among /deckies/* routes: its dynamic +# DELETE /deckies/{decky_name} would otherwise shadow the fixed paths +# (e.g. DELETE /deckies/files) since Starlette matches in registration +# order. Fixed paths must be declared before the variable path. +api_router.include_router(teardown_decky_router) + # External webhook subscriptions (SIEM/SOAR egress) api_router.include_router(webhooks_router) diff --git a/decnet/web/router/fleet/api_teardown_decky.py b/decnet/web/router/fleet/api_teardown_decky.py new file mode 100644 index 00000000..cf7a86bf --- /dev/null +++ b/decnet/web/router/fleet/api_teardown_decky.py @@ -0,0 +1,92 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +"""DELETE /deckies/{name} — operator-triggered single-decky teardown. 
+ +Exposes the engine's per-decky teardown (previously CLI-only via +``decnet teardown --id ``). Synchronous: the compose stop/rm of one +decky's services is quick, so we await it off-thread and return 204 rather +than the 202+lifecycle dance that deploy/mutate use for slow image builds. + +The single-decky teardown path does NOT touch the host macvlan interface +(that's only the teardown-all branch), so it needs no CAP_NET_ADMIN beyond +what the web service already holds. + +State consistency is the subtle part. ``engine.teardown`` removes the +containers and the decky's ``fleet_deckies`` row, but it does NOT prune the +decky from ``decnet-state.json``. If we left it there, the reconciler would +see "present in JSON, absent from DB" and re-INSERT the row — resurrecting +the decky in the UI. So we prune it from both decnet-state.json (load/save) +and the DB ``deployment`` key (the mutate plane's store) after teardown. +""" +import asyncio +import os + +import anyio +from fastapi import APIRouter, Depends, HTTPException, Path as PathParam, Response, status + +from decnet.config import clear_state, load_state, save_state +from decnet.logging import get_logger +from decnet.telemetry import traced as _traced +from decnet.web.db.models import LOCAL_HOST_SENTINEL +from decnet.web.dependencies import require_admin, repo + +log = get_logger("api.teardown") + +router = APIRouter() + + +@router.delete( + "/deckies/{decky_name}", + tags=["Fleet Management"], + status_code=status.HTTP_204_NO_CONTENT, + responses={ + 204: {"description": "Decky torn down and removed from the fleet"}, + 401: {"description": "Could not validate credentials"}, + 403: {"description": "Insufficient permissions"}, + 404: {"description": "No active deployment, or decky not found"}, + 422: {"description": "Path parameter validation error (decky_name must match ^[a-z0-9\\-]{1,64}$)"}, + }, +) +@_traced("api.teardown_decky") +async def api_teardown_decky( + decky_name: str = PathParam(..., 
pattern=r"^[a-z0-9\-]{1,64}$"), + admin: dict = Depends(require_admin), +) -> Response: + loaded = await asyncio.to_thread(load_state) + if loaded is None: + raise HTTPException(status_code=404, detail="No active deployment") + config, compose_path = loaded + decky = next((d for d in config.deckies if d.name == decky_name), None) + if decky is None: + raise HTTPException(status_code=404, detail=f"Decky {decky_name} not found") + + if os.environ.get("DECNET_CONTRACT_TEST") != "true": + # Stops/removes the decky's containers, emits a retirement lifecycle + # event, and drops its fleet_deckies row. Sync engine call, off-thread + # so it doesn't block the event loop. + from decnet.engine import teardown as engine_teardown + await anyio.to_thread.run_sync(engine_teardown, decky_name) + else: + # Engine teardown is skipped under contract tests (no docker); still + # drop the fleet_deckies row so the inventory reflects the deletion. + await repo.delete_fleet_decky( + host_uuid=decky.host_uuid or LOCAL_HOST_SENTINEL, name=decky_name, + ) + + # Prune the decky from persisted state so the reconciler doesn't resurrect + # it (JSON-has / DB-doesn't -> reconciler re-INSERTs the fleet_deckies row). + # DecnetConfig.deckies has min_length=1, so an empty fleet clears state + # entirely rather than persisting an invalid config. 
+ remaining = [d for d in config.deckies if d.name != decky_name] + if remaining: + config.deckies = remaining + await asyncio.to_thread(save_state, config, compose_path) + await repo.set_state( + "deployment", + {"config": config.model_dump(), "compose_path": str(compose_path)}, + ) + else: + await asyncio.to_thread(clear_state) + await repo.set_state("deployment", None) + + log.info("decky torn down via API decky=%s remaining=%d", decky_name, len(remaining)) + return Response(status_code=status.HTTP_204_NO_CONTENT) diff --git a/tests/api/fleet/test_teardown_decky.py b/tests/api/fleet/test_teardown_decky.py new file mode 100644 index 00000000..578d032e --- /dev/null +++ b/tests/api/fleet/test_teardown_decky.py @@ -0,0 +1,112 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +"""DELETE /deckies/{name} — single-decky teardown. + +The handler must: +1. Reject anonymous callers (401) and non-admins (403). +2. 404 when no active deployment exists, or the named decky isn't in it. +3. 422 when decky_name fails the path pattern. +4. On the happy path: drop the decky's fleet_deckies row AND prune it from + decnet-state.json (so the reconciler can't resurrect it), leaving the rest + of the fleet intact; deleting the last decky clears state entirely. + +Under DECNET_CONTRACT_TEST the engine teardown (docker) is skipped; the +handler still removes the fleet_deckies row and prunes state, which is what +these tests assert. 
+""" +from __future__ import annotations + +import httpx +import pytest + +from decnet.config import load_state +from decnet.web.dependencies import repo + + +@pytest.fixture(autouse=True) +def contract_test_mode(monkeypatch): + monkeypatch.setenv("DECNET_CONTRACT_TEST", "true") + + +@pytest.mark.anyio +async def test_unauthenticated_returns_401(client: httpx.AsyncClient): + resp = await client.delete("/api/v1/deckies/test-decky-1") + assert resp.status_code == 401 + + +@pytest.mark.anyio +async def test_viewer_forbidden_403(client, viewer_token, mock_state_file, mock_fleet_deckies): + resp = await client.delete( + "/api/v1/deckies/test-decky-1", + headers={"Authorization": f"Bearer {viewer_token}"}, + ) + assert resp.status_code == 403 + + +@pytest.mark.anyio +async def test_no_deployment_returns_404(client, auth_token): + # patch_state_file (autouse) points STATE_FILE at an empty tmp path with no + # file written, so load_state() returns None. + resp = await client.delete( + "/api/v1/deckies/test-decky-1", + headers={"Authorization": f"Bearer {auth_token}"}, + ) + assert resp.status_code == 404 + assert "deployment" in resp.json()["detail"].lower() + + +@pytest.mark.anyio +async def test_unknown_decky_returns_404(client, auth_token, mock_state_file): + resp = await client.delete( + "/api/v1/deckies/does-not-exist", + headers={"Authorization": f"Bearer {auth_token}"}, + ) + assert resp.status_code == 404 + assert "does-not-exist" in resp.json()["detail"] + + +@pytest.mark.anyio +async def test_invalid_name_returns_422(client, auth_token, mock_state_file): + resp = await client.delete( + "/api/v1/deckies/Bad_Name", # uppercase + underscore violate the pattern + headers={"Authorization": f"Bearer {auth_token}"}, + ) + assert resp.status_code == 422 + + +@pytest.mark.anyio +async def test_delete_removes_decky_and_prunes_state( + client, auth_token, mock_state_file, mock_fleet_deckies, +): + """Deleting one decky drops its fleet_deckies row and prunes it from + 
decnet-state.json, leaving the rest of the fleet intact.""" + resp = await client.delete( + "/api/v1/deckies/test-decky-1", + headers={"Authorization": f"Bearer {auth_token}"}, + ) + assert resp.status_code == 204, resp.text + + # fleet_deckies row gone (the store the UI reads), sibling untouched. + names = {r["name"] for r in await repo.list_fleet_deckies()} + assert names == {"test-decky-2"} + + # decnet-state.json pruned so the reconciler can't resurrect it. + loaded = load_state() + assert loaded is not None + assert {d.name for d in loaded[0].deckies} == {"test-decky-2"} + + +@pytest.mark.anyio +async def test_delete_last_decky_clears_state( + client, auth_token, mock_state_file, mock_fleet_deckies, +): + """Tearing down the final decky clears state entirely rather than + persisting an invalid empty-fleet config (DecnetConfig.deckies min_length=1).""" + for name in ("test-decky-1", "test-decky-2"): + resp = await client.delete( + f"/api/v1/deckies/{name}", + headers={"Authorization": f"Bearer {auth_token}"}, + ) + assert resp.status_code == 204, resp.text + + assert await repo.list_fleet_deckies() == [] + assert load_state() is None