From 5dad1bb315ee3b17a54a0c26f9c5fc1318fa2766 Mon Sep 17 00:00:00 2001
From: anti
Date: Sun, 19 Apr 2026 19:39:28 -0400
Subject: [PATCH] feat(swarm): remote teardown API + UI (per-decky and per-host)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Agents already exposed POST /teardown; the master was missing the
plumbing to reach it. Add:

- POST /api/v1/swarm/hosts/{uuid}/teardown — admin-gated. Body
  {decky_id: str|null}: null tears down the whole host; a value tears
  down one decky. On worker failure the master returns 502 and leaves
  DB shards intact so master and agent stay aligned.
- BaseRepository.delete_decky_shard(name) + sqlmodel impl for per-decky
  cleanup after a single-decky teardown.
- SwarmHosts page: "Teardown all" button (keeps host enrolled).
- SwarmDeckies page: per-row "Teardown" button.

Also exclude setuptools' build/ staging dir from the enrollment
tarball — `pip install -e` on the master generates
build/lib/decnet_web/node_modules and the bundle walker was leaking it
to agents. Align pyproject's bandit exclude with the git-hook
invocation so both skip decnet/templates/.
--- decnet/web/db/repository.py | 3 + decnet/web/db/sqlmodel_repo.py | 9 + decnet/web/router/swarm_mgmt/__init__.py | 2 + .../router/swarm_mgmt/api_enroll_bundle.py | 3 + .../router/swarm_mgmt/api_teardown_host.py | 70 ++++++++ decnet_web/src/components/SwarmDeckies.tsx | 27 ++- decnet_web/src/components/SwarmHosts.tsx | 24 ++- pyproject.toml | 5 +- tests/api/swarm_mgmt/test_teardown_host.py | 165 ++++++++++++++++++ 9 files changed, 305 insertions(+), 3 deletions(-) create mode 100644 decnet/web/router/swarm_mgmt/api_teardown_host.py create mode 100644 tests/api/swarm_mgmt/test_teardown_host.py diff --git a/decnet/web/db/repository.py b/decnet/web/db/repository.py index 1269a06..f2fdd7a 100644 --- a/decnet/web/db/repository.py +++ b/decnet/web/db/repository.py @@ -228,3 +228,6 @@ class BaseRepository(ABC): async def delete_decky_shards_for_host(self, host_uuid: str) -> int: raise NotImplementedError + + async def delete_decky_shard(self, decky_name: str) -> bool: + raise NotImplementedError diff --git a/decnet/web/db/sqlmodel_repo.py b/decnet/web/db/sqlmodel_repo.py index b7064cc..865e8c2 100644 --- a/decnet/web/db/sqlmodel_repo.py +++ b/decnet/web/db/sqlmodel_repo.py @@ -861,3 +861,12 @@ class SQLModelRepository(BaseRepository): ) await session.commit() return result.rowcount or 0 + + async def delete_decky_shard(self, decky_name: str) -> bool: + async with self._session() as session: + result = await session.execute( + text("DELETE FROM decky_shards WHERE decky_name = :n"), + {"n": decky_name}, + ) + await session.commit() + return bool(result.rowcount) diff --git a/decnet/web/router/swarm_mgmt/__init__.py b/decnet/web/router/swarm_mgmt/__init__.py index 8936f79..12790f8 100644 --- a/decnet/web/router/swarm_mgmt/__init__.py +++ b/decnet/web/router/swarm_mgmt/__init__.py @@ -15,6 +15,7 @@ from .api_list_hosts import router as list_hosts_router from .api_decommission_host import router as decommission_host_router from .api_list_deckies import router as 
list_deckies_router from .api_enroll_bundle import router as enroll_bundle_router +from .api_teardown_host import router as teardown_host_router swarm_mgmt_router = APIRouter(prefix="/swarm") @@ -22,3 +23,4 @@ swarm_mgmt_router.include_router(list_hosts_router) swarm_mgmt_router.include_router(decommission_host_router) swarm_mgmt_router.include_router(list_deckies_router) swarm_mgmt_router.include_router(enroll_bundle_router) +swarm_mgmt_router.include_router(teardown_host_router) diff --git a/decnet/web/router/swarm_mgmt/api_enroll_bundle.py b/decnet/web/router/swarm_mgmt/api_enroll_bundle.py index cc4df41..20debb8 100644 --- a/decnet/web/router/swarm_mgmt/api_enroll_bundle.py +++ b/decnet/web/router/swarm_mgmt/api_enroll_bundle.py @@ -55,6 +55,9 @@ _EXCLUDES: tuple[str, ...] = ( ".pytest_cache", ".pytest_cache/*", ".mypy_cache", ".mypy_cache/*", "*.egg-info", "*.egg-info/*", + # setuptools build/ staging dir — created by `pip install` and leaks a + # nested decnet_web/node_modules/ copy into the bundle otherwise. + "build", "build/*", "build/**", "*.pyc", "*.pyo", "*.db", "*.db-wal", "*.db-shm", "decnet.db*", "*.log", diff --git a/decnet/web/router/swarm_mgmt/api_teardown_host.py b/decnet/web/router/swarm_mgmt/api_teardown_host.py new file mode 100644 index 0000000..e648be5 --- /dev/null +++ b/decnet/web/router/swarm_mgmt/api_teardown_host.py @@ -0,0 +1,70 @@ +"""POST /swarm/hosts/{uuid}/teardown — remote teardown on a swarm worker. + +Body: ``{"decky_id": "..."}`` (optional). When ``decky_id`` is null/omitted +the agent tears down the entire host (all deckies + network); otherwise it +tears down that single decky. Mirrors the arguments of the local +``decnet teardown`` CLI command. 
+""" +from __future__ import annotations + +from typing import Optional + +from fastapi import APIRouter, Depends, HTTPException +from pydantic import BaseModel + +from decnet.logging import get_logger +from decnet.swarm.client import AgentClient +from decnet.web.db.repository import BaseRepository +from decnet.web.dependencies import get_repo, require_admin + +log = get_logger("swarm.teardown") +router = APIRouter() + + +class TeardownHostRequest(BaseModel): + decky_id: Optional[str] = None + + +class TeardownHostResponse(BaseModel): + host_uuid: str + host_name: str + decky_id: Optional[str] = None + ok: bool + detail: str + + +@router.post( + "/hosts/{uuid}/teardown", + response_model=TeardownHostResponse, + tags=["Swarm Management"], +) +async def teardown_host( + uuid: str, + req: TeardownHostRequest, + admin: dict = Depends(require_admin), + repo: BaseRepository = Depends(get_repo), +) -> TeardownHostResponse: + host = await repo.get_swarm_host_by_uuid(uuid) + if host is None: + raise HTTPException(status_code=404, detail="host not found") + + try: + async with AgentClient(host=host) as agent: + body = await agent.teardown(req.decky_id) + except Exception as exc: + log.exception("swarm.teardown dispatch failed host=%s decky=%s", + host.get("name"), req.decky_id) + raise HTTPException(status_code=502, detail=str(exc)) from exc + + if req.decky_id: + await repo.delete_decky_shard(req.decky_id) + else: + await repo.delete_decky_shards_for_host(uuid) + + return TeardownHostResponse( + host_uuid=uuid, + host_name=host.get("name") or "", + decky_id=req.decky_id, + ok=True, + detail=str(body), + ) diff --git a/decnet_web/src/components/SwarmDeckies.tsx b/decnet_web/src/components/SwarmDeckies.tsx index b33429a..8ace856 100644 --- a/decnet_web/src/components/SwarmDeckies.tsx +++ b/decnet_web/src/components/SwarmDeckies.tsx @@ -2,7 +2,7 @@ import React, { useEffect, useState } from 'react'; import api from '../utils/api'; import './Dashboard.css'; import 
'./Swarm.css'; -import { Boxes, RefreshCw } from 'lucide-react'; +import { Boxes, PowerOff, RefreshCw } from 'lucide-react'; interface DeckyShard { decky_name: string; @@ -20,6 +20,7 @@ interface DeckyShard { const SwarmDeckies: React.FC = () => { const [shards, setShards] = useState([]); const [loading, setLoading] = useState(true); + const [tearingDown, setTearingDown] = useState(null); const [error, setError] = useState(null); const fetch = async () => { @@ -40,6 +41,19 @@ const SwarmDeckies: React.FC = () => { return () => clearInterval(t); }, []); + const handleTeardown = async (s: DeckyShard) => { + if (!window.confirm(`Tear down decky ${s.decky_name} on ${s.host_name}?`)) return; + setTearingDown(s.decky_name); + try { + await api.post(`/swarm/hosts/${s.host_uuid}/teardown`, { decky_id: s.decky_name }); + await fetch(); + } catch (err: any) { + alert(err?.response?.data?.detail || 'Teardown failed'); + } finally { + setTearingDown(null); + } + }; + const byHost: Record = {}; for (const s of shards) { if (!byHost[s.host_uuid]) { @@ -77,6 +91,7 @@ const SwarmDeckies: React.FC = () => { Services Compose Updated + @@ -87,6 +102,16 @@ const SwarmDeckies: React.FC = () => { {s.services.join(', ')} {s.compose_hash ? 
s.compose_hash.slice(0, 8) : '—'} {new Date(s.updated_at).toLocaleString()} + + + ))} diff --git a/decnet_web/src/components/SwarmHosts.tsx b/decnet_web/src/components/SwarmHosts.tsx index 83fe528..a56fa61 100644 --- a/decnet_web/src/components/SwarmHosts.tsx +++ b/decnet_web/src/components/SwarmHosts.tsx @@ -2,7 +2,7 @@ import React, { useEffect, useState } from 'react'; import api from '../utils/api'; import './Dashboard.css'; import './Swarm.css'; -import { HardDrive, RefreshCw, Trash2, Wifi, WifiOff } from 'lucide-react'; +import { HardDrive, PowerOff, RefreshCw, Trash2, Wifi, WifiOff } from 'lucide-react'; interface SwarmHost { uuid: string; @@ -23,6 +23,7 @@ const SwarmHosts: React.FC = () => { const [hosts, setHosts] = useState([]); const [loading, setLoading] = useState(true); const [decommissioning, setDecommissioning] = useState(null); + const [tearingDown, setTearingDown] = useState(null); const [error, setError] = useState(null); const fetchHosts = async () => { @@ -43,6 +44,19 @@ const SwarmHosts: React.FC = () => { return () => clearInterval(t); }, []); + const handleTeardownAll = async (host: SwarmHost) => { + if (!window.confirm(`Tear down ALL deckies on ${host.name}? The host stays enrolled.`)) return; + setTearingDown(host.uuid); + try { + await api.post(`/swarm/hosts/${host.uuid}/teardown`, {}); + await fetchHosts(); + } catch (err: any) { + alert(err?.response?.data?.detail || 'Teardown failed'); + } finally { + setTearingDown(null); + } + }; + const handleDecommission = async (host: SwarmHost) => { if (!window.confirm(`Decommission ${host.name} (${host.address})? This removes certs and decky mappings.`)) return; setDecommissioning(host.uuid); @@ -97,6 +111,14 @@ const SwarmHosts: React.FC = () => { {shortFp(h.client_cert_fingerprint)} {new Date(h.enrolled_at).toLocaleString()} +