feat(swarm): remote teardown API + UI (per-decky and per-host)
Agents already exposed POST /teardown; the master was missing the plumbing
to reach it. Add:
- POST /api/v1/swarm/hosts/{uuid}/teardown — admin-gated. Body
{decky_id: str|null}: null tears the whole host, a value tears one decky.
On worker failure the master returns 502 and leaves DB shards intact so
master and agent stay aligned.
- BaseRepository.delete_decky_shard(name) + sqlmodel impl for per-decky
cleanup after a single-decky teardown.
- SwarmHosts page: "Teardown all" button (keeps host enrolled).
- SwarmDeckies page: per-row "Teardown" button.
Also exclude setuptools' build/ staging dir from the enrollment tarball —
`pip install -e` on the master generates build/lib/decnet_web/node_modules
and the bundle walker was leaking it to agents. Align pyproject's bandit
exclude with the git-hook invocation so both skip decnet/templates/.
This commit is contained in:
@@ -228,3 +228,6 @@ class BaseRepository(ABC):
|
||||
|
||||
async def delete_decky_shards_for_host(self, host_uuid: str) -> int:
    """Delete the decky shard records tied to ``host_uuid``.

    Interface stub: this base version always raises and concrete
    repositories are expected to override it.
    NOTE(review): the ``int`` result is presumably the number of rows
    removed -- confirm against the concrete implementation.

    Raises:
        NotImplementedError: always, in this base implementation.
    """
    raise NotImplementedError()
|
||||
|
||||
async def delete_decky_shard(self, decky_name: str) -> bool:
    """Delete the shard record for the decky called ``decky_name``.

    Interface stub: this base version always raises and concrete
    repositories are expected to override it.
    NOTE(review): the ``bool`` result presumably reports whether a row
    was actually removed -- confirm against the concrete implementation.

    Raises:
        NotImplementedError: always, in this base implementation.
    """
    raise NotImplementedError()
|
||||
|
||||
@@ -861,3 +861,12 @@ class SQLModelRepository(BaseRepository):
|
||||
)
|
||||
await session.commit()
|
||||
return result.rowcount or 0
|
||||
|
||||
async def delete_decky_shard(self, decky_name: str) -> bool:
    """Delete rows from ``decky_shards`` whose ``decky_name`` matches.

    The name is passed as a bound parameter (``:n``), never interpolated
    into the SQL text. The delete is committed before returning.

    Returns:
        ``bool(rowcount)`` of the executed DELETE statement.
    """
    stmt = text("DELETE FROM decky_shards WHERE decky_name = :n")
    params = {"n": decky_name}
    async with self._session() as db:
        outcome = await db.execute(stmt, params)
        await db.commit()
        affected = outcome.rowcount
    return bool(affected)
|
||||
|
||||
@@ -15,6 +15,7 @@ from .api_list_hosts import router as list_hosts_router
|
||||
from .api_decommission_host import router as decommission_host_router
|
||||
from .api_list_deckies import router as list_deckies_router
|
||||
from .api_enroll_bundle import router as enroll_bundle_router
|
||||
from .api_teardown_host import router as teardown_host_router
|
||||
|
||||
swarm_mgmt_router = APIRouter(prefix="/swarm")
|
||||
|
||||
@@ -22,3 +23,4 @@ swarm_mgmt_router.include_router(list_hosts_router)
|
||||
swarm_mgmt_router.include_router(decommission_host_router)
|
||||
swarm_mgmt_router.include_router(list_deckies_router)
|
||||
swarm_mgmt_router.include_router(enroll_bundle_router)
|
||||
swarm_mgmt_router.include_router(teardown_host_router)
|
||||
|
||||
@@ -55,6 +55,9 @@ _EXCLUDES: tuple[str, ...] = (
|
||||
".pytest_cache", ".pytest_cache/*",
|
||||
".mypy_cache", ".mypy_cache/*",
|
||||
"*.egg-info", "*.egg-info/*",
|
||||
# setuptools build/ staging dir — created by `pip install` and leaks a
|
||||
# nested decnet_web/node_modules/ copy into the bundle otherwise.
|
||||
"build", "build/*", "build/**",
|
||||
"*.pyc", "*.pyo",
|
||||
"*.db", "*.db-wal", "*.db-shm", "decnet.db*",
|
||||
"*.log",
|
||||
|
||||
70
decnet/web/router/swarm_mgmt/api_teardown_host.py
Normal file
70
decnet/web/router/swarm_mgmt/api_teardown_host.py
Normal file
@@ -0,0 +1,70 @@
|
||||
"""POST /swarm/hosts/{uuid}/teardown — remote teardown on a swarm worker.
|
||||
|
||||
Body: ``{"decky_id": "..."}`` (optional). When ``decky_id`` is null/omitted
|
||||
the agent tears down the entire host (all deckies + network); otherwise it
|
||||
tears down that single decky. Mirrors the arguments of the local
|
||||
``decnet teardown`` CLI command.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from pydantic import BaseModel
|
||||
|
||||
from decnet.logging import get_logger
|
||||
from decnet.swarm.client import AgentClient
|
||||
from decnet.web.db.repository import BaseRepository
|
||||
from decnet.web.dependencies import get_repo, require_admin
|
||||
|
||||
log = get_logger("swarm.teardown")
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
# Request body for POST /hosts/{uuid}/teardown.
# (Kept as ``#`` comments: a class docstring would be picked up as the
# model's schema description.)
class TeardownHostRequest(BaseModel):
    # Decky to tear down; leaving it null/omitted targets the whole host.
    decky_id: Optional[str] = None
|
||||
|
||||
|
||||
# Response body for POST /hosts/{uuid}/teardown.
# (Kept as ``#`` comments: a class docstring would be picked up as the
# model's schema description.)
class TeardownHostResponse(BaseModel):
    # UUID of the host the teardown was dispatched to.
    host_uuid: str
    # Name of that host as stored in its host record.
    host_name: str
    # Decky that was targeted; None for a whole-host teardown.
    decky_id: Optional[str] = None
    # Success flag for the teardown.
    ok: bool
    # Stringified reply returned by the agent.
    detail: str
|
||||
|
||||
|
||||
@router.post(
    "/hosts/{uuid}/teardown",
    response_model=TeardownHostResponse,
    tags=["Swarm Management"],
)
async def teardown_host(
    uuid: str,
    req: TeardownHostRequest,
    admin: dict = Depends(require_admin),
    repo: BaseRepository = Depends(get_repo),
) -> TeardownHostResponse:
    """Tear down one decky, or every decky, on a swarm worker.

    The request is forwarded to the worker's agent. Only after the agent
    call succeeds are the matching shard rows removed from the master's
    database, so a failed teardown leaves the master's records untouched.

    A null, omitted or empty ``decky_id`` means "whole host"; any other
    value targets that single decky.

    Raises:
        HTTPException: 404 when no host exists for ``uuid``; 502 when the
            call to the agent fails for any reason.
    """
    host = await repo.get_swarm_host_by_uuid(uuid)
    if host is None:
        raise HTTPException(status_code=404, detail="host not found")

    # Normalise the target once. The cleanup below treats an empty string
    # as "whole host", so the agent must be given the same scope;
    # otherwise "" would reach the agent as a decky id while the master
    # wipes every shard for the host.
    decky_id = req.decky_id or None

    try:
        async with AgentClient(host=host) as agent:
            body = await agent.teardown(decky_id)
    except Exception as exc:
        # Boundary handler: log the traceback, then surface the failure as
        # a gateway error without touching the database.
        log.exception("swarm.teardown dispatch failed host=%s decky=%s",
                      host.get("name"), decky_id)
        raise HTTPException(status_code=502, detail=str(exc)) from exc

    if decky_id is not None:
        await repo.delete_decky_shard(decky_id)
    else:
        await repo.delete_decky_shards_for_host(uuid)

    return TeardownHostResponse(
        host_uuid=uuid,
        host_name=host.get("name") or "",
        decky_id=decky_id,
        ok=True,
        detail=str(body),
    )
|
||||
Reference in New Issue
Block a user