feat(swarm): remote teardown API + UI (per-decky and per-host)
Agents already exposed POST /teardown; the master was missing the plumbing
to reach it. Add:
- POST /api/v1/swarm/hosts/{uuid}/teardown — admin-gated. Body
{decky_id: str|null}: null (or omitted) tears down the whole host; a string tears down only that decky.
On worker failure the master returns 502 and leaves DB shards intact so
master and agent stay aligned.
- BaseRepository.delete_decky_shard(name) + sqlmodel impl for per-decky
cleanup after a single-decky teardown.
- SwarmHosts page: "Teardown all" button (keeps host enrolled).
- SwarmDeckies page: per-row "Teardown" button.
Also exclude setuptools' build/ staging dir from the enrollment tarball —
`pip install -e` on the master generates build/lib/decnet_web/node_modules
and the bundle walker was leaking it to agents. Align pyproject's bandit
exclude with the git-hook invocation so both skip decnet/templates/.
This commit is contained in:
@@ -228,3 +228,6 @@ class BaseRepository(ABC):
|
||||
|
||||
async def delete_decky_shards_for_host(self, host_uuid: str) -> int:
    """Remove the decky shard rows that belong to *host_uuid*.

    Abstract hook: the base class only raises ``NotImplementedError``;
    concrete repositories are expected to override it. The ``int`` return
    is presumably the number of rows removed -- confirm against the
    concrete implementation.
    """
    raise NotImplementedError
|
||||
|
||||
async def delete_decky_shard(self, decky_name: str) -> bool:
    """Remove the shard row for the decky called *decky_name*.

    Abstract hook: the base class only raises ``NotImplementedError``;
    concrete repositories are expected to override it and report whether
    a row was actually deleted.
    """
    raise NotImplementedError
|
||||
|
||||
@@ -861,3 +861,12 @@ class SQLModelRepository(BaseRepository):
|
||||
)
|
||||
await session.commit()
|
||||
return result.rowcount or 0
|
||||
|
||||
async def delete_decky_shard(self, decky_name: str) -> bool:
    """Delete the ``decky_shards`` row(s) whose ``decky_name`` matches.

    Args:
        decky_name: Name of the decky whose shard record should go away.

    Returns:
        ``True`` when the DELETE reported a truthy (non-zero) row count,
        ``False`` otherwise.
    """
    # The name is passed as a bound parameter, never interpolated into SQL.
    stmt = text("DELETE FROM decky_shards WHERE decky_name = :n")
    params = {"n": decky_name}
    async with self._session() as session:
        outcome = await session.execute(stmt, params)
        await session.commit()
        return bool(outcome.rowcount)
|
||||
|
||||
@@ -15,6 +15,7 @@ from .api_list_hosts import router as list_hosts_router
|
||||
from .api_decommission_host import router as decommission_host_router
|
||||
from .api_list_deckies import router as list_deckies_router
|
||||
from .api_enroll_bundle import router as enroll_bundle_router
|
||||
from .api_teardown_host import router as teardown_host_router
|
||||
|
||||
swarm_mgmt_router = APIRouter(prefix="/swarm")
|
||||
|
||||
@@ -22,3 +23,4 @@ swarm_mgmt_router.include_router(list_hosts_router)
|
||||
swarm_mgmt_router.include_router(decommission_host_router)
|
||||
swarm_mgmt_router.include_router(list_deckies_router)
|
||||
swarm_mgmt_router.include_router(enroll_bundle_router)
|
||||
swarm_mgmt_router.include_router(teardown_host_router)
|
||||
|
||||
@@ -55,6 +55,9 @@ _EXCLUDES: tuple[str, ...] = (
|
||||
".pytest_cache", ".pytest_cache/*",
|
||||
".mypy_cache", ".mypy_cache/*",
|
||||
"*.egg-info", "*.egg-info/*",
|
||||
# setuptools build/ staging dir — created by `pip install` and leaks a
|
||||
# nested decnet_web/node_modules/ copy into the bundle otherwise.
|
||||
"build", "build/*", "build/**",
|
||||
"*.pyc", "*.pyo",
|
||||
"*.db", "*.db-wal", "*.db-shm", "decnet.db*",
|
||||
"*.log",
|
||||
|
||||
70
decnet/web/router/swarm_mgmt/api_teardown_host.py
Normal file
70
decnet/web/router/swarm_mgmt/api_teardown_host.py
Normal file
@@ -0,0 +1,70 @@
|
||||
"""POST /swarm/hosts/{uuid}/teardown — remote teardown on a swarm worker.
|
||||
|
||||
Body: ``{"decky_id": "..."}`` (optional). When ``decky_id`` is null/omitted
|
||||
the agent tears down the entire host (all deckies + network); otherwise it
|
||||
tears down that single decky. Mirrors the arguments of the local
|
||||
``decnet teardown`` CLI command.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from pydantic import BaseModel
|
||||
|
||||
from decnet.logging import get_logger
|
||||
from decnet.swarm.client import AgentClient
|
||||
from decnet.web.db.repository import BaseRepository
|
||||
from decnet.web.dependencies import get_repo, require_admin
|
||||
|
||||
log = get_logger("swarm.teardown")
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
class TeardownHostRequest(BaseModel):
    """JSON body accepted by ``POST /hosts/{uuid}/teardown``."""

    # Name of the single decky to tear down. Left as None (the default,
    # also used when the key is omitted) the whole host is torn down.
    decky_id: Optional[str] = None
|
||||
|
||||
|
||||
class TeardownHostResponse(BaseModel):
    """JSON body returned by ``POST /hosts/{uuid}/teardown`` on success."""

    # UUID of the host the teardown was dispatched to (echoes the path).
    host_uuid: str
    # Name stored for that host; empty string when the record has none.
    host_name: str
    # Decky that was torn down, or None for a whole-host teardown.
    decky_id: Optional[str] = None
    # Success flag; the endpoint only builds this model after a successful
    # agent call, so it is True there.
    ok: bool
    # Stringified reply received from the worker agent.
    detail: str
|
||||
|
||||
|
||||
@router.post(
    "/hosts/{uuid}/teardown",
    response_model=TeardownHostResponse,
    tags=["Swarm Management"],
)
async def teardown_host(
    uuid: str,
    req: TeardownHostRequest,
    admin: dict = Depends(require_admin),
    repo: BaseRepository = Depends(get_repo),
) -> TeardownHostResponse:
    """Tear down one decky, or every decky, on a remote swarm worker.

    Args:
        uuid: UUID of the enrolled swarm host to act on.
        req: Request body; ``decky_id`` selects a single decky, ``None``
            selects the whole host.
        admin: Result of the ``require_admin`` dependency. Unused in the
            body; it is declared only so the dependency gates the route.
        repo: Repository used for the host lookup and shard cleanup.

    Returns:
        A ``TeardownHostResponse`` with ``ok=True`` and the agent's reply
        stringified into ``detail``.

    Raises:
        HTTPException: 404 when no host has this UUID; 422 when
            ``decky_id`` is present but blank; 502 when dispatching to the
            agent fails (shard rows are left untouched in that case).
    """
    host = await repo.get_swarm_host_by_uuid(uuid)
    if host is None:
        raise HTTPException(status_code=404, detail="host not found")

    decky_id = req.decky_id
    # A blank id is almost certainly a client bug. Previously it was sent
    # to the agent verbatim while the falsy check below treated it as
    # "whole host" and wiped every shard row for the host. Refuse it
    # rather than guess which of the two the caller meant.
    if decky_id is not None and not decky_id.strip():
        raise HTTPException(
            status_code=422,
            detail="decky_id must be a non-empty string or null",
        )

    try:
        async with AgentClient(host=host) as agent:
            body = await agent.teardown(decky_id)
    except Exception as exc:
        # Any failure while talking to the worker becomes a 502. The DB
        # cleanup below is skipped, so shard rows stay in place.
        log.exception("swarm.teardown dispatch failed host=%s decky=%s",
                      host.get("name"), decky_id)
        raise HTTPException(status_code=502, detail=str(exc)) from exc

    # The agent call succeeded: drop the matching shard row(s).
    if decky_id is not None:
        # NOTE(review): delete_decky_shard() filters on decky_name only,
        # not on host -- confirm decky names are unique across hosts.
        await repo.delete_decky_shard(decky_id)
    else:
        await repo.delete_decky_shards_for_host(uuid)

    return TeardownHostResponse(
        host_uuid=uuid,
        host_name=host.get("name") or "",
        decky_id=decky_id,
        ok=True,
        detail=str(body),
    )
|
||||
@@ -2,7 +2,7 @@ import React, { useEffect, useState } from 'react';
|
||||
import api from '../utils/api';
|
||||
import './Dashboard.css';
|
||||
import './Swarm.css';
|
||||
import { Boxes, RefreshCw } from 'lucide-react';
|
||||
import { Boxes, PowerOff, RefreshCw } from 'lucide-react';
|
||||
|
||||
interface DeckyShard {
|
||||
decky_name: string;
|
||||
@@ -20,6 +20,7 @@ interface DeckyShard {
|
||||
const SwarmDeckies: React.FC = () => {
|
||||
const [shards, setShards] = useState<DeckyShard[]>([]);
|
||||
const [loading, setLoading] = useState(true);
|
||||
const [tearingDown, setTearingDown] = useState<string | null>(null);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
|
||||
const fetch = async () => {
|
||||
@@ -40,6 +41,19 @@ const SwarmDeckies: React.FC = () => {
|
||||
return () => clearInterval(t);
|
||||
}, []);
|
||||
|
||||
// Tear down a single decky on its host after the user confirms, then
// reload the shard list. The row's button is disabled while this runs.
const handleTeardown = async (s: DeckyShard) => {
  const confirmed = window.confirm(`Tear down decky ${s.decky_name} on ${s.host_name}?`);
  if (!confirmed) return;

  setTearingDown(s.decky_name);
  try {
    const payload = { decky_id: s.decky_name };
    await api.post(`/swarm/hosts/${s.host_uuid}/teardown`, payload);
    // `fetch` is this component's own loader, not the global fetch().
    await fetch();
  } catch (err: any) {
    // Prefer the server-provided detail; fall back to a generic message.
    const message = err?.response?.data?.detail || 'Teardown failed';
    alert(message);
  } finally {
    setTearingDown(null);
  }
};
|
||||
|
||||
const byHost: Record<string, { name: string; address: string; status: string; shards: DeckyShard[] }> = {};
|
||||
for (const s of shards) {
|
||||
if (!byHost[s.host_uuid]) {
|
||||
@@ -77,6 +91,7 @@ const SwarmDeckies: React.FC = () => {
|
||||
<th>Services</th>
|
||||
<th>Compose</th>
|
||||
<th>Updated</th>
|
||||
<th></th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
@@ -87,6 +102,16 @@ const SwarmDeckies: React.FC = () => {
|
||||
<td>{s.services.join(', ')}</td>
|
||||
<td><code>{s.compose_hash ? s.compose_hash.slice(0, 8) : '—'}</code></td>
|
||||
<td>{new Date(s.updated_at).toLocaleString()}</td>
|
||||
<td>
|
||||
<button
|
||||
className="control-btn danger"
|
||||
disabled={tearingDown === s.decky_name}
|
||||
onClick={() => handleTeardown(s)}
|
||||
title="Stop this decky on its host"
|
||||
>
|
||||
<PowerOff size={14} /> {tearingDown === s.decky_name ? 'Tearing down…' : 'Teardown'}
|
||||
</button>
|
||||
</td>
|
||||
</tr>
|
||||
))}
|
||||
</tbody>
|
||||
|
||||
@@ -2,7 +2,7 @@ import React, { useEffect, useState } from 'react';
|
||||
import api from '../utils/api';
|
||||
import './Dashboard.css';
|
||||
import './Swarm.css';
|
||||
import { HardDrive, RefreshCw, Trash2, Wifi, WifiOff } from 'lucide-react';
|
||||
import { HardDrive, PowerOff, RefreshCw, Trash2, Wifi, WifiOff } from 'lucide-react';
|
||||
|
||||
interface SwarmHost {
|
||||
uuid: string;
|
||||
@@ -23,6 +23,7 @@ const SwarmHosts: React.FC = () => {
|
||||
const [hosts, setHosts] = useState<SwarmHost[]>([]);
|
||||
const [loading, setLoading] = useState(true);
|
||||
const [decommissioning, setDecommissioning] = useState<string | null>(null);
|
||||
const [tearingDown, setTearingDown] = useState<string | null>(null);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
|
||||
const fetchHosts = async () => {
|
||||
@@ -43,6 +44,19 @@ const SwarmHosts: React.FC = () => {
|
||||
return () => clearInterval(t);
|
||||
}, []);
|
||||
|
||||
// Tear down every decky on a host (the host itself stays enrolled) after
// the user confirms, then reload the host list.
const handleTeardownAll = async (host: SwarmHost) => {
  const confirmed = window.confirm(`Tear down ALL deckies on ${host.name}? The host stays enrolled.`);
  if (!confirmed) return;

  setTearingDown(host.uuid);
  try {
    // An empty body (no decky_id) requests a whole-host teardown.
    const payload = {};
    await api.post(`/swarm/hosts/${host.uuid}/teardown`, payload);
    await fetchHosts();
  } catch (err: any) {
    // Prefer the server-provided detail; fall back to a generic message.
    const message = err?.response?.data?.detail || 'Teardown failed';
    alert(message);
  } finally {
    setTearingDown(null);
  }
};
|
||||
|
||||
const handleDecommission = async (host: SwarmHost) => {
|
||||
if (!window.confirm(`Decommission ${host.name} (${host.address})? This removes certs and decky mappings.`)) return;
|
||||
setDecommissioning(host.uuid);
|
||||
@@ -97,6 +111,14 @@ const SwarmHosts: React.FC = () => {
|
||||
<td title={h.client_cert_fingerprint}><code>{shortFp(h.client_cert_fingerprint)}</code></td>
|
||||
<td>{new Date(h.enrolled_at).toLocaleString()}</td>
|
||||
<td>
|
||||
<button
|
||||
className="control-btn"
|
||||
disabled={tearingDown === h.uuid || h.status !== 'active'}
|
||||
onClick={() => handleTeardownAll(h)}
|
||||
title="Stop all deckies on this host (keeps it enrolled)"
|
||||
>
|
||||
<PowerOff size={14} /> {tearingDown === h.uuid ? 'Tearing down…' : 'Teardown all'}
|
||||
</button>
|
||||
<button
|
||||
className="control-btn danger"
|
||||
disabled={decommissioning === h.uuid}
|
||||
|
||||
@@ -111,6 +111,9 @@ include = ["decnet*"]
|
||||
decnet = ["templates/**/*"]
|
||||
|
||||
[tool.bandit]
|
||||
# Docker build contexts — code runs inside decoy containers, not in the
|
||||
# master/agent process. Skipping keeps honeypot service scripts (hashlib.md5
|
||||
# for JA3, /tmp/build in Dockerfile helpers, etc.) out of the scan.
|
||||
exclude_dirs = [
|
||||
"decnet/templates/syslog_bridge.py",
|
||||
"decnet/templates",
|
||||
]
|
||||
|
||||
165
tests/api/swarm_mgmt/test_teardown_host.py
Normal file
165
tests/api/swarm_mgmt/test_teardown_host.py
Normal file
@@ -0,0 +1,165 @@
|
||||
"""POST /swarm/hosts/{uuid}/teardown — per-host and per-decky remote teardown."""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
from typing import Optional
|
||||
|
||||
import pytest
|
||||
|
||||
from decnet.web.router.swarm_mgmt import api_teardown_host as mod
|
||||
|
||||
|
||||
class _FakeAgent:
    """Recording stand-in for ``AgentClient`` used by the teardown tests.

    Every construction and every ``teardown`` call is appended to the
    class-level ``calls`` list as a ``(kind, value)`` tuple so tests can
    assert on what the endpoint dispatched.
    """

    # Declared on the class so instantiation never depends on a fixture
    # having run first. Without it, a missing attribute raised
    # AttributeError inside the endpoint's try block and was reported as
    # a 502, letting the failure test pass for the wrong reason.
    calls: list = []

    def __init__(self, *a, **kw):
        # Accept the host either as ``host=`` or as the first positional.
        self._host = kw.get("host", a[0] if a else None)
        # Record on the concrete class so a subclass with its own
        # ``calls`` list (reset by its fixture) captures its own events.
        type(self).calls.append(("init", self._host))

    async def __aenter__(self):
        return self

    async def __aexit__(self, *exc):
        # Returning None lets exceptions raised in the body propagate.
        return None

    async def teardown(self, decky_id: Optional[str] = None) -> dict:
        """Record the call and return a canned success payload."""
        type(self).calls.append(("teardown", decky_id))
        return {"status": "torn_down", "decky_id": decky_id}
|
||||
|
||||
|
||||
class _FailingAgent(_FakeAgent):
    """Agent double whose ``teardown`` always fails."""

    async def teardown(self, decky_id: Optional[str] = None) -> dict:
        """Raise ``RuntimeError`` to simulate a failed dispatch to the worker."""
        raise RuntimeError("network unreachable")
|
||||
|
||||
|
||||
@pytest.fixture
def fake_agent(monkeypatch):
    """Swap the endpoint module's ``AgentClient`` for ``_FakeAgent``.

    The recorded call list is reset first so each test starts clean. The
    class itself is returned so tests can inspect ``.calls``.
    """
    agent_cls = _FakeAgent
    agent_cls.calls = []
    monkeypatch.setattr(mod, "AgentClient", agent_cls)
    return agent_cls
|
||||
|
||||
|
||||
@pytest.fixture
def failing_agent(monkeypatch):
    """Swap the endpoint module's ``AgentClient`` for ``_FailingAgent``.

    Resets ``_FailingAgent.calls`` and returns the class so tests can
    inspect it.
    """
    agent_cls = _FailingAgent
    agent_cls.calls = []
    monkeypatch.setattr(mod, "AgentClient", agent_cls)
    return agent_cls
|
||||
|
||||
|
||||
async def _seed_host(repo, *, name="worker-a", uuid="h-1") -> str:
    """Insert an active swarm host row through *repo* and return its UUID.

    Only ``name`` and ``uuid`` vary between tests; every other column gets
    a fixed placeholder value.
    """
    enrolled = datetime.now(timezone.utc)
    host_row = {
        "uuid": uuid,
        "name": name,
        "address": "10.0.0.9",
        "agent_port": 8765,
        "status": "active",
        "client_cert_fingerprint": "f" * 64,
        "cert_bundle_path": "",
        "use_ipvlan": False,
        "enrolled_at": enrolled,
        "last_heartbeat": None,
    }
    await repo.add_swarm_host(host_row)
    return uuid
|
||||
|
||||
|
||||
async def _seed_shard(repo, *, host_uuid: str, decky_name: str) -> None:
    """Upsert a running, ssh-only decky shard row for *host_uuid* via *repo*."""
    shard_row = {
        "decky_name": decky_name,
        "host_uuid": host_uuid,
        # The services column is written as a JSON-encoded list.
        "services": json.dumps(["ssh"]),
        "state": "running",
        "last_error": None,
        "updated_at": datetime.now(timezone.utc),
    }
    await repo.upsert_decky_shard(shard_row)
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_teardown_all_deckies_on_host(client, auth_token, fake_agent):
    """An empty body tears down the whole host and clears all its shard rows."""
    from decnet.web.dependencies import repo

    host_uuid = await _seed_host(repo, name="tear-all", uuid="tear-all-uuid")
    for decky in ("decky1", "decky2"):
        await _seed_shard(repo, host_uuid=host_uuid, decky_name=decky)

    response = await client.post(
        f"/api/v1/swarm/hosts/{host_uuid}/teardown",
        headers={"Authorization": f"Bearer {auth_token}"},
        json={},
    )
    assert response.status_code == 200, response.text

    payload = response.json()
    assert payload["ok"] is True
    assert payload["decky_id"] is None

    # The agent must have been asked for a whole-host teardown ...
    assert ("teardown", None) in fake_agent.calls
    # ... and no shard rows may remain for that host.
    leftover = await repo.list_decky_shards(host_uuid)
    assert leftover == []
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_teardown_single_decky(client, auth_token, fake_agent):
    """Naming a decky removes only that decky's shard row."""
    from decnet.web.dependencies import repo

    host_uuid = await _seed_host(repo, name="tear-one", uuid="tear-one-uuid")
    for decky in ("decky-keep", "decky-drop"):
        await _seed_shard(repo, host_uuid=host_uuid, decky_name=decky)

    response = await client.post(
        f"/api/v1/swarm/hosts/{host_uuid}/teardown",
        headers={"Authorization": f"Bearer {auth_token}"},
        json={"decky_id": "decky-drop"},
    )
    assert response.status_code == 200, response.text

    payload = response.json()
    assert payload["decky_id"] == "decky-drop"

    assert ("teardown", "decky-drop") in fake_agent.calls
    # Only the decky that was not targeted should still be recorded.
    shards = await repo.list_decky_shards(host_uuid)
    leftover = {shard["decky_name"] for shard in shards}
    assert leftover == {"decky-keep"}
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_teardown_unknown_host_404(client, auth_token, fake_agent):
    """A UUID with no matching host row yields 404."""
    auth_headers = {"Authorization": f"Bearer {auth_token}"}
    response = await client.post(
        "/api/v1/swarm/hosts/does-not-exist/teardown",
        headers=auth_headers,
        json={},
    )
    assert response.status_code == 404
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_teardown_agent_failure_502(client, auth_token, failing_agent):
    """When the worker is unreachable the DB shards MUST NOT be deleted —
    otherwise the master's view diverges from reality."""
    from decnet.web.dependencies import repo

    host_uuid = await _seed_host(repo, name="tear-fail", uuid="tear-fail-uuid")
    await _seed_shard(repo, host_uuid=host_uuid, decky_name="survivor")

    response = await client.post(
        f"/api/v1/swarm/hosts/{host_uuid}/teardown",
        headers={"Authorization": f"Bearer {auth_token}"},
        json={},
    )
    assert response.status_code == 502

    # The seeded shard must still be recorded after the failed dispatch.
    shards = await repo.list_decky_shards(host_uuid)
    leftover = {shard["decky_name"] for shard in shards}
    assert leftover == {"survivor"}
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_teardown_non_admin_forbidden(client, viewer_token, fake_agent):
    """A viewer-level token is rejected with 403 even for an existing host."""
    from decnet.web.dependencies import repo

    host_uuid = await _seed_host(repo, name="tear-guard", uuid="tear-guard-uuid")
    viewer_headers = {"Authorization": f"Bearer {viewer_token}"}
    response = await client.post(
        f"/api/v1/swarm/hosts/{host_uuid}/teardown",
        headers=viewer_headers,
        json={},
    )
    assert response.status_code == 403
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_teardown_no_auth_401(client, fake_agent):
    """A request without an Authorization header is rejected with 401."""
    url = "/api/v1/swarm/hosts/whatever/teardown"
    response = await client.post(url, json={})
    assert response.status_code == 401
|
||||
Reference in New Issue
Block a user