feat(swarm): remote teardown API + UI (per-decky and per-host)

Agents already exposed POST /teardown; the master was missing the plumbing
to reach it. Add:

- POST /api/v1/swarm/hosts/{uuid}/teardown — admin-gated. Body
  {decky_id: str|null}: null tears down the whole host; a value tears down one decky.
  On worker failure the master returns 502 and leaves DB shards intact so
  master and agent stay aligned.
- BaseRepository.delete_decky_shard(name) + sqlmodel impl for per-decky
  cleanup after a single-decky teardown.
- SwarmHosts page: "Teardown all" button (keeps host enrolled).
- SwarmDeckies page: per-row "Teardown" button.

Also exclude setuptools' build/ staging dir from the enrollment tarball —
`pip install -e` on the master generates build/lib/decnet_web/node_modules
and the bundle walker was leaking it to agents. Align pyproject's bandit
exclude with the git-hook invocation so both skip decnet/templates/.
This commit is contained in:
2026-04-19 19:39:28 -04:00
parent 6708f26e6b
commit 5dad1bb315
9 changed files with 305 additions and 3 deletions

View File

@@ -2,7 +2,7 @@ import React, { useEffect, useState } from 'react';
import api from '../utils/api';
import './Dashboard.css';
import './Swarm.css';
import { Boxes, RefreshCw } from 'lucide-react';
import { Boxes, PowerOff, RefreshCw } from 'lucide-react';
interface DeckyShard {
decky_name: string;
@@ -20,6 +20,7 @@ interface DeckyShard {
const SwarmDeckies: React.FC = () => {
const [shards, setShards] = useState<DeckyShard[]>([]);
const [loading, setLoading] = useState(true);
const [tearingDown, setTearingDown] = useState<string | null>(null);
const [error, setError] = useState<string | null>(null);
const fetch = async () => {
@@ -40,6 +41,19 @@ const SwarmDeckies: React.FC = () => {
return () => clearInterval(t);
}, []);
/**
 * Ask the user for confirmation, then request teardown of a single decky.
 *
 * Posts `{ decky_id: s.decky_name }` to `/swarm/hosts/{host_uuid}/teardown` and,
 * on success, awaits the component-local `fetch()` (presumably reloading the
 * shard list; its body is not visible here). While the request is in flight
 * `tearingDown` holds the decky name; it is reset to `null` in every outcome.
 *
 * @param s - The shard row whose decky should be torn down.
 */
const handleTeardown = async (s: DeckyShard): Promise<void> => {
  if (!window.confirm(`Tear down decky ${s.decky_name} on ${s.host_name}?`)) return;
  setTearingDown(s.decky_name);
  try {
    await api.post(`/swarm/hosts/${s.host_uuid}/teardown`, { decky_id: s.decky_name });
    await fetch();
  } catch (err: unknown) {
    // The shape of a thrown value is not guaranteed, so read
    // `response.data.detail` defensively instead of trusting `any`.
    const detail = (err as { response?: { data?: { detail?: unknown } } } | null | undefined)
      ?.response?.data?.detail;
    // Only surface `detail` when it is a non-empty string; an object or array
    // would otherwise be shown by alert() as "[object Object]".
    alert(typeof detail === 'string' && detail !== '' ? detail : 'Teardown failed');
  } finally {
    setTearingDown(null);
  }
};
const byHost: Record<string, { name: string; address: string; status: string; shards: DeckyShard[] }> = {};
for (const s of shards) {
if (!byHost[s.host_uuid]) {
@@ -77,6 +91,7 @@ const SwarmDeckies: React.FC = () => {
<th>Services</th>
<th>Compose</th>
<th>Updated</th>
<th></th>
</tr>
</thead>
<tbody>
@@ -87,6 +102,16 @@ const SwarmDeckies: React.FC = () => {
<td>{s.services.join(', ')}</td>
<td><code>{s.compose_hash ? s.compose_hash.slice(0, 8) : '—'}</code></td>
<td>{new Date(s.updated_at).toLocaleString()}</td>
<td>
<button
className="control-btn danger"
disabled={tearingDown === s.decky_name}
onClick={() => handleTeardown(s)}
title="Stop this decky on its host"
>
<PowerOff size={14} /> {tearingDown === s.decky_name ? 'Tearing down…' : 'Teardown'}
</button>
</td>
</tr>
))}
</tbody>

View File

@@ -2,7 +2,7 @@ import React, { useEffect, useState } from 'react';
import api from '../utils/api';
import './Dashboard.css';
import './Swarm.css';
import { HardDrive, RefreshCw, Trash2, Wifi, WifiOff } from 'lucide-react';
import { HardDrive, PowerOff, RefreshCw, Trash2, Wifi, WifiOff } from 'lucide-react';
interface SwarmHost {
uuid: string;
@@ -23,6 +23,7 @@ const SwarmHosts: React.FC = () => {
const [hosts, setHosts] = useState<SwarmHost[]>([]);
const [loading, setLoading] = useState(true);
const [decommissioning, setDecommissioning] = useState<string | null>(null);
const [tearingDown, setTearingDown] = useState<string | null>(null);
const [error, setError] = useState<string | null>(null);
const fetchHosts = async () => {
@@ -43,6 +44,19 @@ const SwarmHosts: React.FC = () => {
return () => clearInterval(t);
}, []);
/**
 * Ask the user for confirmation, then request teardown of every decky on a host.
 *
 * Posts an empty body to `/swarm/hosts/{uuid}/teardown` and, on success, awaits
 * `fetchHosts()` (presumably reloading the host list; its body is not visible
 * here). While the request is in flight `tearingDown` holds the host uuid; it
 * is reset to `null` in every outcome.
 *
 * @param host - The host whose deckies should all be torn down.
 */
const handleTeardownAll = async (host: SwarmHost): Promise<void> => {
  if (!window.confirm(`Tear down ALL deckies on ${host.name}? The host stays enrolled.`)) return;
  setTearingDown(host.uuid);
  try {
    await api.post(`/swarm/hosts/${host.uuid}/teardown`, {});
    await fetchHosts();
  } catch (err: unknown) {
    // The shape of a thrown value is not guaranteed, so read
    // `response.data.detail` defensively instead of trusting `any`.
    const detail = (err as { response?: { data?: { detail?: unknown } } } | null | undefined)
      ?.response?.data?.detail;
    // Only surface `detail` when it is a non-empty string; an object or array
    // would otherwise be shown by alert() as "[object Object]".
    alert(typeof detail === 'string' && detail !== '' ? detail : 'Teardown failed');
  } finally {
    setTearingDown(null);
  }
};
const handleDecommission = async (host: SwarmHost) => {
if (!window.confirm(`Decommission ${host.name} (${host.address})? This removes certs and decky mappings.`)) return;
setDecommissioning(host.uuid);
@@ -97,6 +111,14 @@ const SwarmHosts: React.FC = () => {
<td title={h.client_cert_fingerprint}><code>{shortFp(h.client_cert_fingerprint)}</code></td>
<td>{new Date(h.enrolled_at).toLocaleString()}</td>
<td>
<button
className="control-btn"
disabled={tearingDown === h.uuid || h.status !== 'active'}
onClick={() => handleTeardownAll(h)}
title="Stop all deckies on this host (keeps it enrolled)"
>
<PowerOff size={14} /> {tearingDown === h.uuid ? 'Tearing down…' : 'Teardown all'}
</button>
<button
className="control-btn danger"
disabled={decommissioning === h.uuid}