feat(web): async teardowns — 202 + background task, UI allows parallel queue

Teardowns were synchronous all the way through: POST blocked on the
worker's docker-compose-down cycle (seconds to minutes), the frontend
stored tearingDown as a single string so only one teardown could be
tracked as in flight at a time, and operators couldn't queue a second
teardown until the first returned. On a flaky worker that meant staring
at a spinner for the entire duration of the teardown request.

Backend: POST /swarm/hosts/{uuid}/teardown returns 202 the instant the
request is validated. Affected shards flip to state='tearing_down'
synchronously before the response so the UI reflects progress
immediately, then the actual AgentClient call + DB cleanup run in an
asyncio.create_task (tracked in a module-level set to survive GC and
to be drainable by tests). On failure the shard flips to
'teardown_failed' with the error recorded — nothing is re-raised,
since there's no caller to catch it.

Frontend: swap tearingDown / decommissioning from 'string | null' to
'Set<string>'. Each button tracks its own in-flight state; the poll
loop picks up the final shard state from the backend. Multiple
teardowns can now be queued without blocking each other.
This commit is contained in:
2026-04-19 20:30:56 -04:00
parent 07ec4bc269
commit 9d68bb45c7
4 changed files with 179 additions and 44 deletions

View File

@@ -21,7 +21,7 @@ interface DeckyShard {
const SwarmDeckies: React.FC = () => {
const [shards, setShards] = useState<DeckyShard[]>([]);
const [loading, setLoading] = useState(true);
const [tearingDown, setTearingDown] = useState<string | null>(null);
const [tearingDown, setTearingDown] = useState<Set<string>>(new Set());
const [error, setError] = useState<string | null>(null);
// Two-click arm/commit replaces window.confirm() — browsers silently
// suppress confirm() after the "prevent additional dialogs" opt-out.
@@ -53,14 +53,22 @@ const SwarmDeckies: React.FC = () => {
const key = `td:${s.host_uuid}:${s.decky_name}`;
if (armed !== key) { arm(key); return; }
setArmed(null);
setTearingDown(s.decky_name);
setTearingDown((prev) => new Set(prev).add(s.decky_name));
try {
// Endpoint returns 202 immediately; the actual teardown runs in the
// background on the backend. Shard state flips to 'tearing_down' and
// the 10s poll picks up the final state (gone on success, or
// 'teardown_failed' with an error).
await api.post(`/swarm/hosts/${s.host_uuid}/teardown`, { decky_id: s.decky_name });
await fetch();
} catch (err: any) {
alert(err?.response?.data?.detail || 'Teardown failed');
} finally {
setTearingDown(null);
setTearingDown((prev) => {
const next = new Set(prev);
next.delete(s.decky_name);
return next;
});
}
};
@@ -115,12 +123,12 @@ const SwarmDeckies: React.FC = () => {
<td>
<button
className="control-btn danger"
disabled={tearingDown === s.decky_name}
disabled={tearingDown.has(s.decky_name) || s.state === 'tearing_down'}
onClick={() => handleTeardown(s)}
title="Stop this decky on its host"
>
<PowerOff size={14} />{' '}
{tearingDown === s.decky_name
{tearingDown.has(s.decky_name) || s.state === 'tearing_down'
? 'Tearing down…'
: armed === `td:${s.host_uuid}:${s.decky_name}`
? 'Click again to confirm'

View File

@@ -22,8 +22,8 @@ const shortFp = (fp: string): string => (fp ? fp.slice(0, 16) + '…' : '—');
const SwarmHosts: React.FC = () => {
const [hosts, setHosts] = useState<SwarmHost[]>([]);
const [loading, setLoading] = useState(true);
const [decommissioning, setDecommissioning] = useState<string | null>(null);
const [tearingDown, setTearingDown] = useState<string | null>(null);
const [decommissioning, setDecommissioning] = useState<Set<string>>(new Set());
const [tearingDown, setTearingDown] = useState<Set<string>>(new Set());
const [error, setError] = useState<string | null>(null);
// Two-click arm/commit replaces window.confirm(). Browsers silently
// suppress confirm() after the "prevent additional dialogs" opt-out,
@@ -53,18 +53,22 @@ const SwarmHosts: React.FC = () => {
return () => clearInterval(t);
}, []);
// Returns a fresh Set holding every member of `set` plus `id`; the input is
// never mutated, so the result is safe to hand to a React state setter.
const addTo = (set: Set<string>, id: string) => new Set(set).add(id);
// Returns a fresh Set holding every member of `set` except `id`; the input is
// never mutated, and a new Set is produced even when `id` was not present.
const removeFrom = (set: Set<string>, id: string) =>
  new Set(Array.from(set).filter((member) => member !== id));
const handleTeardownAll = async (host: SwarmHost) => {
const key = `teardown:${host.uuid}`;
if (armed !== key) { arm(key); return; }
setArmed(null);
setTearingDown(host.uuid);
setTearingDown((s) => addTo(s, host.uuid));
try {
// 202 Accepted — teardown runs async on the backend.
await api.post(`/swarm/hosts/${host.uuid}/teardown`, {});
await fetchHosts();
} catch (err: any) {
alert(err?.response?.data?.detail || 'Teardown failed');
} finally {
setTearingDown(null);
setTearingDown((s) => removeFrom(s, host.uuid));
}
};
@@ -72,14 +76,14 @@ const SwarmHosts: React.FC = () => {
const key = `decom:${host.uuid}`;
if (armed !== key) { arm(key); return; }
setArmed(null);
setDecommissioning(host.uuid);
setDecommissioning((s) => addTo(s, host.uuid));
try {
await api.delete(`/swarm/hosts/${host.uuid}`);
await fetchHosts();
} catch (err: any) {
alert(err?.response?.data?.detail || 'Decommission failed');
} finally {
setDecommissioning(null);
setDecommissioning((s) => removeFrom(s, host.uuid));
}
};
@@ -126,12 +130,12 @@ const SwarmHosts: React.FC = () => {
<td>
<button
className={`control-btn${armed === `teardown:${h.uuid}` ? ' danger' : ''}`}
disabled={tearingDown === h.uuid || h.status !== 'active'}
disabled={tearingDown.has(h.uuid) || h.status !== 'active'}
onClick={() => handleTeardownAll(h)}
title="Stop all deckies on this host (keeps it enrolled)"
>
<PowerOff size={14} />{' '}
{tearingDown === h.uuid
{tearingDown.has(h.uuid)
? 'Tearing down…'
: armed === `teardown:${h.uuid}`
? 'Click again to confirm'
@@ -139,11 +143,11 @@ const SwarmHosts: React.FC = () => {
</button>
<button
className="control-btn danger"
disabled={decommissioning === h.uuid}
disabled={decommissioning.has(h.uuid)}
onClick={() => handleDecommission(h)}
>
<Trash2 size={14} />{' '}
{decommissioning === h.uuid
{decommissioning.has(h.uuid)
? 'Decommissioning…'
: armed === `decom:${h.uuid}`
? 'Click again to confirm'