feat(web): stage live MazeNET edits behind an UPDATE button

Live topology edits fired one mutation per canvas action. That coupled
each edit to an immediate enqueue+apply, which (post-serialization)
raced the SSE refetch and duplicated optimistic placeholders, and gave
the user no chance to assemble a coherent changeset (add a net AND
bridge it) before any of it landed.

Live edits now STAGE: each editor primitive records its op and returns
immediately; the optimistic placeholders callers already draw are the
staged preview. The action button reads UPDATE (n) when live (DEPLOY
when pending) and flushes the batch through the slice-1 submit queue —
sequential, version-cursored, each awaited to a terminal state, stopping
loudly on the first failure with the unapplied remainder kept for retry.
REFRESH becomes DISCARD (n) to drop the batch. SSE refetch is paused
during a commit so per-mutation applied events don't wipe still-staged
placeholders mid-batch; one refetch reconciles at the end.

Also fix _dropArchetype, which bailed without an optimistic node on the
staged path, leaving a decky added to an uncommitted LAN invisible until
UPDATE.
This commit is contained in:
2026-06-16 12:59:57 -04:00
parent f18bfee746
commit 4f141c1a54
4 changed files with 194 additions and 63 deletions

View File

@@ -124,10 +124,14 @@ async function _dropArchetype(
{ name, services: dServices, x: nx, y: ny, decky_config: { archetype: drag.slug } }, { name, services: dServices, x: nx, y: ny, decky_config: { archetype: drag.slug } },
overNetId, net.name, overNetId, net.name,
); );
if (dRes.kind !== 'applied') return; // On a live topology the add is STAGED (kind 'enqueued') — no server
const decky = dRes.data; // uuid yet, so use a temp id and render the node optimistically.
// Without this the decky is invisible until UPDATE + refetch. Mirrors
// the pending-net placeholder in _dropNetwork.
const id = dRes.kind === 'applied' ? dRes.data.uuid : `pending-decky-${name}`;
const nodeName = dRes.kind === 'applied' ? dRes.data.name : name;
setNodes((p) => [...p, { setNodes((p) => [...p, {
kind: 'decky', id: decky.uuid, netId: overNetId, name: decky.name, kind: 'decky', id, netId: overNetId, name: nodeName,
archetype: drag.slug, services: dServices, status: 'idle', x: nx, y: ny, archetype: drag.slug, services: dServices, status: 'idle', x: nx, y: ny,
} as DeckyNode]); } as DeckyNode]);
} catch (err) { } catch (err) {
@@ -181,7 +185,7 @@ const MazeNET: React.FC = () => {
const { const {
nets, setNets, nodes, setNodes, edges, setEdges, nets, setNets, nodes, setNodes, edges, setEdges,
topoMeta, services, archetypes, topoMeta, services, archetypes,
loadErr, actionErr, commitErr, clearCommitErr, flashErr, loadErr, actionErr, commitErr, clearCommitErr, flashErr, setRefetchPaused,
deploying, onDeploy, deploying, onDeploy,
streamLive, lastEventAt, streamEnabled, streamLive, lastEventAt, streamEnabled,
refetch, refetch,
@@ -531,9 +535,37 @@ const MazeNET: React.FC = () => {
}, []); }, []);
const canDeploy = topoStatus === 'pending' && nets.length > 0; const canDeploy = topoStatus === 'pending' && nets.length > 0;
// A topology counts as "live" when its status is 'active' or 'degraded'.
// The action button renders UPDATE for a live topology and DEPLOY otherwise.
const liveTopo = topoStatus === 'active' || topoStatus === 'degraded';
// Number of staged-but-unsent edits, as reported by the editor hook.
const pendingCount = editor.pendingCount;
// True while handleCommit is flushing the staged batch; the UPDATE button
// is disabled and labelled "UPDATING…" for the duration.
const [committing, setCommitting] = useState(false);
const deckyNodes = nodes.filter((n) => n.kind === 'decky'); const deckyNodes = nodes.filter((n) => n.kind === 'decky');
const runningDeckies = deckyNodes.filter((n) => n.status === 'active').length; const runningDeckies = deckyNodes.filter((n) => n.status === 'active').length;
/* UPDATE button: flush the staged changeset as one sequential mutation
   batch. SSE refetch is paused so per-mutation applied events don't wipe
   still-staged placeholders mid-batch; one refetch reconciles at the end
   (success or failure). Any error thrown by commitStaged (e.g. a
   MutationFailedError for a failed op) is handed to flashErr.

   The fallback text deliberately carries no "applied k/n" figure: this
   callback closes over the `editor` object of the render that created
   it, so `editor.pendingCount` read after the await is still the
   pre-commit count and any difference computed from it is always 0. */
const handleCommit = useCallback(async () => {
  if (!topologyId || pendingCount === 0) return;
  const n = pendingCount;
  setCommitting(true);
  setRefetchPaused(true);
  try {
    const applied = await editor.commitStaged();
    pushToast({
      text: `UPDATED · ${applied} CHANGE${applied === 1 ? '' : 'S'}`,
      tone: 'matrix', icon: 'check-circle',
    });
  } catch (err) {
    flashErr(err, `update failed — unapplied changes (of ${n} staged) kept for retry`);
  } finally {
    setRefetchPaused(false);
    try {
      await refetch();
    } finally {
      // Always release the button, even if the reconciling refetch rejects;
      // otherwise the UI would be stuck on "UPDATING…".
      setCommitting(false);
    }
  }
}, [editor, topologyId, pendingCount, refetch, pushToast, flashErr, setRefetchPaused]);
return ( return (
<div className="maze-page"> <div className="maze-page">
<div className="maze-page-header"> <div className="maze-page-header">
@@ -592,8 +624,13 @@ const MazeNET: React.FC = () => {
{fullscreen ? <Minimize2 size={12} /> : <Maximize2 size={12} />} {fullscreen ? <Minimize2 size={12} /> : <Maximize2 size={12} />}
{fullscreen ? ' EXIT FULL' : ' FULLSCREEN'} {fullscreen ? ' EXIT FULL' : ' FULLSCREEN'}
</button> </button>
<button type="button" className="maze-btn ghost" onClick={refetch} title="Revert local state to server"> <button
<RotateCcw size={12} /> REFRESH type="button"
className="maze-btn ghost"
onClick={() => { editor.discardStaged(); void refetch(); }}
title={pendingCount > 0 ? 'Discard staged changes + reload from server' : 'Reload from server'}
>
<RotateCcw size={12} /> {pendingCount > 0 ? `DISCARD (${pendingCount})` : 'REFRESH'}
</button> </button>
<button <button
type="button" type="button"
@@ -604,15 +641,31 @@ const MazeNET: React.FC = () => {
> >
<Mail size={12} /> PERSONAS <Mail size={12} /> PERSONAS
</button> </button>
<button {liveTopo ? (
type="button" <button
className="maze-btn" type="button"
disabled={!canDeploy || deploying} className="maze-btn"
onClick={onDeploy} disabled={pendingCount === 0 || committing}
title={canDeploy ? 'Deploy topology' : 'Deploy requires pending status + at least one network'} onClick={handleCommit}
> title={pendingCount > 0
<UploadCloud size={12} /> {deploying ? 'DEPLOYING…' : 'DEPLOY'} ? `Apply ${pendingCount} staged change(s) to the live topology`
</button> : 'No staged changes'}
>
<UploadCloud size={12} /> {committing
? 'UPDATING…'
: pendingCount > 0 ? `UPDATE (${pendingCount})` : 'UPDATE'}
</button>
) : (
<button
type="button"
className="maze-btn"
disabled={!canDeploy || deploying}
onClick={onDeploy}
title={canDeploy ? 'Deploy topology' : 'Deploy requires pending status + at least one network'}
>
<UploadCloud size={12} /> {deploying ? 'DEPLOYING…' : 'DEPLOY'}
</button>
)}
</div> </div>
</div> </div>

View File

@@ -1,5 +1,5 @@
// SPDX-License-Identifier: AGPL-3.0-or-later // SPDX-License-Identifier: AGPL-3.0-or-later
import { useCallback, useEffect, useState } from 'react'; import { useCallback, useEffect, useRef, useState } from 'react';
import type { ApiError } from '../../utils/api'; import type { ApiError } from '../../utils/api';
import type { Net, MazeNode, Edge } from './types'; import type { Net, MazeNode, Edge } from './types';
import { DEFAULT_SERVICES, ARCHETYPES as DEFAULT_ARCHETYPES } from './data'; import { DEFAULT_SERVICES, ARCHETYPES as DEFAULT_ARCHETYPES } from './data';
@@ -48,6 +48,10 @@ export interface UseTopologyDataResult {
commitErr: string | null; commitErr: string | null;
clearCommitErr: () => void; clearCommitErr: () => void;
flashErr: (err: unknown, fallback: string) => void; flashErr: (err: unknown, fallback: string) => void;
/** Pause SSE-driven refetch while a commit batch is in flight, so the
* per-mutation ``applied`` events don't wipe the still-staged
* placeholders mid-batch. The committer does one refetch at the end. */
setRefetchPaused: (paused: boolean) => void;
// Deploy // Deploy
deploying: boolean; deploying: boolean;
@@ -87,6 +91,11 @@ export function useTopologyData(
const clearCommitErr = useCallback(() => setCommitErr(null), []); const clearCommitErr = useCallback(() => setCommitErr(null), []);
// Flag consulted by the stream event handler before it triggers a refetch.
// Held in a ref so flipping it neither re-renders nor changes the identity
// of any callback that reads it.
const refetchPausedRef = useRef<boolean>(false);
// Stable setter exposed to callers; writes straight through to the ref.
const setRefetchPaused = useCallback(
  (paused: boolean): void => {
    refetchPausedRef.current = paused;
  },
  [],
);
const flashErr = useCallback((err: unknown, fallback: string) => { const flashErr = useCallback((err: unknown, fallback: string) => {
// A failed live mutation is loud + persistent: the queue halted and // A failed live mutation is loud + persistent: the queue halted and
// the topology probably degraded — don't let it vanish in 4s. // the topology probably degraded — don't let it vanish in 4s.
@@ -153,7 +162,8 @@ export function useTopologyData(
if (event.name === 'mutation.applied' if (event.name === 'mutation.applied'
|| event.name === 'mutation.failed' || event.name === 'mutation.failed'
|| event.name === 'status') { || event.name === 'status') {
void refetch(); // Suppressed mid-commit — the committer drives one refetch at the end.
if (!refetchPausedRef.current) void refetch();
} }
// Live service mutations from another tab / admin: optimistically // Live service mutations from another tab / admin: optimistically
// patch local state so the chip set reflects shape without a full // patch local state so the chip set reflects shape without a full
@@ -203,7 +213,7 @@ export function useTopologyData(
edges, setEdges, edges, setEdges,
topoMeta, topoMeta,
services, archetypes, services, archetypes,
loadErr, actionErr, commitErr, clearCommitErr, flashErr, loadErr, actionErr, commitErr, clearCommitErr, flashErr, setRefetchPaused,
deploying, onDeploy, deploying, onDeploy,
streamLive, lastEventAt, streamEnabled, streamLive, lastEventAt, streamEnabled,
refetch, refetch,

View File

@@ -19,27 +19,32 @@ const editorFor = (api: MazeApi, topoVersion = 5) =>
useTopologyEditor({ api, topoStatus: 'active', topoVersion }), useTopologyEditor({ api, topoStatus: 'active', topoVersion }),
); );
describe('useTopologyEditor live mutation queue', () => { describe('useTopologyEditor live staging', () => {
it('serialises concurrent submits and advances expected_version per enqueue', async () => { it('stages live edits without sending; commit flushes them in order with a version cursor', async () => {
const enqueue = vi.fn().mockResolvedValue({ mutation_id: 'm', state: 'pending' }); const enqueue = vi.fn().mockResolvedValue({ mutation_id: 'm', state: 'pending' });
const api = buildApi({ enqueueMutation: enqueue }); const api = buildApi({ enqueueMutation: enqueue });
const { result } = editorFor(api, 5); const { result } = editorFor(api, 5);
// Fire two structural ops in the SAME tick — the pre-fix bug was both
// sending expected_version=5 and the loser 409ing.
await act(async () => { await act(async () => {
await Promise.all([ await result.current.createLan('t', { name: 'a', is_dmz: false, x: 0, y: 0 });
result.current.createLan('t', { name: 'a', is_dmz: false, x: 0, y: 0 }), await result.current.deleteLan('t', 'lid', 'b');
result.current.deleteLan('t', 'lid', 'b'), });
]);
// Staged, not sent.
expect(result.current.pendingCount).toBe(2);
expect(enqueue).not.toHaveBeenCalled();
await act(async () => {
await result.current.commitStaged();
}); });
expect(enqueue).toHaveBeenCalledTimes(2); expect(enqueue).toHaveBeenCalledTimes(2);
expect(enqueue.mock.calls[0][3]).toBe(5); // first uses server version expect(enqueue.mock.calls[0][3]).toBe(5); // first uses server version
expect(enqueue.mock.calls[1][3]).toBe(6); // second advanced by the cursor expect(enqueue.mock.calls[1][3]).toBe(6); // second advanced by the cursor
expect(result.current.pendingCount).toBe(0);
}); });
it('throws MutationFailedError on a failed mutation but keeps the queue alive', async () => { it('commit stops loudly on a failed op, keeps the remainder, and retries cleanly', async () => {
const wait = vi const wait = vi
.fn() .fn()
.mockResolvedValueOnce({ state: 'failed', reason: 'post-apply validation failed: IP_COLLISION' }) .mockResolvedValueOnce({ state: 'failed', reason: 'post-apply validation failed: IP_COLLISION' })
@@ -48,16 +53,36 @@ describe('useTopologyEditor live mutation queue', () => {
const { result } = editorFor(api, 1); const { result } = editorFor(api, 1);
await act(async () => { await act(async () => {
await expect( await result.current.createLan('t', { name: 'a', is_dmz: false, x: 0, y: 0 });
result.current.createLan('t', { name: 'a', is_dmz: false, x: 0, y: 0 }), await result.current.deleteLan('t', 'lid', 'b');
).rejects.toBeInstanceOf(MutationFailedError);
}); });
expect(result.current.pendingCount).toBe(2);
// A failed op must not wedge the chain — the next submit still resolves.
await act(async () => { await act(async () => {
await expect( await expect(result.current.commitStaged()).rejects.toBeInstanceOf(MutationFailedError);
result.current.deleteLan('t', 'lid', 'b'),
).resolves.toEqual({ kind: 'enqueued', mutationId: 'm' });
}); });
// First op failed → nothing applied → both stay staged for retry.
expect(result.current.pendingCount).toBe(2);
// Retry: waitForMutation now resolves 'applied' for both.
await act(async () => {
await result.current.commitStaged();
});
expect(result.current.pendingCount).toBe(0);
});
it('discardStaged drops the batch without sending', async () => {
const enqueue = vi.fn().mockResolvedValue({ mutation_id: 'm', state: 'pending' });
const api = buildApi({ enqueueMutation: enqueue });
const { result } = editorFor(api, 1);
await act(async () => {
await result.current.createLan('t', { name: 'a', is_dmz: false, x: 0, y: 0 });
});
expect(result.current.pendingCount).toBe(1);
act(() => result.current.discardStaged());
expect(result.current.pendingCount).toBe(0);
expect(enqueue).not.toHaveBeenCalled();
}); });
}); });

View File

@@ -16,7 +16,7 @@
* primitive because mutation ops are name-keyed while direct CRUD is * primitive because mutation ops are name-keyed while direct CRUD is
* uuid-keyed. Callers plumb both. * uuid-keyed. Callers plumb both.
*/ */
import { useCallback, useEffect, useMemo, useRef } from 'react'; import { useCallback, useEffect, useMemo, useRef, useState } from 'react';
import type { import type {
CreateDeckyBody, CreateDeckyBody,
CreateLanBody, CreateLanBody,
@@ -53,6 +53,12 @@ export type PrimitiveResult<T> =
| { kind: 'applied'; data: T } | { kind: 'applied'; data: T }
| { kind: 'enqueued'; mutationId: string }; | { kind: 'enqueued'; mutationId: string };
/** One staged live edit: the arguments later passed to
 * `api.enqueueMutation` when the batch is committed. */
type StagedOp = {
  /** Topology the mutation targets. */
  topologyId: string;
  /** Mutation operation to enqueue. */
  op: MutationOp;
  /** Operation payload, forwarded unchanged at commit time. */
  payload: Record<string, unknown>;
};
export interface UseTopologyEditor { export interface UseTopologyEditor {
createLan(topologyId: string, body: CreateLanBody): Promise<PrimitiveResult<LANRow>>; createLan(topologyId: string, body: CreateLanBody): Promise<PrimitiveResult<LANRow>>;
updateLan( updateLan(
@@ -108,6 +114,18 @@ export interface UseTopologyEditor {
deckyName: string, deckyName: string,
lanName: string, lanName: string,
): Promise<PrimitiveResult<void>>; ): Promise<PrimitiveResult<void>>;
// ── Staging (live topologies only) ───────────────────────────────────
/** Count of staged-but-unsent live edits. 0 on a pending topology. */
pendingCount: number;
/** Flush staged edits as one sequential mutation batch (version-cursored,
* each awaited to a terminal state). Stops on the first failure, keeping
* the failing op + remainder staged, and rethrows the error (a
* MutationFailedError when a mutation ends in the 'failed' state) so
* the caller can surface it. Resolves with the count applied. */
commitStaged(): Promise<number>;
/** Drop all staged edits without sending (paired with a refetch to wipe
* their optimistic placeholders). */
discardStaged(): void;
} }
export function useTopologyEditor( export function useTopologyEditor(
@@ -116,19 +134,19 @@ export function useTopologyEditor(
const { api, topoStatus, topoVersion } = opts; const { api, topoStatus, topoVersion } = opts;
const live = topoStatus === 'active' || topoStatus === 'degraded'; const live = topoStatus === 'active' || topoStatus === 'degraded';
// Serialised mutation submission. Two problems this solves, both // Live edits STAGE rather than send. Each live primitive records its
// proven against the live backend: // op here and returns immediately; nothing hits the backend until
// 1. expected_version is bumped at ENQUEUE (not at apply), so two // commitStaged() flushes the batch (the UPDATE button). Staging is what
// ops fired back-to-back race: whichever HTTP request the server // lets the user assemble a coherent changeset (e.g. add a net AND bridge
// sees second carries a stale version and 409s. We chain submits // it) before any of it lands — and it kills the per-action SSE-refetch
// so only one enqueue is ever in flight, in submission order. // race that duplicated optimistic placeholders.
// 2. A failed mutation silently degrades the topology. We await each const [staged, setStaged] = useState<StagedOp[]>([]);
// mutation to a terminal state and throw MutationFailedError on
// 'failed' so the caller can surface it loudly.
const chainRef = useRef<Promise<unknown>>(Promise.resolve());
// Optimistic expected_version cursor. enqueue bumps the server version // Optimistic expected_version cursor. enqueue bumps the server version
// by exactly 1, so we advance locally rather than waiting for a refetch // by exactly 1, so within a commit batch we advance locally rather than
// between queued ops (onReparent fires detach + attach in one handler). // waiting for a refetch between ops. NB: a *failed* mutation still bumps
// the version (the check happens at enqueue), so we advance after enqueue
// regardless of the apply outcome.
const cursorRef = useRef<number>(topoVersion); const cursorRef = useRef<number>(topoVersion);
useEffect(() => { useEffect(() => {
// Adopt a higher server version (a refetch landed, or another editor // Adopt a higher server version (a refetch landed, or another editor
@@ -139,25 +157,45 @@ export function useTopologyEditor(
const submit = useCallback( const submit = useCallback(
(topologyId: string, op: MutationOp, payload: Record<string, unknown>): Promise<string> => { (topologyId: string, op: MutationOp, payload: Record<string, unknown>): Promise<string> => {
const task = chainRef.current.then(async () => { setStaged((prev) => [...prev, { topologyId, op, payload }]);
const expected = cursorRef.current; // Sentinel id — callers thread this into optimistic state but it
const res = await api.enqueueMutation(topologyId, op, payload, expected); // never reaches the backend; the post-commit refetch reconciles to
cursorRef.current = expected + 1; // real ids.
const row = await api.waitForMutation(topologyId, res.mutation_id); return Promise.resolve('staged');
if (row.state === 'failed') {
throw new MutationFailedError(op, row.reason ?? 'unknown reason');
}
return res.mutation_id;
});
// Keep the chain alive after a rejection so one failed op doesn't
// wedge every subsequent submit.
chainRef.current = task.then(() => undefined, () => undefined);
return task;
}, },
[api], [],
); );
return useMemo<UseTopologyEditor>(() => ({ const discardStaged = useCallback(() => setStaged([]), []);
/**
 * Flush the staged ops to the backend, one at a time and in staging order.
 *
 * Each op is enqueued with the current expected-version cursor and then
 * awaited to a terminal state before the next one is sent. The loop stops
 * at the first error and rethrows it (a MutationFailedError when the
 * mutation ends in the 'failed' state).
 *
 * @returns the number of ops applied (0 when nothing was staged).
 */
const commitStaged = useCallback(async (): Promise<number> => {
  const ops = staged;
  if (ops.length === 0) return 0;
  let applied = 0;
  try {
    for (const o of ops) {
      const expected = cursorRef.current;
      const res = await api.enqueueMutation(o.topologyId, o.op, o.payload, expected);
      // Advance even if the apply fails below — enqueue already bumped
      // the server version.
      cursorRef.current = expected + 1;
      const row = await api.waitForMutation(o.topologyId, res.mutation_id);
      if (row.state === 'failed') {
        throw new MutationFailedError(o.op, row.reason ?? 'unknown reason');
      }
      applied += 1;
    }
    return applied;
  } finally {
    // Remove exactly the ops this run applied, matched by object identity.
    // On failure the failing op + the rest stay staged for retry. A
    // functional update (rather than overwriting with a snapshot-derived
    // array) also preserves any op that was staged while the batch was in
    // flight, which would otherwise be dropped without ever being sent.
    const done = new Set<StagedOp>(ops.slice(0, applied));
    setStaged((prev) => prev.filter((p) => !done.has(p)));
  }
}, [staged, api]);
const primitives = useMemo<Omit<UseTopologyEditor, 'pendingCount' | 'commitStaged' | 'discardStaged'>>(() => ({
// ── LAN ──────────────────────────────────────────────────────────── // ── LAN ────────────────────────────────────────────────────────────
async createLan(topologyId, body) { async createLan(topologyId, body) {
if (!live) { if (!live) {
@@ -274,4 +312,9 @@ export function useTopologyEditor(
return { kind: 'enqueued', mutationId }; return { kind: 'enqueued', mutationId };
}, },
}), [api, live, submit]); }), [api, live, submit]);
return useMemo<UseTopologyEditor>(
() => ({ ...primitives, pendingCount: staged.length, commitStaged, discardStaged }),
[primitives, staged.length, commitStaged, discardStaged],
);
} }