fix(web): serialize live topology mutations + surface failures loudly

Live MazeNET edits fired their mutations fire-and-forget: each canvas
action enqueued immediately and never awaited the result. Two failures
followed from that:

- The server-side topology version (the value expected_version is checked
  against) is bumped at ENQUEUE (not at apply), so two ops fired
  back-to-back raced — the second carried a stale expected_version and
  409'd. Edits only worked when hand-paced (an SSE refetch landed between
  them).
- A failed mutation degrades the topology, but the only signal was a 4s
  toast, so the user saw DEGRADED with no cause.

useTopologyEditor now routes every live op through a serialized submit
queue: one enqueue in flight at a time (submission order preserved), an
optimistic expected_version cursor advanced per enqueue so back-to-back
ops (e.g. reparent's detach+attach) don't need a refetch between them,
and each mutation awaited to a terminal state. A 'failed' row throws
MutationFailedError, which the page pins as a persistent UPDATE FAILED
banner instead of a vanishing toast.

Slice 1 of the live-edit rework; stage+UPDATE-button batching and louder
backend materialisation reporting to follow.
This commit is contained in:
2026-06-16 12:44:34 -04:00
parent 5505de782f
commit f18bfee746
5 changed files with 216 additions and 28 deletions

View File

@@ -181,7 +181,7 @@ const MazeNET: React.FC = () => {
const { const {
nets, setNets, nodes, setNodes, edges, setEdges, nets, setNets, nodes, setNodes, edges, setEdges,
topoMeta, services, archetypes, topoMeta, services, archetypes,
loadErr, actionErr, flashErr, loadErr, actionErr, commitErr, clearCommitErr, flashErr,
deploying, onDeploy, deploying, onDeploy,
streamLive, lastEventAt, streamEnabled, streamLive, lastEventAt, streamEnabled,
refetch, refetch,
@@ -560,6 +560,17 @@ const MazeNET: React.FC = () => {
)} )}
{loadErr && <span className="alert-text"> · {loadErr}</span>} {loadErr && <span className="alert-text"> · {loadErr}</span>}
{actionErr && <span className="alert-text"> · {actionErr}</span>} {actionErr && <span className="alert-text"> · {actionErr}</span>}
{/* Persistent failure banner: commitErr has no auto-clear, so it stays
    until the user presses the dismiss button (clearCommitErr). */}
{commitErr && (
  <span className="alert-text"> · UPDATE FAILED: {commitErr}
    <button
      type="button"
      className="maze-btn ghost"
      style={{ marginLeft: 6, padding: '0 6px' }}
      onClick={clearCommitErr}
      title="Dismiss"
      aria-label="Dismiss"
    >
      {/* Visible glyph so the control is not an empty, unlabeled box. */}
      &times;
    </button>
  </span>
)}
</div> </div>
</div> </div>
<div className="maze-page-actions"> <div className="maze-page-actions">

View File

@@ -37,6 +37,16 @@ export interface EdgeRow {
forwards_l3: boolean; forwards_l3: boolean;
} }
/** Lifecycle state of a queued mutation. ``applied`` and ``failed`` are the
* terminal states that ``waitForMutation`` stops polling on. */
export type MutationState = 'pending' | 'applying' | 'applied' | 'failed';
/** One row of the per-topology mutation list
* (``GET /topologies/{id}/mutations``). */
export interface MutationRow {
// Matched against the ``mutation_id`` returned by enqueue when polling.
id: string;
topology_id: string;
// NOTE(review): presumably one of the MutationOp names; typed as a plain string here — confirm.
op: string;
state: MutationState;
// Failure detail the editor reads when ``state`` is 'failed'.
// NOTE(review): presumably null for non-failed rows — confirm against the backend.
reason: string | null;
}
export interface TopologySummary { export interface TopologySummary {
id: string; id: string;
name: string; name: string;
@@ -249,6 +259,16 @@ export interface MazeApi {
expectedVersion?: number, expectedVersion?: number,
) => Promise<EnqueueMutationResponse>; ) => Promise<EnqueueMutationResponse>;
/** Poll the mutation queue until ``mutationId`` reaches a terminal
* state (``applied`` | ``failed``). Resolves with that row; rejects on
* timeout, or if one of the polling requests itself fails. A ``failed``
* row resolves (not rejects) so callers can read ``reason`` — the editor
* turns it into a loud error. */
waitForMutation: (
topologyId: string,
mutationId: string,
opts?: { timeoutMs?: number; intervalMs?: number },
) => Promise<MutationRow>;
deployTopology: (topologyId: string) => Promise<void>; deployTopology: (topologyId: string) => Promise<void>;
} }
@@ -393,6 +413,36 @@ export function useMazeApi(): MazeApi {
[], [],
); );
const waitForMutation = useCallback(
  async (
    topologyId: string,
    mutationId: string,
    opts: { timeoutMs?: number; intervalMs?: number } = {},
  ): Promise<MutationRow> => {
    const { timeoutMs: budgetMs = 30000, intervalMs: pauseMs = 400 } = opts;
    const expiresAt = Date.now() + budgetMs;

    // A row counts as settled once it exists and is in a terminal state.
    const hasSettled = (candidate: MutationRow | undefined): candidate is MutationRow =>
      !!candidate && (candidate.state === 'applied' || candidate.state === 'failed');

    // Polls the list endpoint rather than subscribing to the SSE stream:
    // the stream also reports mutation.applied/failed, but a one-shot
    // waiter there would tie the editor to the stream hook.
    while (true) {
      const response = await api.get<MutationRow[]>(
        `/topologies/${topologyId}/mutations`,
      );
      const match = response.data.find((entry) => entry.id === mutationId);
      if (hasSettled(match)) {
        return match;
      }
      // The deadline is checked only after a poll, so at least one poll
      // always happens, even with a zero timeout.
      const expired = Date.now() >= expiresAt;
      if (expired) {
        throw new Error(
          `mutation ${mutationId} did not settle within ${budgetMs}ms`,
        );
      }
      await new Promise((wake) => setTimeout(wake, pauseMs));
    }
  },
  [],
);
const enqueueMutation = useCallback( const enqueueMutation = useCallback(
async ( async (
topologyId: string, topologyId: string,
@@ -418,7 +468,7 @@ export function useMazeApi(): MazeApi {
createLan, updateLan, deleteLan, createLan, updateLan, deleteLan,
createDecky, updateDecky, deleteDecky, createDecky, updateDecky, deleteDecky,
attachEdge, detachEdge, attachEdge, detachEdge,
enqueueMutation, enqueueMutation, waitForMutation,
deployTopology, deployTopology,
}), }),
[ [
@@ -427,7 +477,7 @@ export function useMazeApi(): MazeApi {
createLan, updateLan, deleteLan, createLan, updateLan, deleteLan,
createDecky, updateDecky, deleteDecky, createDecky, updateDecky, deleteDecky,
attachEdge, detachEdge, attachEdge, detachEdge,
enqueueMutation, enqueueMutation, waitForMutation,
deployTopology, deployTopology,
], ],
); );

View File

@@ -5,6 +5,7 @@ import type { Net, MazeNode, Edge } from './types';
import { DEFAULT_SERVICES, ARCHETYPES as DEFAULT_ARCHETYPES } from './data'; import { DEFAULT_SERVICES, ARCHETYPES as DEFAULT_ARCHETYPES } from './data';
import type { Archetype, ServiceDef } from './data'; import type { Archetype, ServiceDef } from './data';
import type { MazeApi } from './useMazeApi'; import type { MazeApi } from './useMazeApi';
import { MutationFailedError } from './useTopologyEditor';
import { useTopologyStream, type TopologyStreamEvent } from './useTopologyStream'; import { useTopologyStream, type TopologyStreamEvent } from './useTopologyStream';
export interface TopoMeta { export interface TopoMeta {
@@ -42,6 +43,10 @@ export interface UseTopologyDataResult {
// Errors + transient banners // Errors + transient banners
loadErr: string | null; loadErr: string | null;
actionErr: string | null; actionErr: string | null;
/** Persistent (no auto-clear) error from a failed live mutation —
* the topology likely went degraded. Dismissed via clearCommitErr. */
commitErr: string | null;
clearCommitErr: () => void;
flashErr: (err: unknown, fallback: string) => void; flashErr: (err: unknown, fallback: string) => void;
// Deploy // Deploy
@@ -77,9 +82,18 @@ export function useTopologyData(
const [loadErr, setLoadErr] = useState<string | null>(null); const [loadErr, setLoadErr] = useState<string | null>(null);
const [actionErr, setActionErr] = useState<string | null>(null); const [actionErr, setActionErr] = useState<string | null>(null);
const [commitErr, setCommitErr] = useState<string | null>(null);
const [deploying, setDeploying] = useState(false); const [deploying, setDeploying] = useState(false);
const clearCommitErr = useCallback(() => setCommitErr(null), []);
const flashErr = useCallback((err: unknown, fallback: string) => { const flashErr = useCallback((err: unknown, fallback: string) => {
// A failed live mutation is loud + persistent: the op did not apply and
// the topology probably degraded — don't let it vanish in 4s.
if (err instanceof MutationFailedError) {
setCommitErr(err.message);
return;
}
const msg = (err as ApiError)?.response?.data?.detail ?? (err as ApiError)?.message ?? fallback; const msg = (err as ApiError)?.response?.data?.detail ?? (err as ApiError)?.message ?? fallback;
setActionErr(msg); setActionErr(msg);
setTimeout(() => setActionErr(null), 4000); setTimeout(() => setActionErr(null), 4000);
@@ -189,7 +203,7 @@ export function useTopologyData(
edges, setEdges, edges, setEdges,
topoMeta, topoMeta,
services, archetypes, services, archetypes,
loadErr, actionErr, flashErr, loadErr, actionErr, commitErr, clearCommitErr, flashErr,
deploying, onDeploy, deploying, onDeploy,
streamLive, lastEventAt, streamEnabled, streamLive, lastEventAt, streamEnabled,
refetch, refetch,

View File

@@ -0,0 +1,63 @@
// SPDX-License-Identifier: AGPL-3.0-or-later
/**
* @vitest-environment jsdom
*/
import { describe, it, expect, vi } from 'vitest';
import { act, renderHook } from '@testing-library/react';
import { useTopologyEditor, MutationFailedError } from './useTopologyEditor';
import type { MazeApi } from './useMazeApi';
/** Build a stub MazeApi exposing only the two members the live queue calls.
 * Defaults: enqueue resolves with mutation id 'm', and the wait settles as
 * 'applied'. Anything in ``overrides`` replaces the matching default. */
const buildApi = (overrides: Partial<MazeApi> = {}): MazeApi => {
  const defaults = {
    enqueueMutation: vi.fn().mockResolvedValue({ mutation_id: 'm', state: 'pending' }),
    waitForMutation: vi.fn().mockResolvedValue({ state: 'applied', reason: null }),
  };
  // Cast via unknown: the stub deliberately leaves out the rest of MazeApi.
  return { ...defaults, ...overrides } as unknown as MazeApi;
};
/** Render useTopologyEditor against ``api`` with an 'active' topology (so the
 * live mutation path is taken), seeded with the given server version. */
const editorFor = (api: MazeApi, topoVersion = 5) => {
  const useEditorUnderTest = () =>
    useTopologyEditor({ api, topoStatus: 'active', topoVersion });
  return renderHook(useEditorUnderTest);
};
describe('useTopologyEditor live mutation queue', () => {
  // Fresh body per call so no test shares a mutable object with another.
  const lanBody = () => ({ name: 'a', is_dmz: false, x: 0, y: 0 });

  it('serialises concurrent submits and advances expected_version per enqueue', async () => {
    const enqueueSpy = vi.fn().mockResolvedValue({ mutation_id: 'm', state: 'pending' });
    const { result: editor } = editorFor(buildApi({ enqueueMutation: enqueueSpy }), 5);

    // Both structural ops start in the same tick. Before the fix, each
    // sent expected_version=5 and the second one 409'd.
    await act(async () => {
      const pendingCreate = editor.current.createLan('t', lanBody());
      const pendingDelete = editor.current.deleteLan('t', 'lid', 'b');
      await Promise.all([pendingCreate, pendingDelete]);
    });

    expect(enqueueSpy).toHaveBeenCalledTimes(2);
    const [firstCall, secondCall] = enqueueSpy.mock.calls;
    expect(firstCall[3]).toBe(5); // first uses the server version
    expect(secondCall[3]).toBe(6); // second uses the locally advanced cursor
  });

  it('throws MutationFailedError on a failed mutation but keeps the queue alive', async () => {
    const failedRow = { state: 'failed', reason: 'post-apply validation failed: IP_COLLISION' };
    const appliedRow = { state: 'applied', reason: null };
    const waitSpy = vi.fn().mockResolvedValueOnce(failedRow).mockResolvedValue(appliedRow);
    const { result: editor } = editorFor(buildApi({ waitForMutation: waitSpy }), 1);

    await act(async () => {
      const failing = editor.current.createLan('t', lanBody());
      await expect(failing).rejects.toBeInstanceOf(MutationFailedError);
    });

    // The rejection above must not wedge the chain: a later submit still resolves.
    await act(async () => {
      const followUp = editor.current.deleteLan('t', 'lid', 'b');
      await expect(followUp).resolves.toEqual({ kind: 'enqueued', mutationId: 'm' });
    });
  });
});

View File

@@ -16,7 +16,7 @@
* primitive because mutation ops are name-keyed while direct CRUD is * primitive because mutation ops are name-keyed while direct CRUD is
* uuid-keyed. Callers plumb both. * uuid-keyed. Callers plumb both.
*/ */
import { useMemo } from 'react'; import { useCallback, useEffect, useMemo, useRef } from 'react';
import type { import type {
CreateDeckyBody, CreateDeckyBody,
CreateLanBody, CreateLanBody,
@@ -24,8 +24,23 @@ import type {
EdgeRow, EdgeRow,
LANRow, LANRow,
MazeApi, MazeApi,
MutationOp,
} from './useMazeApi'; } from './useMazeApi';
/**
 * Thrown by a live primitive when its mutation settles as ``failed``.
 * Carries the op + backend reason so the page can surface a loud,
 * persistent error instead of a transient toast.
 *
 * Callers detect it with ``instanceof``, so the constructor restores the
 * prototype chain explicitly. Without that, a build that down-levels
 * classes to ES5 would produce a plain ``Error`` and the check would fail.
 */
export class MutationFailedError extends Error {
  /** Name of the mutation op that failed (e.g. ``add_lan``). */
  readonly op: string;
  /** Failure reason reported for the mutation. */
  readonly reason: string;

  /**
   * @param op - Mutation op that was submitted.
   * @param reason - Failure reason to show to the user.
   */
  constructor(op: string, reason: string) {
    super(`mutation ${op} failed: ${reason}`);
    // Keep `instanceof MutationFailedError` reliable on every compile target.
    Object.setPrototypeOf(this, new.target.prototype);
    this.name = 'MutationFailedError';
    this.op = op;
    this.reason = reason;
  }
}
export interface UseTopologyEditorOptions { export interface UseTopologyEditorOptions {
api: MazeApi; api: MazeApi;
/** Current topology status from :func:`getTopology`. */ /** Current topology status from :func:`getTopology`. */
@@ -101,6 +116,47 @@ export function useTopologyEditor(
const { api, topoStatus, topoVersion } = opts; const { api, topoStatus, topoVersion } = opts;
const live = topoStatus === 'active' || topoStatus === 'degraded'; const live = topoStatus === 'active' || topoStatus === 'degraded';
// Serialised mutation submission. Two problems this solves, both
// proven against the live backend:
// 1. The server-side version (what expected_version is checked
// against) is bumped at ENQUEUE (not at apply), so two ops fired
// back-to-back race: whichever HTTP request the server sees second
// carries a stale expected_version and 409s. We chain submits so
// only one enqueue is ever in flight, in submission order.
// 2. A failed mutation silently degrades the topology. We await each
// mutation to a terminal state and throw MutationFailedError on
// 'failed' so the caller can surface it loudly.
//
// chainRef holds the tail of the submit chain. It starts as an already
// resolved promise, so the first submit has nothing to wait for.
const chainRef = useRef<Promise<unknown>>(Promise.resolve());
// Optimistic expected_version cursor. enqueue bumps the server version
// by exactly 1, so we advance locally rather than waiting for a refetch
// between queued ops (onReparent fires detach + attach in one handler).
// Seeded from topoVersion on first render only; later values arrive
// through the effect below.
const cursorRef = useRef<number>(topoVersion);
useEffect(() => {
// Adopt a higher server version (a refetch landed, or another editor
// advanced it) but never walk the cursor backwards under an in-flight
// batch that has already advanced past the last-seen server version.
if (topoVersion > cursorRef.current) cursorRef.current = topoVersion;
}, [topoVersion]);
// Submit one live mutation through the serialized chain.
//
// The returned promise resolves with the mutation id once the mutation
// settles as 'applied'. It rejects with MutationFailedError when the
// mutation settles as 'failed', or with whatever api.enqueueMutation /
// api.waitForMutation rejected with. The cursor is not advanced when the
// enqueue itself rejects.
const submit = useCallback(
  (topologyId: string, op: MutationOp, payload: Record<string, unknown>): Promise<string> => {
    const task = chainRef.current.then(async () => {
      const expected = cursorRef.current;
      const res = await api.enqueueMutation(topologyId, op, payload, expected);
      // Advance by one for this enqueue, but never move backwards: the
      // topoVersion effect may have adopted a newer server version while
      // the request was in flight. Overwriting it with `expected + 1`
      // would make the next queued op send a stale version and 409.
      cursorRef.current = Math.max(cursorRef.current, expected + 1);
      const row = await api.waitForMutation(topologyId, res.mutation_id);
      if (row.state === 'failed') {
        throw new MutationFailedError(op, row.reason ?? 'unknown reason');
      }
      return res.mutation_id;
    });
    // Keep the chain alive after a rejection so one failed op doesn't
    // wedge every subsequent submit. The caller still sees the rejection
    // through `task`; only the chain's own copy is swallowed.
    chainRef.current = task.then(() => undefined, () => undefined);
    return task;
  },
  [api],
);
return useMemo<UseTopologyEditor>(() => ({ return useMemo<UseTopologyEditor>(() => ({
// ── LAN ──────────────────────────────────────────────────────────── // ── LAN ────────────────────────────────────────────────────────────
async createLan(topologyId, body) { async createLan(topologyId, body) {
@@ -114,8 +170,8 @@ export function useTopologyEditor(
if (body.is_dmz !== undefined) payload.is_dmz = body.is_dmz; if (body.is_dmz !== undefined) payload.is_dmz = body.is_dmz;
if (body.x !== undefined) payload.x = body.x; if (body.x !== undefined) payload.x = body.x;
if (body.y !== undefined) payload.y = body.y; if (body.y !== undefined) payload.y = body.y;
const res = await api.enqueueMutation(topologyId, 'add_lan', payload, topoVersion); const mutationId = await submit(topologyId, 'add_lan', payload);
return { kind: 'enqueued', mutationId: res.mutation_id }; return { kind: 'enqueued', mutationId };
}, },
async updateLan(topologyId, lanId, lanName, patch) { async updateLan(topologyId, lanId, lanName, patch) {
if (!live) { if (!live) {
@@ -129,18 +185,16 @@ export function useTopologyEditor(
else patchFields[k] = v; else patchFields[k] = v;
} }
if (Object.keys(patchFields).length > 0) payload.patch = patchFields; if (Object.keys(patchFields).length > 0) payload.patch = patchFields;
const res = await api.enqueueMutation(topologyId, 'update_lan', payload, topoVersion); const mutationId = await submit(topologyId, 'update_lan', payload);
return { kind: 'enqueued', mutationId: res.mutation_id }; return { kind: 'enqueued', mutationId };
}, },
async deleteLan(topologyId, lanId, lanName) { async deleteLan(topologyId, lanId, lanName) {
if (!live) { if (!live) {
await api.deleteLan(topologyId, lanId); await api.deleteLan(topologyId, lanId);
return { kind: 'applied', data: undefined }; return { kind: 'applied', data: undefined };
} }
const res = await api.enqueueMutation( const mutationId = await submit(topologyId, 'remove_lan', { name: lanName });
topologyId, 'remove_lan', { name: lanName }, topoVersion, return { kind: 'enqueued', mutationId };
);
return { kind: 'enqueued', mutationId: res.mutation_id };
}, },
// ── Decky ────────────────────────────────────────────────────────── // ── Decky ──────────────────────────────────────────────────────────
@@ -172,8 +226,8 @@ export function useTopologyEditor(
if (fwd !== undefined) payload.forwards_l3 = fwd; if (fwd !== undefined) payload.forwards_l3 = fwd;
if (body.x !== undefined) payload.x = body.x; if (body.x !== undefined) payload.x = body.x;
if (body.y !== undefined) payload.y = body.y; if (body.y !== undefined) payload.y = body.y;
const res = await api.enqueueMutation(topologyId, 'add_decky', payload, topoVersion); const mutationId = await submit(topologyId, 'add_decky', payload);
return { kind: 'enqueued', mutationId: res.mutation_id }; return { kind: 'enqueued', mutationId };
}, },
async updateDecky(topologyId, uuid, deckyName, patch, extras) { async updateDecky(topologyId, uuid, deckyName, patch, extras) {
if (!live) { if (!live) {
@@ -188,18 +242,16 @@ export function useTopologyEditor(
} }
if (Object.keys(patchFields).length > 0) payload.patch = patchFields; if (Object.keys(patchFields).length > 0) payload.patch = patchFields;
if (extras?.force) payload.force = true; if (extras?.force) payload.force = true;
const res = await api.enqueueMutation(topologyId, 'update_decky', payload, topoVersion); const mutationId = await submit(topologyId, 'update_decky', payload);
return { kind: 'enqueued', mutationId: res.mutation_id }; return { kind: 'enqueued', mutationId };
}, },
async deleteDecky(topologyId, uuid, deckyName) { async deleteDecky(topologyId, uuid, deckyName) {
if (!live) { if (!live) {
await api.deleteDecky(topologyId, uuid); await api.deleteDecky(topologyId, uuid);
return { kind: 'applied', data: undefined }; return { kind: 'applied', data: undefined };
} }
const res = await api.enqueueMutation( const mutationId = await submit(topologyId, 'remove_decky', { decky: deckyName });
topologyId, 'remove_decky', { decky: deckyName }, topoVersion, return { kind: 'enqueued', mutationId };
);
return { kind: 'enqueued', mutationId: res.mutation_id };
}, },
// ── Edges ────────────────────────────────────────────────────────── // ── Edges ──────────────────────────────────────────────────────────
@@ -210,18 +262,16 @@ export function useTopologyEditor(
} }
const payload: Record<string, unknown> = { decky: deckyName, lan: lanName }; const payload: Record<string, unknown> = { decky: deckyName, lan: lanName };
if (body.forwards_l3 !== undefined) payload.forwards_l3 = body.forwards_l3; if (body.forwards_l3 !== undefined) payload.forwards_l3 = body.forwards_l3;
const res = await api.enqueueMutation(topologyId, 'attach_decky', payload, topoVersion); const mutationId = await submit(topologyId, 'attach_decky', payload);
return { kind: 'enqueued', mutationId: res.mutation_id }; return { kind: 'enqueued', mutationId };
}, },
async detachEdge(topologyId, edgeId, deckyName, lanName) { async detachEdge(topologyId, edgeId, deckyName, lanName) {
if (!live) { if (!live) {
await api.detachEdge(topologyId, edgeId); await api.detachEdge(topologyId, edgeId);
return { kind: 'applied', data: undefined }; return { kind: 'applied', data: undefined };
} }
const res = await api.enqueueMutation( const mutationId = await submit(topologyId, 'detach_decky', { decky: deckyName, lan: lanName });
topologyId, 'detach_decky', { decky: deckyName, lan: lanName }, topoVersion, return { kind: 'enqueued', mutationId };
);
return { kind: 'enqueued', mutationId: res.mutation_id };
}, },
}), [api, live, topoVersion]); }), [api, live, submit]);
} }