fix(web): serialize live topology mutations + surface failures loudly

Live MazeNET edits fired their mutations fire-and-forget: each canvas
action enqueued immediately and never awaited the result. Two failures
followed from that:

- expected_version is bumped at ENQUEUE (not at apply), so two ops fired
  back-to-back raced — the second carried a stale version and 409'd.
  Edits only worked when hand-paced (an SSE refetch landed between them).
- A failed mutation degrades the topology, but the only signal was a 4s
  toast, so the user saw DEGRADED with no cause.

useTopologyEditor now routes every live op through a serialized submit
queue: one enqueue in flight at a time (submission order preserved), an
optimistic expected_version cursor advanced per enqueue so back-to-back
ops (e.g. reparent's detach+attach) don't need a refetch between them,
and each mutation awaited to a terminal state. A 'failed' row throws
MutationFailedError, which the page pins as a persistent UPDATE FAILED
banner instead of a vanishing toast.

Slice 1 of the live-edit rework; stage+UPDATE-button batching and louder
backend materialisation reporting to follow.
This commit is contained in:
2026-06-16 12:44:34 -04:00
parent 5505de782f
commit f18bfee746
5 changed files with 216 additions and 28 deletions

View File

@@ -181,7 +181,7 @@ const MazeNET: React.FC = () => {
const {
nets, setNets, nodes, setNodes, edges, setEdges,
topoMeta, services, archetypes,
loadErr, actionErr, flashErr,
loadErr, actionErr, commitErr, clearCommitErr, flashErr,
deploying, onDeploy,
streamLive, lastEventAt, streamEnabled,
refetch,
@@ -560,6 +560,17 @@ const MazeNET: React.FC = () => {
)}
{loadErr && <span className="alert-text"> · {loadErr}</span>}
{actionErr && <span className="alert-text"> · {actionErr}</span>}
{commitErr && (
<span className="alert-text"> · UPDATE FAILED: {commitErr}
<button
type="button"
className="maze-btn ghost"
style={{ marginLeft: 6, padding: '0 6px' }}
onClick={clearCommitErr}
title="Dismiss"
></button>
</span>
)}
</div>
</div>
<div className="maze-page-actions">

View File

@@ -37,6 +37,16 @@ export interface EdgeRow {
forwards_l3: boolean;
}
export type MutationState = 'pending' | 'applying' | 'applied' | 'failed';
export interface MutationRow {
id: string;
topology_id: string;
op: string;
state: MutationState;
reason: string | null;
}
export interface TopologySummary {
id: string;
name: string;
@@ -249,6 +259,16 @@ export interface MazeApi {
expectedVersion?: number,
) => Promise<EnqueueMutationResponse>;
/** Poll the mutation queue until ``mutationId`` reaches a terminal
* state (``applied`` | ``failed``). Resolves with that row; rejects
* only on timeout. A ``failed`` row resolves (not rejects) so callers
* can read ``reason`` — the editor turns it into a loud error. */
waitForMutation: (
topologyId: string,
mutationId: string,
opts?: { timeoutMs?: number; intervalMs?: number },
) => Promise<MutationRow>;
deployTopology: (topologyId: string) => Promise<void>;
}
@@ -393,6 +413,36 @@ export function useMazeApi(): MazeApi {
[],
);
const waitForMutation = useCallback(
async (
topologyId: string,
mutationId: string,
opts: { timeoutMs?: number; intervalMs?: number } = {},
): Promise<MutationRow> => {
const { timeoutMs = 30000, intervalMs = 400 } = opts;
const deadline = Date.now() + timeoutMs;
// ponytail: poll the existing list endpoint; the SSE stream also
// carries mutation.applied/failed but wiring a one-shot waiter into
// it couples the editor to the stream hook for no real gain here.
for (;;) {
const { data } = await api.get<MutationRow[]>(
`/topologies/${topologyId}/mutations`,
);
const row = data.find((r) => r.id === mutationId);
if (row && (row.state === 'applied' || row.state === 'failed')) {
return row;
}
if (Date.now() >= deadline) {
throw new Error(
`mutation ${mutationId} did not settle within ${timeoutMs}ms`,
);
}
await new Promise((res) => setTimeout(res, intervalMs));
}
},
[],
);
const enqueueMutation = useCallback(
async (
topologyId: string,
@@ -418,7 +468,7 @@ export function useMazeApi(): MazeApi {
createLan, updateLan, deleteLan,
createDecky, updateDecky, deleteDecky,
attachEdge, detachEdge,
enqueueMutation,
enqueueMutation, waitForMutation,
deployTopology,
}),
[
@@ -427,7 +477,7 @@ export function useMazeApi(): MazeApi {
createLan, updateLan, deleteLan,
createDecky, updateDecky, deleteDecky,
attachEdge, detachEdge,
enqueueMutation,
enqueueMutation, waitForMutation,
deployTopology,
],
);

View File

@@ -5,6 +5,7 @@ import type { Net, MazeNode, Edge } from './types';
import { DEFAULT_SERVICES, ARCHETYPES as DEFAULT_ARCHETYPES } from './data';
import type { Archetype, ServiceDef } from './data';
import type { MazeApi } from './useMazeApi';
import { MutationFailedError } from './useTopologyEditor';
import { useTopologyStream, type TopologyStreamEvent } from './useTopologyStream';
export interface TopoMeta {
@@ -42,6 +43,10 @@ export interface UseTopologyDataResult {
// Errors + transient banners
loadErr: string | null;
actionErr: string | null;
/** Persistent (no auto-clear) error from a failed live mutation —
* the topology likely went degraded. Dismissed via clearCommitErr. */
commitErr: string | null;
clearCommitErr: () => void;
flashErr: (err: unknown, fallback: string) => void;
// Deploy
@@ -77,9 +82,18 @@ export function useTopologyData(
const [loadErr, setLoadErr] = useState<string | null>(null);
const [actionErr, setActionErr] = useState<string | null>(null);
const [commitErr, setCommitErr] = useState<string | null>(null);
const [deploying, setDeploying] = useState(false);
const clearCommitErr = useCallback(() => setCommitErr(null), []);
const flashErr = useCallback((err: unknown, fallback: string) => {
// A failed live mutation is loud + persistent: the queue halted and
// the topology probably degraded — don't let it vanish in 4s.
if (err instanceof MutationFailedError) {
setCommitErr(err.message);
return;
}
const msg = (err as ApiError)?.response?.data?.detail ?? (err as ApiError)?.message ?? fallback;
setActionErr(msg);
setTimeout(() => setActionErr(null), 4000);
@@ -189,7 +203,7 @@ export function useTopologyData(
edges, setEdges,
topoMeta,
services, archetypes,
loadErr, actionErr, flashErr,
loadErr, actionErr, commitErr, clearCommitErr, flashErr,
deploying, onDeploy,
streamLive, lastEventAt, streamEnabled,
refetch,

View File

@@ -0,0 +1,63 @@
// SPDX-License-Identifier: AGPL-3.0-or-later
/**
* @vitest-environment jsdom
*/
import { describe, it, expect, vi } from 'vitest';
import { act, renderHook } from '@testing-library/react';
import { useTopologyEditor, MutationFailedError } from './useTopologyEditor';
import type { MazeApi } from './useMazeApi';
const buildApi = (overrides: Partial<MazeApi> = {}): MazeApi => ({
enqueueMutation: vi.fn().mockResolvedValue({ mutation_id: 'm', state: 'pending' }),
waitForMutation: vi.fn().mockResolvedValue({ state: 'applied', reason: null }),
...overrides,
} as unknown as MazeApi);
const editorFor = (api: MazeApi, topoVersion = 5) =>
renderHook(() =>
useTopologyEditor({ api, topoStatus: 'active', topoVersion }),
);
describe('useTopologyEditor live mutation queue', () => {
it('serialises concurrent submits and advances expected_version per enqueue', async () => {
const enqueue = vi.fn().mockResolvedValue({ mutation_id: 'm', state: 'pending' });
const api = buildApi({ enqueueMutation: enqueue });
const { result } = editorFor(api, 5);
// Fire two structural ops in the SAME tick — the pre-fix bug was both
// sending expected_version=5 and the loser 409ing.
await act(async () => {
await Promise.all([
result.current.createLan('t', { name: 'a', is_dmz: false, x: 0, y: 0 }),
result.current.deleteLan('t', 'lid', 'b'),
]);
});
expect(enqueue).toHaveBeenCalledTimes(2);
expect(enqueue.mock.calls[0][3]).toBe(5); // first uses server version
expect(enqueue.mock.calls[1][3]).toBe(6); // second advanced by the cursor
});
it('throws MutationFailedError on a failed mutation but keeps the queue alive', async () => {
const wait = vi
.fn()
.mockResolvedValueOnce({ state: 'failed', reason: 'post-apply validation failed: IP_COLLISION' })
.mockResolvedValue({ state: 'applied', reason: null });
const api = buildApi({ waitForMutation: wait });
const { result } = editorFor(api, 1);
await act(async () => {
await expect(
result.current.createLan('t', { name: 'a', is_dmz: false, x: 0, y: 0 }),
).rejects.toBeInstanceOf(MutationFailedError);
});
// A failed op must not wedge the chain — the next submit still resolves.
await act(async () => {
await expect(
result.current.deleteLan('t', 'lid', 'b'),
).resolves.toEqual({ kind: 'enqueued', mutationId: 'm' });
});
});
});

View File

@@ -16,7 +16,7 @@
* primitive because mutation ops are name-keyed while direct CRUD is
* uuid-keyed. Callers plumb both.
*/
import { useMemo } from 'react';
import { useCallback, useEffect, useMemo, useRef } from 'react';
import type {
CreateDeckyBody,
CreateLanBody,
@@ -24,8 +24,23 @@ import type {
EdgeRow,
LANRow,
MazeApi,
MutationOp,
} from './useMazeApi';
/** Thrown by a live primitive when its mutation settles as ``failed``.
* Carries the op + backend reason so the page can surface a loud,
* persistent error instead of a transient toast. */
export class MutationFailedError extends Error {
readonly op: string;
readonly reason: string;
constructor(op: string, reason: string) {
super(`mutation ${op} failed: ${reason}`);
this.name = 'MutationFailedError';
this.op = op;
this.reason = reason;
}
}
export interface UseTopologyEditorOptions {
api: MazeApi;
/** Current topology status from :func:`getTopology`. */
@@ -101,6 +116,47 @@ export function useTopologyEditor(
const { api, topoStatus, topoVersion } = opts;
const live = topoStatus === 'active' || topoStatus === 'degraded';
// Serialised mutation submission. Two problems this solves, both
// proven against the live backend:
// 1. expected_version is bumped at ENQUEUE (not at apply), so two
// ops fired back-to-back race: whichever HTTP request the server
// sees second carries a stale version and 409s. We chain submits
// so only one enqueue is ever in flight, in submission order.
// 2. A failed mutation silently degrades the topology. We await each
// mutation to a terminal state and throw MutationFailedError on
// 'failed' so the caller can surface it loudly.
const chainRef = useRef<Promise<unknown>>(Promise.resolve());
// Optimistic expected_version cursor. enqueue bumps the server version
// by exactly 1, so we advance locally rather than waiting for a refetch
// between queued ops (onReparent fires detach + attach in one handler).
const cursorRef = useRef<number>(topoVersion);
useEffect(() => {
// Adopt a higher server version (a refetch landed, or another editor
// advanced it) but never walk the cursor backwards under an in-flight
// batch that has already advanced past the last-seen server version.
if (topoVersion > cursorRef.current) cursorRef.current = topoVersion;
}, [topoVersion]);
const submit = useCallback(
(topologyId: string, op: MutationOp, payload: Record<string, unknown>): Promise<string> => {
const task = chainRef.current.then(async () => {
const expected = cursorRef.current;
const res = await api.enqueueMutation(topologyId, op, payload, expected);
cursorRef.current = expected + 1;
const row = await api.waitForMutation(topologyId, res.mutation_id);
if (row.state === 'failed') {
throw new MutationFailedError(op, row.reason ?? 'unknown reason');
}
return res.mutation_id;
});
// Keep the chain alive after a rejection so one failed op doesn't
// wedge every subsequent submit.
chainRef.current = task.then(() => undefined, () => undefined);
return task;
},
[api],
);
return useMemo<UseTopologyEditor>(() => ({
// ── LAN ────────────────────────────────────────────────────────────
async createLan(topologyId, body) {
@@ -114,8 +170,8 @@ export function useTopologyEditor(
if (body.is_dmz !== undefined) payload.is_dmz = body.is_dmz;
if (body.x !== undefined) payload.x = body.x;
if (body.y !== undefined) payload.y = body.y;
const res = await api.enqueueMutation(topologyId, 'add_lan', payload, topoVersion);
return { kind: 'enqueued', mutationId: res.mutation_id };
const mutationId = await submit(topologyId, 'add_lan', payload);
return { kind: 'enqueued', mutationId };
},
async updateLan(topologyId, lanId, lanName, patch) {
if (!live) {
@@ -129,18 +185,16 @@ export function useTopologyEditor(
else patchFields[k] = v;
}
if (Object.keys(patchFields).length > 0) payload.patch = patchFields;
const res = await api.enqueueMutation(topologyId, 'update_lan', payload, topoVersion);
return { kind: 'enqueued', mutationId: res.mutation_id };
const mutationId = await submit(topologyId, 'update_lan', payload);
return { kind: 'enqueued', mutationId };
},
async deleteLan(topologyId, lanId, lanName) {
if (!live) {
await api.deleteLan(topologyId, lanId);
return { kind: 'applied', data: undefined };
}
const res = await api.enqueueMutation(
topologyId, 'remove_lan', { name: lanName }, topoVersion,
);
return { kind: 'enqueued', mutationId: res.mutation_id };
const mutationId = await submit(topologyId, 'remove_lan', { name: lanName });
return { kind: 'enqueued', mutationId };
},
// ── Decky ──────────────────────────────────────────────────────────
@@ -172,8 +226,8 @@ export function useTopologyEditor(
if (fwd !== undefined) payload.forwards_l3 = fwd;
if (body.x !== undefined) payload.x = body.x;
if (body.y !== undefined) payload.y = body.y;
const res = await api.enqueueMutation(topologyId, 'add_decky', payload, topoVersion);
return { kind: 'enqueued', mutationId: res.mutation_id };
const mutationId = await submit(topologyId, 'add_decky', payload);
return { kind: 'enqueued', mutationId };
},
async updateDecky(topologyId, uuid, deckyName, patch, extras) {
if (!live) {
@@ -188,18 +242,16 @@ export function useTopologyEditor(
}
if (Object.keys(patchFields).length > 0) payload.patch = patchFields;
if (extras?.force) payload.force = true;
const res = await api.enqueueMutation(topologyId, 'update_decky', payload, topoVersion);
return { kind: 'enqueued', mutationId: res.mutation_id };
const mutationId = await submit(topologyId, 'update_decky', payload);
return { kind: 'enqueued', mutationId };
},
async deleteDecky(topologyId, uuid, deckyName) {
if (!live) {
await api.deleteDecky(topologyId, uuid);
return { kind: 'applied', data: undefined };
}
const res = await api.enqueueMutation(
topologyId, 'remove_decky', { decky: deckyName }, topoVersion,
);
return { kind: 'enqueued', mutationId: res.mutation_id };
const mutationId = await submit(topologyId, 'remove_decky', { decky: deckyName });
return { kind: 'enqueued', mutationId };
},
// ── Edges ──────────────────────────────────────────────────────────
@@ -210,18 +262,16 @@ export function useTopologyEditor(
}
const payload: Record<string, unknown> = { decky: deckyName, lan: lanName };
if (body.forwards_l3 !== undefined) payload.forwards_l3 = body.forwards_l3;
const res = await api.enqueueMutation(topologyId, 'attach_decky', payload, topoVersion);
return { kind: 'enqueued', mutationId: res.mutation_id };
const mutationId = await submit(topologyId, 'attach_decky', payload);
return { kind: 'enqueued', mutationId };
},
async detachEdge(topologyId, edgeId, deckyName, lanName) {
if (!live) {
await api.detachEdge(topologyId, edgeId);
return { kind: 'applied', data: undefined };
}
const res = await api.enqueueMutation(
topologyId, 'detach_decky', { decky: deckyName, lan: lanName }, topoVersion,
);
return { kind: 'enqueued', mutationId: res.mutation_id };
const mutationId = await submit(topologyId, 'detach_decky', { decky: deckyName, lan: lanName });
return { kind: 'enqueued', mutationId };
},
}), [api, live, topoVersion]);
}), [api, live, submit]);
}