fix(web): serialize live topology mutations + surface failures loudly
Live MazeNET edits fired their mutations fire-and-forget: each canvas action enqueued immediately and never awaited the result. Two failures followed from that: - expected_version is bumped at ENQUEUE (not at apply), so two ops fired back-to-back raced — the second carried a stale version and 409'd. Edits only worked when hand-paced (an SSE refetch landed between them). - A failed mutation degrades the topology, but the only signal was a 4s toast, so the user saw DEGRADED with no cause. useTopologyEditor now routes every live op through a serialized submit queue: one enqueue in flight at a time (submission order preserved), an optimistic expected_version cursor advanced per enqueue so back-to-back ops (e.g. reparent's detach+attach) don't need a refetch between them, and each mutation awaited to a terminal state. A 'failed' row throws MutationFailedError, which the page pins as a persistent UPDATE FAILED banner instead of a vanishing toast. Slice 1 of the live-edit rework; stage+UPDATE-button batching and louder backend materialisation reporting to follow.
This commit is contained in:
@@ -181,7 +181,7 @@ const MazeNET: React.FC = () => {
|
||||
const {
|
||||
nets, setNets, nodes, setNodes, edges, setEdges,
|
||||
topoMeta, services, archetypes,
|
||||
loadErr, actionErr, flashErr,
|
||||
loadErr, actionErr, commitErr, clearCommitErr, flashErr,
|
||||
deploying, onDeploy,
|
||||
streamLive, lastEventAt, streamEnabled,
|
||||
refetch,
|
||||
@@ -560,6 +560,17 @@ const MazeNET: React.FC = () => {
|
||||
)}
|
||||
{loadErr && <span className="alert-text"> · {loadErr}</span>}
|
||||
{actionErr && <span className="alert-text"> · {actionErr}</span>}
|
||||
{commitErr && (
|
||||
<span className="alert-text"> · UPDATE FAILED: {commitErr}
|
||||
<button
|
||||
type="button"
|
||||
className="maze-btn ghost"
|
||||
style={{ marginLeft: 6, padding: '0 6px' }}
|
||||
onClick={clearCommitErr}
|
||||
title="Dismiss"
|
||||
>✕</button>
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
<div className="maze-page-actions">
|
||||
|
||||
@@ -37,6 +37,16 @@ export interface EdgeRow {
|
||||
forwards_l3: boolean;
|
||||
}
|
||||
|
||||
export type MutationState = 'pending' | 'applying' | 'applied' | 'failed';
|
||||
|
||||
export interface MutationRow {
|
||||
id: string;
|
||||
topology_id: string;
|
||||
op: string;
|
||||
state: MutationState;
|
||||
reason: string | null;
|
||||
}
|
||||
|
||||
export interface TopologySummary {
|
||||
id: string;
|
||||
name: string;
|
||||
@@ -249,6 +259,16 @@ export interface MazeApi {
|
||||
expectedVersion?: number,
|
||||
) => Promise<EnqueueMutationResponse>;
|
||||
|
||||
/** Poll the mutation queue until ``mutationId`` reaches a terminal
|
||||
* state (``applied`` | ``failed``). Resolves with that row; rejects
|
||||
* only on timeout. A ``failed`` row resolves (not rejects) so callers
|
||||
* can read ``reason`` — the editor turns it into a loud error. */
|
||||
waitForMutation: (
|
||||
topologyId: string,
|
||||
mutationId: string,
|
||||
opts?: { timeoutMs?: number; intervalMs?: number },
|
||||
) => Promise<MutationRow>;
|
||||
|
||||
deployTopology: (topologyId: string) => Promise<void>;
|
||||
}
|
||||
|
||||
@@ -393,6 +413,36 @@ export function useMazeApi(): MazeApi {
|
||||
[],
|
||||
);
|
||||
|
||||
const waitForMutation = useCallback(
|
||||
async (
|
||||
topologyId: string,
|
||||
mutationId: string,
|
||||
opts: { timeoutMs?: number; intervalMs?: number } = {},
|
||||
): Promise<MutationRow> => {
|
||||
const { timeoutMs = 30000, intervalMs = 400 } = opts;
|
||||
const deadline = Date.now() + timeoutMs;
|
||||
// ponytail: poll the existing list endpoint; the SSE stream also
|
||||
// carries mutation.applied/failed but wiring a one-shot waiter into
|
||||
// it couples the editor to the stream hook for no real gain here.
|
||||
for (;;) {
|
||||
const { data } = await api.get<MutationRow[]>(
|
||||
`/topologies/${topologyId}/mutations`,
|
||||
);
|
||||
const row = data.find((r) => r.id === mutationId);
|
||||
if (row && (row.state === 'applied' || row.state === 'failed')) {
|
||||
return row;
|
||||
}
|
||||
if (Date.now() >= deadline) {
|
||||
throw new Error(
|
||||
`mutation ${mutationId} did not settle within ${timeoutMs}ms`,
|
||||
);
|
||||
}
|
||||
await new Promise((res) => setTimeout(res, intervalMs));
|
||||
}
|
||||
},
|
||||
[],
|
||||
);
|
||||
|
||||
const enqueueMutation = useCallback(
|
||||
async (
|
||||
topologyId: string,
|
||||
@@ -418,7 +468,7 @@ export function useMazeApi(): MazeApi {
|
||||
createLan, updateLan, deleteLan,
|
||||
createDecky, updateDecky, deleteDecky,
|
||||
attachEdge, detachEdge,
|
||||
enqueueMutation,
|
||||
enqueueMutation, waitForMutation,
|
||||
deployTopology,
|
||||
}),
|
||||
[
|
||||
@@ -427,7 +477,7 @@ export function useMazeApi(): MazeApi {
|
||||
createLan, updateLan, deleteLan,
|
||||
createDecky, updateDecky, deleteDecky,
|
||||
attachEdge, detachEdge,
|
||||
enqueueMutation,
|
||||
enqueueMutation, waitForMutation,
|
||||
deployTopology,
|
||||
],
|
||||
);
|
||||
|
||||
@@ -5,6 +5,7 @@ import type { Net, MazeNode, Edge } from './types';
|
||||
import { DEFAULT_SERVICES, ARCHETYPES as DEFAULT_ARCHETYPES } from './data';
|
||||
import type { Archetype, ServiceDef } from './data';
|
||||
import type { MazeApi } from './useMazeApi';
|
||||
import { MutationFailedError } from './useTopologyEditor';
|
||||
import { useTopologyStream, type TopologyStreamEvent } from './useTopologyStream';
|
||||
|
||||
export interface TopoMeta {
|
||||
@@ -42,6 +43,10 @@ export interface UseTopologyDataResult {
|
||||
// Errors + transient banners
|
||||
loadErr: string | null;
|
||||
actionErr: string | null;
|
||||
/** Persistent (no auto-clear) error from a failed live mutation —
|
||||
* the topology likely went degraded. Dismissed via clearCommitErr. */
|
||||
commitErr: string | null;
|
||||
clearCommitErr: () => void;
|
||||
flashErr: (err: unknown, fallback: string) => void;
|
||||
|
||||
// Deploy
|
||||
@@ -77,9 +82,18 @@ export function useTopologyData(
|
||||
|
||||
const [loadErr, setLoadErr] = useState<string | null>(null);
|
||||
const [actionErr, setActionErr] = useState<string | null>(null);
|
||||
const [commitErr, setCommitErr] = useState<string | null>(null);
|
||||
const [deploying, setDeploying] = useState(false);
|
||||
|
||||
const clearCommitErr = useCallback(() => setCommitErr(null), []);
|
||||
|
||||
const flashErr = useCallback((err: unknown, fallback: string) => {
|
||||
// A failed live mutation is loud + persistent: the queue halted and
|
||||
// the topology probably degraded — don't let it vanish in 4s.
|
||||
if (err instanceof MutationFailedError) {
|
||||
setCommitErr(err.message);
|
||||
return;
|
||||
}
|
||||
const msg = (err as ApiError)?.response?.data?.detail ?? (err as ApiError)?.message ?? fallback;
|
||||
setActionErr(msg);
|
||||
setTimeout(() => setActionErr(null), 4000);
|
||||
@@ -189,7 +203,7 @@ export function useTopologyData(
|
||||
edges, setEdges,
|
||||
topoMeta,
|
||||
services, archetypes,
|
||||
loadErr, actionErr, flashErr,
|
||||
loadErr, actionErr, commitErr, clearCommitErr, flashErr,
|
||||
deploying, onDeploy,
|
||||
streamLive, lastEventAt, streamEnabled,
|
||||
refetch,
|
||||
|
||||
63
decnet_web/src/components/MazeNET/useTopologyEditor.test.ts
Normal file
63
decnet_web/src/components/MazeNET/useTopologyEditor.test.ts
Normal file
@@ -0,0 +1,63 @@
|
||||
// SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
/**
|
||||
* @vitest-environment jsdom
|
||||
*/
|
||||
import { describe, it, expect, vi } from 'vitest';
|
||||
import { act, renderHook } from '@testing-library/react';
|
||||
|
||||
import { useTopologyEditor, MutationFailedError } from './useTopologyEditor';
|
||||
import type { MazeApi } from './useMazeApi';
|
||||
|
||||
const buildApi = (overrides: Partial<MazeApi> = {}): MazeApi => ({
|
||||
enqueueMutation: vi.fn().mockResolvedValue({ mutation_id: 'm', state: 'pending' }),
|
||||
waitForMutation: vi.fn().mockResolvedValue({ state: 'applied', reason: null }),
|
||||
...overrides,
|
||||
} as unknown as MazeApi);
|
||||
|
||||
const editorFor = (api: MazeApi, topoVersion = 5) =>
|
||||
renderHook(() =>
|
||||
useTopologyEditor({ api, topoStatus: 'active', topoVersion }),
|
||||
);
|
||||
|
||||
describe('useTopologyEditor live mutation queue', () => {
|
||||
it('serialises concurrent submits and advances expected_version per enqueue', async () => {
|
||||
const enqueue = vi.fn().mockResolvedValue({ mutation_id: 'm', state: 'pending' });
|
||||
const api = buildApi({ enqueueMutation: enqueue });
|
||||
const { result } = editorFor(api, 5);
|
||||
|
||||
// Fire two structural ops in the SAME tick — the pre-fix bug was both
|
||||
// sending expected_version=5 and the loser 409ing.
|
||||
await act(async () => {
|
||||
await Promise.all([
|
||||
result.current.createLan('t', { name: 'a', is_dmz: false, x: 0, y: 0 }),
|
||||
result.current.deleteLan('t', 'lid', 'b'),
|
||||
]);
|
||||
});
|
||||
|
||||
expect(enqueue).toHaveBeenCalledTimes(2);
|
||||
expect(enqueue.mock.calls[0][3]).toBe(5); // first uses server version
|
||||
expect(enqueue.mock.calls[1][3]).toBe(6); // second advanced by the cursor
|
||||
});
|
||||
|
||||
it('throws MutationFailedError on a failed mutation but keeps the queue alive', async () => {
|
||||
const wait = vi
|
||||
.fn()
|
||||
.mockResolvedValueOnce({ state: 'failed', reason: 'post-apply validation failed: IP_COLLISION' })
|
||||
.mockResolvedValue({ state: 'applied', reason: null });
|
||||
const api = buildApi({ waitForMutation: wait });
|
||||
const { result } = editorFor(api, 1);
|
||||
|
||||
await act(async () => {
|
||||
await expect(
|
||||
result.current.createLan('t', { name: 'a', is_dmz: false, x: 0, y: 0 }),
|
||||
).rejects.toBeInstanceOf(MutationFailedError);
|
||||
});
|
||||
|
||||
// A failed op must not wedge the chain — the next submit still resolves.
|
||||
await act(async () => {
|
||||
await expect(
|
||||
result.current.deleteLan('t', 'lid', 'b'),
|
||||
).resolves.toEqual({ kind: 'enqueued', mutationId: 'm' });
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -16,7 +16,7 @@
|
||||
* primitive because mutation ops are name-keyed while direct CRUD is
|
||||
* uuid-keyed. Callers plumb both.
|
||||
*/
|
||||
import { useMemo } from 'react';
|
||||
import { useCallback, useEffect, useMemo, useRef } from 'react';
|
||||
import type {
|
||||
CreateDeckyBody,
|
||||
CreateLanBody,
|
||||
@@ -24,8 +24,23 @@ import type {
|
||||
EdgeRow,
|
||||
LANRow,
|
||||
MazeApi,
|
||||
MutationOp,
|
||||
} from './useMazeApi';
|
||||
|
||||
/** Thrown by a live primitive when its mutation settles as ``failed``.
|
||||
* Carries the op + backend reason so the page can surface a loud,
|
||||
* persistent error instead of a transient toast. */
|
||||
export class MutationFailedError extends Error {
|
||||
readonly op: string;
|
||||
readonly reason: string;
|
||||
constructor(op: string, reason: string) {
|
||||
super(`mutation ${op} failed: ${reason}`);
|
||||
this.name = 'MutationFailedError';
|
||||
this.op = op;
|
||||
this.reason = reason;
|
||||
}
|
||||
}
|
||||
|
||||
export interface UseTopologyEditorOptions {
|
||||
api: MazeApi;
|
||||
/** Current topology status from :func:`getTopology`. */
|
||||
@@ -101,6 +116,47 @@ export function useTopologyEditor(
|
||||
const { api, topoStatus, topoVersion } = opts;
|
||||
const live = topoStatus === 'active' || topoStatus === 'degraded';
|
||||
|
||||
// Serialised mutation submission. Two problems this solves, both
|
||||
// proven against the live backend:
|
||||
// 1. expected_version is bumped at ENQUEUE (not at apply), so two
|
||||
// ops fired back-to-back race: whichever HTTP request the server
|
||||
// sees second carries a stale version and 409s. We chain submits
|
||||
// so only one enqueue is ever in flight, in submission order.
|
||||
// 2. A failed mutation silently degrades the topology. We await each
|
||||
// mutation to a terminal state and throw MutationFailedError on
|
||||
// 'failed' so the caller can surface it loudly.
|
||||
const chainRef = useRef<Promise<unknown>>(Promise.resolve());
|
||||
// Optimistic expected_version cursor. enqueue bumps the server version
|
||||
// by exactly 1, so we advance locally rather than waiting for a refetch
|
||||
// between queued ops (onReparent fires detach + attach in one handler).
|
||||
const cursorRef = useRef<number>(topoVersion);
|
||||
useEffect(() => {
|
||||
// Adopt a higher server version (a refetch landed, or another editor
|
||||
// advanced it) but never walk the cursor backwards under an in-flight
|
||||
// batch that has already advanced past the last-seen server version.
|
||||
if (topoVersion > cursorRef.current) cursorRef.current = topoVersion;
|
||||
}, [topoVersion]);
|
||||
|
||||
const submit = useCallback(
|
||||
(topologyId: string, op: MutationOp, payload: Record<string, unknown>): Promise<string> => {
|
||||
const task = chainRef.current.then(async () => {
|
||||
const expected = cursorRef.current;
|
||||
const res = await api.enqueueMutation(topologyId, op, payload, expected);
|
||||
cursorRef.current = expected + 1;
|
||||
const row = await api.waitForMutation(topologyId, res.mutation_id);
|
||||
if (row.state === 'failed') {
|
||||
throw new MutationFailedError(op, row.reason ?? 'unknown reason');
|
||||
}
|
||||
return res.mutation_id;
|
||||
});
|
||||
// Keep the chain alive after a rejection so one failed op doesn't
|
||||
// wedge every subsequent submit.
|
||||
chainRef.current = task.then(() => undefined, () => undefined);
|
||||
return task;
|
||||
},
|
||||
[api],
|
||||
);
|
||||
|
||||
return useMemo<UseTopologyEditor>(() => ({
|
||||
// ── LAN ────────────────────────────────────────────────────────────
|
||||
async createLan(topologyId, body) {
|
||||
@@ -114,8 +170,8 @@ export function useTopologyEditor(
|
||||
if (body.is_dmz !== undefined) payload.is_dmz = body.is_dmz;
|
||||
if (body.x !== undefined) payload.x = body.x;
|
||||
if (body.y !== undefined) payload.y = body.y;
|
||||
const res = await api.enqueueMutation(topologyId, 'add_lan', payload, topoVersion);
|
||||
return { kind: 'enqueued', mutationId: res.mutation_id };
|
||||
const mutationId = await submit(topologyId, 'add_lan', payload);
|
||||
return { kind: 'enqueued', mutationId };
|
||||
},
|
||||
async updateLan(topologyId, lanId, lanName, patch) {
|
||||
if (!live) {
|
||||
@@ -129,18 +185,16 @@ export function useTopologyEditor(
|
||||
else patchFields[k] = v;
|
||||
}
|
||||
if (Object.keys(patchFields).length > 0) payload.patch = patchFields;
|
||||
const res = await api.enqueueMutation(topologyId, 'update_lan', payload, topoVersion);
|
||||
return { kind: 'enqueued', mutationId: res.mutation_id };
|
||||
const mutationId = await submit(topologyId, 'update_lan', payload);
|
||||
return { kind: 'enqueued', mutationId };
|
||||
},
|
||||
async deleteLan(topologyId, lanId, lanName) {
|
||||
if (!live) {
|
||||
await api.deleteLan(topologyId, lanId);
|
||||
return { kind: 'applied', data: undefined };
|
||||
}
|
||||
const res = await api.enqueueMutation(
|
||||
topologyId, 'remove_lan', { name: lanName }, topoVersion,
|
||||
);
|
||||
return { kind: 'enqueued', mutationId: res.mutation_id };
|
||||
const mutationId = await submit(topologyId, 'remove_lan', { name: lanName });
|
||||
return { kind: 'enqueued', mutationId };
|
||||
},
|
||||
|
||||
// ── Decky ──────────────────────────────────────────────────────────
|
||||
@@ -172,8 +226,8 @@ export function useTopologyEditor(
|
||||
if (fwd !== undefined) payload.forwards_l3 = fwd;
|
||||
if (body.x !== undefined) payload.x = body.x;
|
||||
if (body.y !== undefined) payload.y = body.y;
|
||||
const res = await api.enqueueMutation(topologyId, 'add_decky', payload, topoVersion);
|
||||
return { kind: 'enqueued', mutationId: res.mutation_id };
|
||||
const mutationId = await submit(topologyId, 'add_decky', payload);
|
||||
return { kind: 'enqueued', mutationId };
|
||||
},
|
||||
async updateDecky(topologyId, uuid, deckyName, patch, extras) {
|
||||
if (!live) {
|
||||
@@ -188,18 +242,16 @@ export function useTopologyEditor(
|
||||
}
|
||||
if (Object.keys(patchFields).length > 0) payload.patch = patchFields;
|
||||
if (extras?.force) payload.force = true;
|
||||
const res = await api.enqueueMutation(topologyId, 'update_decky', payload, topoVersion);
|
||||
return { kind: 'enqueued', mutationId: res.mutation_id };
|
||||
const mutationId = await submit(topologyId, 'update_decky', payload);
|
||||
return { kind: 'enqueued', mutationId };
|
||||
},
|
||||
async deleteDecky(topologyId, uuid, deckyName) {
|
||||
if (!live) {
|
||||
await api.deleteDecky(topologyId, uuid);
|
||||
return { kind: 'applied', data: undefined };
|
||||
}
|
||||
const res = await api.enqueueMutation(
|
||||
topologyId, 'remove_decky', { decky: deckyName }, topoVersion,
|
||||
);
|
||||
return { kind: 'enqueued', mutationId: res.mutation_id };
|
||||
const mutationId = await submit(topologyId, 'remove_decky', { decky: deckyName });
|
||||
return { kind: 'enqueued', mutationId };
|
||||
},
|
||||
|
||||
// ── Edges ──────────────────────────────────────────────────────────
|
||||
@@ -210,18 +262,16 @@ export function useTopologyEditor(
|
||||
}
|
||||
const payload: Record<string, unknown> = { decky: deckyName, lan: lanName };
|
||||
if (body.forwards_l3 !== undefined) payload.forwards_l3 = body.forwards_l3;
|
||||
const res = await api.enqueueMutation(topologyId, 'attach_decky', payload, topoVersion);
|
||||
return { kind: 'enqueued', mutationId: res.mutation_id };
|
||||
const mutationId = await submit(topologyId, 'attach_decky', payload);
|
||||
return { kind: 'enqueued', mutationId };
|
||||
},
|
||||
async detachEdge(topologyId, edgeId, deckyName, lanName) {
|
||||
if (!live) {
|
||||
await api.detachEdge(topologyId, edgeId);
|
||||
return { kind: 'applied', data: undefined };
|
||||
}
|
||||
const res = await api.enqueueMutation(
|
||||
topologyId, 'detach_decky', { decky: deckyName, lan: lanName }, topoVersion,
|
||||
);
|
||||
return { kind: 'enqueued', mutationId: res.mutation_id };
|
||||
const mutationId = await submit(topologyId, 'detach_decky', { decky: deckyName, lan: lanName });
|
||||
return { kind: 'enqueued', mutationId };
|
||||
},
|
||||
}), [api, live, topoVersion]);
|
||||
}), [api, live, submit]);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user