feat(orchestrator): authoritative failure-count badge endpoint (DEBT-042)

New GET /api/v1/orchestrator/events/stats?since=1h&success=false&kind=...
backed by repo.count_orchestrator_failures(since_ts, kind), which
counts failed rows across both orchestrator_events and
orchestrator_emails since the cutoff.

Window parser accepts ^\d+[smhd]$, capped at 7d. Today only
success=false is accepted on this surface so the endpoint isn't
accidentally repurposed before the next consumer is properly
designed.

Orchestrator.tsx polls the endpoint on mount + every 30 s and
renders the authoritative DB-derived count instead of deriving from
the in-memory SSE buffer + one paginated page (which silently
excluded failures older than the local window).
This commit is contained in:
2026-05-03 05:26:45 -04:00
parent 866a76eccf
commit 03beff3840
9 changed files with 431 additions and 28 deletions

View File

@@ -26,6 +26,26 @@ vi.mock('./useOrchestratorStream', () => ({
import api from '../utils/api';
const apiGet = api.get as ReturnType<typeof vi.fn>;
/**
 * Builds a URL-routing stand-in for `api.get`.
 *
 * The page issues two requests at mount (the events list and the
 * failure-count stats), so a single canned response is not enough. The
 * returned function picks the payload from the requested URL, letting each
 * test override only the part it cares about.
 *
 * @param overrides - Optional canned payloads: `events` is the body returned
 *   for the events list (defaults to an empty page), `failures` is the count
 *   returned by the stats endpoint (defaults to 0).
 * @returns A function mapping a request URL to a resolved `{ data }` response.
 */
const buildApiResponder = (
  overrides: {
    events?: { data: unknown[]; total: number };
    failures?: number;
  } = {},
) => {
  const eventsPayload = overrides.events ?? { data: [], total: 0 };
  const failureCount = overrides.failures ?? 0;

  const payloadFor = (url: string) => {
    // Stats must be matched first: its path also starts with the events prefix.
    if (url.startsWith('/orchestrator/events/stats')) return { count: failureCount };
    if (url.startsWith('/orchestrator/events')) return eventsPayload;
    // Any other URL gets an empty body.
    return {};
  };

  return (url: string) => Promise.resolve({ data: payloadFor(url) });
};
const renderPage = () =>
render(
<MemoryRouter initialEntries={['/orchestrator']}>
@@ -40,7 +60,7 @@ describe('Orchestrator', () => {
});
it('renders the empty state when the API returns no events', async () => {
apiGet.mockResolvedValueOnce({ data: { data: [], total: 0 } });
apiGet.mockImplementation(buildApiResponder());
renderPage();
@@ -50,11 +70,14 @@ describe('Orchestrator', () => {
});
it('switches the kind filter and refetches scoped to that kind', async () => {
apiGet.mockResolvedValue({ data: { data: [], total: 0 } });
apiGet.mockImplementation(buildApiResponder());
renderPage();
await waitFor(() => expect(apiGet).toHaveBeenCalledTimes(1));
expect(apiGet.mock.calls[0][0]).toMatch(/^\/orchestrator\/events\?limit=50&offset=0$/);
await waitFor(() =>
expect(
apiGet.mock.calls.some((c) => /^\/orchestrator\/events\?limit=50&offset=0$/.test(c[0])),
).toBe(true),
);
await userEvent.click(screen.getByRole('tab', { name: /^email$/ }));
@@ -65,7 +88,7 @@ describe('Orchestrator', () => {
});
it('prepends a row when the live stream pushes a traffic event', async () => {
apiGet.mockResolvedValueOnce({ data: { data: [], total: 0 } });
apiGet.mockImplementation(buildApiResponder());
renderPage();
await waitFor(() => expect(capturedOnEvent).not.toBeNull());
@@ -90,4 +113,31 @@ describe('Orchestrator', () => {
// 1 event shown after a single push.
expect(screen.getByText(/1 EVENTS SHOWN/i)).toBeInTheDocument();
});
it('renders the failure-count badge from the stats endpoint (DEBT-042)', async () => {
  apiGet.mockImplementation(buildApiResponder({ failures: 42 }));
  renderPage();

  // findByText waits for the badge text carrying the mocked count to appear.
  const badge = await screen.findByText(/42 FAILURES \/ 1H/i);
  expect(badge).toBeInTheDocument();

  // The stats endpoint is the authoritative source, so confirm it was queried
  // with the expected window and success filter.
  const statsUrl = /\/orchestrator\/events\/stats\?since=1h&success=false/;
  const requestedUrls = apiGet.mock.calls.map((call) => call[0]);
  expect(requestedUrls.some((url) => statsUrl.test(url))).toBe(true);
});
it('hides the failure-count badge when the stats endpoint reports zero', async () => {
  apiGet.mockImplementation(buildApiResponder({ failures: 0 }));
  renderPage();

  // Wait until the stats request has been issued before checking the DOM.
  const statsRequested = () =>
    apiGet.mock.calls.some((call) => /\/orchestrator\/events\/stats/.test(call[0]));
  await waitFor(() => expect(statsRequested()).toBe(true));

  // A zero count must not render the badge at all.
  expect(screen.queryByText(/FAILURES \/ 1H/i)).not.toBeInTheDocument();
});
});

View File

@@ -36,7 +36,6 @@ type KindFilter = 'all' | 'traffic' | 'file' | 'email';
type StreamStatus = 'connecting' | 'live' | 'error';
const ROW_CAP = 500;
const HOUR_MS = 60 * 60 * 1000;
const FRESH_MS = 5_000;
const timeAgo = (dateStr: string | null): string => {
@@ -64,6 +63,7 @@ const Orchestrator: React.FC = () => {
const [paused, setPaused] = useState(false);
const [now, setNow] = useState(Date.now());
const [selected, setSelected] = useState<OrchestratorEntry | null>(null);
const [failuresLastHour, setFailuresLastHour] = useState(0);
const limit = 50;
const pausedRef = useRef(paused);
@@ -75,6 +75,27 @@ const Orchestrator: React.FC = () => {
return () => clearInterval(t);
}, []);
// Failure badge source of truth (DEBT-042): ask the backend for the count
// instead of deriving it locally. The previous in-memory derivation only saw
// the SSE buffer plus one paginated page, so failures older than that local
// window were silently dropped and the badge under-reported on busy fleets.
// Fetches once on mount, then re-polls every 30 s until unmount.
useEffect(() => {
  // Flipped by the cleanup so a response that lands after unmount is ignored.
  let active = true;

  const refreshFailureCount = async () => {
    try {
      const { data } = await api.get(
        '/orchestrator/events/stats?since=1h&success=false',
      );
      if (active) {
        // A response without a count is treated as zero.
        setFailuresLastHour(data?.count ?? 0);
      }
    } catch {
      // Intentionally swallowed: the badge is only a hint, so a failed poll
      // leaves the last known count in place rather than breaking the page.
    }
  };

  void refreshFailureCount();
  const pollTimer = setInterval(refreshFailureCount, 30_000);

  return () => {
    active = false;
    clearInterval(pollTimer);
  };
}, []);
const fetchEvents = async () => {
setLoading(true);
try {
@@ -163,13 +184,6 @@ const Orchestrator: React.FC = () => {
return merged.filter((r) => r.kind === kindParam);
}, [streamRows, rows, kindParam]);
// Counts entries from `streamRows` and `rows` that are unsuccessful and whose
// timestamp falls within the last hour relative to `now`.
const failuresLastHour = useMemo(() => {
  const windowStart = now - HOUR_MS;
  let failed = 0;
  for (const entry of [...streamRows, ...rows]) {
    if (entry.success) continue;
    if (new Date(entry.ts).getTime() >= windowStart) failed += 1;
  }
  return failed;
}, [streamRows, rows, now]);
const statusLabel =
status === 'live' ? 'LIVE'
: status === 'connecting' ? 'CONNECTING'