feat(orchestrator): authoritative failure-count badge endpoint (DEBT-042)

New GET /api/v1/orchestrator/events/stats?since=1h&success=false&kind=... endpoint, backed by repo.count_orchestrator_failures(since_ts, kind), which counts failed rows across both orchestrator_events and orchestrator_emails since the cutoff. The window parser accepts ^\d+[smhd]$, capped at 7d. For now only success=false is accepted on this surface, so the endpoint isn't accidentally repurposed before the next consumer is properly designed. Orchestrator.tsx polls the endpoint on mount and every 30 s, and renders the authoritative DB-derived count instead of deriving it from the in-memory SSE buffer plus one paginated page (which silently excluded failures older than the local window).
2026-05-03 05:26:45 -04:00
parent 866a76eccf
commit 03beff3840
9 changed files with 431 additions and 28 deletions
--- a/tests/orchestrator/test_repo_pagination.py
+++ b/tests/orchestrator/test_repo_pagination.py
@@ -97,6 +97,55 @@ async def test_kind_filter_narrows(tmp_path):
    assert {r["kind"] for r in only_file} == {"file"}


+@pytest.mark.asyncio
+async def test_count_failures_window_and_kind(tmp_path):
+    """Check the counting contract of count_orchestrator_failures:
+    - failures from both tables (events + emails) are summed when kind is not passed
+    - the since_ts cutoff is honored (a future cutoff matches nothing)
+    - success=True rows are never counted
+    - passing kind narrows the count to that kind's rows only"""
+    from datetime import datetime, timedelta, timezone

+    repo = await _make_repo(tmp_path, "failures.db")
+    dst = await _seed_decky(repo, "decky-A")

+    # Events: two failures plus one success; the success row must not be counted.
+    for i in range(2):
+        await repo.record_orchestrator_event({
+            "kind": "traffic", "protocol": "ssh",
+            "action": f"fail:{i}", "src_decky_uuid": None,
+            "dst_decky_uuid": dst, "success": False, "payload": {},
+        })
+    await repo.record_orchestrator_event({
+        "kind": "traffic", "protocol": "ssh",
+        "action": "ok", "src_decky_uuid": None,
+        "dst_decky_uuid": dst, "success": True, "payload": {},
+    })

+    # Emails: a single failure, explicitly timestamped with the current UTC time.
+    await repo.record_orchestrator_email({
+        "ts": datetime.now(timezone.utc),
+        "subject": "boom", "sender_email": "a@x", "recipient_email": "b@y",
+        "mail_decky_uuid": "mh", "language": "en",
+        "thread_id": "t1", "message_id": "<m1@x>", "in_reply_to": None,
+        "eml_path": "/tmp/m1.eml",
+        "success": False, "payload": "{}",
+    })

+    cutoff = datetime.now(timezone.utc) - timedelta(hours=1)  # window start, one hour back

+    assert await repo.count_orchestrator_failures(since_ts=cutoff) == 3  # 2 event + 1 email failure
+    assert (
+        await repo.count_orchestrator_failures(since_ts=cutoff, kind="traffic")
+    ) == 2
+    assert (
+        await repo.count_orchestrator_failures(since_ts=cutoff, kind="email")
+    ) == 1
+    # A cutoff one hour in the future must exclude every row seeded above.
+    future = datetime.now(timezone.utc) + timedelta(hours=1)
+    assert await repo.count_orchestrator_failures(since_ts=future) == 0
+
+
@pytest.mark.asyncio
 async def test_prune_caps_per_dst(tmp_path):
    repo = await _make_repo(tmp_path, "prune.db")