feat(orchestrator): authoritative failure-count badge endpoint (DEBT-042)
New GET /api/v1/orchestrator/events/stats?since=1h&success=false&kind=... backed by repo.count_orchestrator_failures(since_ts, kind), which counts failed rows across both orchestrator_events and orchestrator_emails since the cutoff. Window parser accepts ^\d+[smhd]$, capped at 7d. Today only success=false is accepted on this surface so the endpoint isn't accidentally repurposed before the next consumer is properly designed. Orchestrator.tsx polls the endpoint on mount + every 30 s and renders the authoritative DB-derived count instead of deriving from the in-memory SSE buffer + one paginated page (which silently excluded failures older than the local window).
This commit is contained in:
136
tests/api/orchestrator/test_event_stats.py
Normal file
136
tests/api/orchestrator/test_event_stats.py
Normal file
@@ -0,0 +1,136 @@
|
||||
"""GET /api/v1/orchestrator/events/stats — failure-count badge endpoint (DEBT-042)."""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from unittest.mock import AsyncMock, patch
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
|
||||
from decnet.web.api import app
|
||||
|
||||
_V1 = "/api/v1/orchestrator"
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_stats_unauthenticated_401():
    """A stats request sent without any credentials must be answered with 401."""
    asgi_transport = httpx.ASGITransport(app=app)
    async with httpx.AsyncClient(
        transport=asgi_transport, base_url="http://test",
    ) as client:
        response = await client.get(f"{_V1}/events/stats?since=1h&success=false")
    assert response.status_code == 401
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_stats_returns_failure_count_with_window():
    """The handler echoes its filters and asks the repo for a ``now - 1h`` cutoff.

    The repo is replaced by a mock whose ``count_orchestrator_failures``
    returns 7, so the test pins both the response payload and the
    ``since_ts`` keyword the handler forwards.
    """
    from decnet.web.router.orchestrator.api_event_stats import (
        orchestrator_event_stats,
    )

    viewer = {"uuid": "u", "role": "viewer"}
    repo_target = "decnet.web.router.orchestrator.api_event_stats.repo"

    with patch(repo_target) as mock_repo:
        mock_repo.count_orchestrator_failures = AsyncMock(return_value=7)
        result = await orchestrator_event_stats(
            since="1h", success=False, kind=None, user=viewer,
        )

    assert result["count"] == 7
    assert result["since"] == "1h"
    assert result["success"] is False
    assert result["kind"] is None

    # Window must be "now - 1h", not 5h or 30s. Tolerance of 5 seconds
    # for the test execution.
    awaited = mock_repo.count_orchestrator_failures.await_args
    since_ts = awaited.kwargs["since_ts"]
    expected = datetime.now(timezone.utc) - timedelta(hours=1)
    drift_seconds = abs((since_ts - expected).total_seconds())
    assert drift_seconds < 5
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_stats_forwards_kind_filter():
    """The ``kind`` argument given to the handler reaches the repo call unchanged."""
    from decnet.web.router.orchestrator.api_event_stats import (
        orchestrator_event_stats,
    )

    viewer = {"uuid": "u", "role": "viewer"}
    repo_target = "decnet.web.router.orchestrator.api_event_stats.repo"

    with patch(repo_target) as mock_repo:
        mock_repo.count_orchestrator_failures = AsyncMock(return_value=2)
        await orchestrator_event_stats(
            since="15m", success=False, kind="email", user=viewer,
        )

    forwarded = mock_repo.count_orchestrator_failures.await_args.kwargs
    assert forwarded["kind"] == "email"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_stats_rejects_success_true():
    """``success=True`` must be refused with a 422.

    Only success=false is supported on this surface today; everything
    else is rejected so the endpoint isn't accidentally repurposed
    before the next consumer is properly designed.
    """
    from fastapi import HTTPException

    from decnet.web.router.orchestrator.api_event_stats import (
        orchestrator_event_stats,
    )

    viewer = {"uuid": "u", "role": "viewer"}

    with pytest.raises(HTTPException) as raised:
        await orchestrator_event_stats(
            since="1h", success=True, kind=None, user=viewer,
        )

    # Checked after the block: statements following the raising call
    # inside ``pytest.raises`` would never run.
    assert raised.value.status_code == 422
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_stats_rejects_success_unset():
    """Calling the handler with ``success=None`` must raise a 422 HTTPException."""
    from fastapi import HTTPException

    from decnet.web.router.orchestrator.api_event_stats import (
        orchestrator_event_stats,
    )

    viewer = {"uuid": "u", "role": "viewer"}

    with pytest.raises(HTTPException) as raised:
        await orchestrator_event_stats(
            since="1h", success=None, kind=None, user=viewer,
        )

    assert raised.value.status_code == 422
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_stats_rejects_malformed_since():
    """A ``since`` value of ``"garbage"`` must raise a 422 HTTPException."""
    from fastapi import HTTPException

    from decnet.web.router.orchestrator.api_event_stats import (
        orchestrator_event_stats,
    )

    viewer = {"uuid": "u", "role": "viewer"}

    with pytest.raises(HTTPException) as raised:
        await orchestrator_event_stats(
            since="garbage", success=False, kind=None, user=viewer,
        )

    assert raised.value.status_code == 422
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_stats_rejects_window_over_max():
    """A ``since`` window of ``"30d"`` must raise a 422 HTTPException."""
    from fastapi import HTTPException

    from decnet.web.router.orchestrator.api_event_stats import (
        orchestrator_event_stats,
    )

    viewer = {"uuid": "u", "role": "viewer"}

    with pytest.raises(HTTPException) as raised:
        await orchestrator_event_stats(
            since="30d", success=False, kind=None, user=viewer,
        )

    assert raised.value.status_code == 422
|
||||
@@ -97,6 +97,55 @@ async def test_kind_filter_narrows(tmp_path):
|
||||
assert {r["kind"] for r in only_file} == {"file"}
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_count_failures_window_and_kind(tmp_path):
    """Exercise ``count_orchestrator_failures`` against a freshly seeded repo.

    The count must:
    - cover both tables (events + emails) when kind is None
    - respect the since_ts cutoff
    - skip success=True rows
    - narrow to a single source table when kind is set
    """
    from datetime import datetime, timedelta, timezone

    repo = await _make_repo(tmp_path, "failures.db")
    dst = await _seed_decky(repo, "decky-A")

    def traffic_event(action, success):
        # One ssh traffic row aimed at the seeded decky; only the action
        # label and the success flag differ between the seeded rows.
        return {
            "kind": "traffic", "protocol": "ssh",
            "action": action, "src_decky_uuid": None,
            "dst_decky_uuid": dst, "success": success, "payload": {},
        }

    # 2 fresh failures + 1 fresh success on the events table.
    for i in range(2):
        await repo.record_orchestrator_event(traffic_event(f"fail:{i}", False))
    await repo.record_orchestrator_event(traffic_event("ok", True))

    # 1 fresh email failure.
    failed_email = {
        "ts": datetime.now(timezone.utc),
        "subject": "boom", "sender_email": "a@x", "recipient_email": "b@y",
        "mail_decky_uuid": "mh", "language": "en",
        "thread_id": "t1", "message_id": "<m1@x>", "in_reply_to": None,
        "eml_path": "/tmp/m1.eml",
        "success": False, "payload": "{}",
    }
    await repo.record_orchestrator_email(failed_email)

    cutoff = datetime.now(timezone.utc) - timedelta(hours=1)

    all_failures = await repo.count_orchestrator_failures(since_ts=cutoff)
    assert all_failures == 3

    traffic_failures = await repo.count_orchestrator_failures(
        since_ts=cutoff, kind="traffic",
    )
    assert traffic_failures == 2

    email_failures = await repo.count_orchestrator_failures(
        since_ts=cutoff, kind="email",
    )
    assert email_failures == 1

    # Future cutoff → nothing matches.
    future = datetime.now(timezone.utc) + timedelta(hours=1)
    none_yet = await repo.count_orchestrator_failures(since_ts=future)
    assert none_yet == 0
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_prune_caps_per_dst(tmp_path):
|
||||
repo = await _make_repo(tmp_path, "prune.db")
|
||||
|
||||
Reference in New Issue
Block a user