perf(api): TTL-cache /stats + unfiltered pagination counts
Every /stats call ran SELECT count(*) FROM logs + SELECT count(DISTINCT attacker_ip) FROM logs; every /logs and /attackers call ran an unfiltered count for the paginator. At 500 concurrent users these serialize through aiosqlite's worker threads and dominate wall time. Cache at the router layer (repo stays dialect-agnostic): - /stats response: 5s TTL - /logs total (only when no filters): 2s TTL - /attackers total (only when no filters): 2s TTL Filtered paths bypass the cache. Pattern reused from api_get_config and api_get_health (asyncio.Lock + time.monotonic window + lazy lock).
This commit is contained in:
@@ -1,3 +1,5 @@
|
||||
import asyncio
|
||||
import time
|
||||
from typing import Any, Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, Query
|
||||
@@ -8,6 +10,36 @@ from decnet.web.db.models import AttackersResponse
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
# Same pattern as /logs — cache the unfiltered total count; filtered
# counts go straight to the DB.
_TOTAL_TTL = 2.0  # seconds an unfiltered total stays fresh before a re-count
_total_cache: tuple[Optional[int], float] = (None, 0.0)  # (cached count, time.monotonic() stamp)
_total_lock: Optional[asyncio.Lock] = None  # created lazily so import needs no running event loop
|
||||
|
||||
|
||||
def _reset_total_cache() -> None:
|
||||
global _total_cache, _total_lock
|
||||
_total_cache = (None, 0.0)
|
||||
_total_lock = None
|
||||
|
||||
|
||||
async def _get_total_attackers_cached() -> int:
    """Return the unfiltered attacker count, refreshed at most once per _TOTAL_TTL window.

    Double-checked TTL cache: a lock-free fast path, then a re-check under
    the lock so concurrent cache misses collapse into a single DB query.
    """
    global _total_cache, _total_lock
    # Fast path — serve a fresh cached value without touching the lock.
    cached, stamp = _total_cache
    if cached is not None and time.monotonic() - stamp < _TOTAL_TTL:
        return cached
    # Lazy lock creation: module import must not require a running loop.
    if _total_lock is None:
        _total_lock = asyncio.Lock()
    async with _total_lock:
        # Re-check: another coroutine may have refreshed while we waited.
        cached, stamp = _total_cache
        if cached is not None and time.monotonic() - stamp < _TOTAL_TTL:
            return cached
        fresh = await repo.get_total_attackers()
        _total_cache = (fresh, time.monotonic())
        return fresh
|
||||
|
||||
|
||||
@router.get(
|
||||
"/attackers",
|
||||
@@ -37,7 +69,10 @@ async def get_attackers(
|
||||
s = _norm(search)
|
||||
svc = _norm(service)
|
||||
_data = await repo.get_attackers(limit=limit, offset=offset, search=s, sort_by=sort_by, service=svc)
|
||||
_total = await repo.get_total_attackers(search=s, service=svc)
|
||||
if s is None and svc is None:
|
||||
_total = await _get_total_attackers_cached()
|
||||
else:
|
||||
_total = await repo.get_total_attackers(search=s, service=svc)
|
||||
|
||||
# Bulk-join behavior rows for the IPs in this page to avoid N+1 queries.
|
||||
_ips = {row["ip"] for row in _data if row.get("ip")}
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
import asyncio
|
||||
import time
|
||||
from typing import Any, Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, Query
|
||||
@@ -8,6 +10,37 @@ from decnet.web.db.models import LogsResponse
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
# Cache the unfiltered total-logs count. Filtered counts bypass the cache
# (rare, freshness matters for search). SELECT count(*) FROM logs is a
# full scan and gets hammered by paginating clients.
_TOTAL_TTL = 2.0  # seconds an unfiltered total stays fresh before a re-count
_total_cache: tuple[Optional[int], float] = (None, 0.0)  # (cached count, time.monotonic() stamp)
_total_lock: Optional[asyncio.Lock] = None  # created lazily so import needs no running event loop
|
||||
|
||||
|
||||
def _reset_total_cache() -> None:
|
||||
global _total_cache, _total_lock
|
||||
_total_cache = (None, 0.0)
|
||||
_total_lock = None
|
||||
|
||||
|
||||
async def _get_total_logs_cached() -> int:
    """Return the unfiltered log count, refreshing at most once per _TOTAL_TTL.

    Check-lock-recheck pattern: stale readers funnel through one lock so a
    burst of simultaneous cache misses issues a single COUNT(*) query.
    """
    global _total_cache, _total_lock

    def _fresh(entry: tuple[Optional[int], float]) -> Optional[int]:
        # Returns the cached value when it is still within the TTL window.
        count, stamp = entry
        if count is not None and time.monotonic() - stamp < _TOTAL_TTL:
            return count
        return None

    # Lock-free fast path.
    hit = _fresh(_total_cache)
    if hit is not None:
        return hit
    # Lazily build the lock; import time must not need a running loop.
    if _total_lock is None:
        _total_lock = asyncio.Lock()
    async with _total_lock:
        # Another coroutine may have refreshed while we waited on the lock.
        hit = _fresh(_total_cache)
        if hit is not None:
            return hit
        count = await repo.get_total_logs()
        _total_cache = (count, time.monotonic())
        return count
|
||||
|
||||
|
||||
@router.get("/logs", response_model=LogsResponse, tags=["Logs"],
|
||||
responses={401: {"description": "Could not validate credentials"}, 403: {"description": "Insufficient permissions"}, 422: {"description": "Validation error"}})
|
||||
@@ -30,7 +63,10 @@ async def get_logs(
|
||||
et = _norm(end_time)
|
||||
|
||||
_logs: list[dict[str, Any]] = await repo.get_logs(limit=limit, offset=offset, search=s, start_time=st, end_time=et)
|
||||
_total: int = await repo.get_total_logs(search=s, start_time=st, end_time=et)
|
||||
if s is None and st is None and et is None:
|
||||
_total: int = await _get_total_logs_cached()
|
||||
else:
|
||||
_total = await repo.get_total_logs(search=s, start_time=st, end_time=et)
|
||||
return {
|
||||
"total": _total,
|
||||
"limit": limit,
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
from typing import Any
|
||||
import asyncio
|
||||
import time
|
||||
from typing import Any, Optional
|
||||
|
||||
from fastapi import APIRouter, Depends
|
||||
|
||||
@@ -8,9 +10,41 @@ from decnet.web.db.models import StatsResponse
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
# /stats is aggregate telemetry polled constantly by the UI and locust.
# A 5s window collapses thousands of concurrent calls — each of which
# runs SELECT count(*) FROM logs + SELECT count(DISTINCT attacker_ip) —
# into one DB hit per window.
_STATS_TTL = 5.0  # seconds a cached /stats payload stays fresh
_stats_cache: tuple[Optional[dict[str, Any]], float] = (None, 0.0)  # (payload, time.monotonic() stamp)
_stats_lock: Optional[asyncio.Lock] = None  # created lazily so import needs no running event loop
|
||||
|
||||
|
||||
def _reset_stats_cache() -> None:
|
||||
global _stats_cache, _stats_lock
|
||||
_stats_cache = (None, 0.0)
|
||||
_stats_lock = None
|
||||
|
||||
|
||||
async def _get_stats_cached() -> dict[str, Any]:
    """Return the /stats payload, recomputed at most once per _STATS_TTL window.

    Double-checked TTL cache: lock-free fast path, then a re-check under the
    lock so concurrent misses collapse into one repo.get_stats_summary() call.
    """
    global _stats_cache, _stats_lock
    # Fast path — a fresh cached payload is served without taking the lock.
    payload, stamp = _stats_cache
    if payload is not None and time.monotonic() - stamp < _STATS_TTL:
        return payload
    # Lazy lock creation keeps module import free of event-loop requirements.
    if _stats_lock is None:
        _stats_lock = asyncio.Lock()
    async with _stats_lock:
        # Re-check: a concurrent coroutine may have refreshed while we waited.
        payload, stamp = _stats_cache
        if payload is not None and time.monotonic() - stamp < _STATS_TTL:
            return payload
        payload = await repo.get_stats_summary()
        _stats_cache = (payload, time.monotonic())
        return payload
|
||||
|
||||
|
||||
@router.get("/stats", response_model=StatsResponse, tags=["Observability"],
            responses={401: {"description": "Could not validate credentials"}, 403: {"description": "Insufficient permissions"}, 422: {"description": "Validation error"}},)
@_traced("api.get_stats")
async def get_stats(user: dict = Depends(require_viewer)) -> dict[str, Any]:
    """Return aggregate stats for the UI dashboard.

    Served through the module-level TTL cache so concurrent polls within one
    window hit the DB only once. The diff residue here had two consecutive
    `return` statements (the direct repo call followed by the cached call);
    the second was unreachable — the cached path is the intended behavior,
    so the dead direct call is removed.
    """
    return await _get_stats_cached()
|
||||
|
||||
Reference in New Issue
Block a user