perf(api): TTL-cache /stats + unfiltered pagination counts

Every /stats call ran SELECT count(*) FROM logs + SELECT count(DISTINCT
attacker_ip) FROM logs; every /logs and /attackers call ran an
unfiltered count for the paginator. At 500 concurrent users these
serialize through aiosqlite's worker threads and dominate wall time.

Cache at the router layer (repo stays dialect-agnostic):
  - /stats response: 5s TTL
  - /logs total (only when no filters): 2s TTL
  - /attackers total (only when no filters): 2s TTL

Filtered paths bypass the cache. Pattern reused from api_get_config
and api_get_health (asyncio.Lock + time.monotonic window + lazy lock).
This commit is contained in:
2026-04-17 19:09:15 -04:00
parent de4b64d857
commit 6301504c0e
6 changed files with 233 additions and 4 deletions

View File

@@ -1,3 +1,5 @@
import asyncio
import time
from typing import Any, Optional
from fastapi import APIRouter, Depends, Query
@@ -8,6 +10,36 @@ from decnet.web.db.models import AttackersResponse
router = APIRouter()
# Same pattern as /logs — cache the unfiltered total count; filtered
# counts go straight to the DB.
_TOTAL_TTL = 2.0
_total_cache: tuple[Optional[int], float] = (None, 0.0)
_total_lock: Optional[asyncio.Lock] = None
def _reset_total_cache() -> None:
    """Forget the cached unfiltered attackers total and drop the lock.

    Both module globals are put back to their initial values, so the
    next cached lookup misses and creates a new ``asyncio.Lock``.
    """
    global _total_cache, _total_lock
    _total_cache, _total_lock = (None, 0.0), None
async def _get_total_attackers_cached() -> int:
    """Return the unfiltered attackers total, reusing it for ``_TOTAL_TTL`` seconds.

    A cached value younger than the TTL (measured with ``time.monotonic``)
    is returned without touching the repository. Otherwise the count is
    fetched with ``repo.get_total_attackers()`` while holding a
    module-level lock, and the cache is stamped with the fetch time.
    """
    global _total_cache, _total_lock

    cached, stamped_at = _total_cache
    checked_at = time.monotonic()
    if cached is not None and checked_at - stamped_at < _TOTAL_TTL:
        return cached

    # The lock is created on first use rather than at import time.
    if _total_lock is None:
        _total_lock = asyncio.Lock()

    async with _total_lock:
        # Read the cache again: another coroutine may have refreshed it
        # while this one was waiting for the lock.
        cached, stamped_at = _total_cache
        checked_at = time.monotonic()
        still_fresh = cached is not None and checked_at - stamped_at < _TOTAL_TTL
        if not still_fresh:
            cached = await repo.get_total_attackers()
            _total_cache = (cached, time.monotonic())
        return cached
@router.get(
"/attackers",
@@ -37,7 +69,10 @@ async def get_attackers(
s = _norm(search)
svc = _norm(service)
_data = await repo.get_attackers(limit=limit, offset=offset, search=s, sort_by=sort_by, service=svc)
_total = await repo.get_total_attackers(search=s, service=svc)
if s is None and svc is None:
_total = await _get_total_attackers_cached()
else:
_total = await repo.get_total_attackers(search=s, service=svc)
# Bulk-join behavior rows for the IPs in this page to avoid N+1 queries.
_ips = {row["ip"] for row in _data if row.get("ip")}

View File

@@ -1,3 +1,5 @@
import asyncio
import time
from typing import Any, Optional
from fastapi import APIRouter, Depends, Query
@@ -8,6 +10,37 @@ from decnet.web.db.models import LogsResponse
router = APIRouter()
# Cache the unfiltered total-logs count. Filtered counts bypass the cache
# (rare, freshness matters for search). SELECT count(*) FROM logs is a
# full scan and gets hammered by paginating clients.
_TOTAL_TTL = 2.0
_total_cache: tuple[Optional[int], float] = (None, 0.0)
_total_lock: Optional[asyncio.Lock] = None
def _reset_total_cache() -> None:
    """Forget the cached unfiltered logs total and drop the lock.

    Both module globals are put back to their initial values, so the
    next cached lookup misses and creates a new ``asyncio.Lock``.
    """
    global _total_cache, _total_lock
    _total_cache, _total_lock = (None, 0.0), None
async def _get_total_logs_cached() -> int:
    """Return the unfiltered logs total, reusing it for ``_TOTAL_TTL`` seconds.

    A cached value younger than the TTL (measured with ``time.monotonic``)
    is returned without touching the repository. Otherwise the count is
    fetched with ``repo.get_total_logs()`` while holding a module-level
    lock, and the cache is stamped with the fetch time.
    """
    global _total_cache, _total_lock

    cached, stamped_at = _total_cache
    checked_at = time.monotonic()
    if cached is not None and checked_at - stamped_at < _TOTAL_TTL:
        return cached

    # The lock is created on first use rather than at import time.
    if _total_lock is None:
        _total_lock = asyncio.Lock()

    async with _total_lock:
        # Read the cache again: another coroutine may have refreshed it
        # while this one was waiting for the lock.
        cached, stamped_at = _total_cache
        checked_at = time.monotonic()
        still_fresh = cached is not None and checked_at - stamped_at < _TOTAL_TTL
        if not still_fresh:
            cached = await repo.get_total_logs()
            _total_cache = (cached, time.monotonic())
        return cached
@router.get("/logs", response_model=LogsResponse, tags=["Logs"],
responses={401: {"description": "Could not validate credentials"}, 403: {"description": "Insufficient permissions"}, 422: {"description": "Validation error"}})
@@ -30,7 +63,10 @@ async def get_logs(
et = _norm(end_time)
_logs: list[dict[str, Any]] = await repo.get_logs(limit=limit, offset=offset, search=s, start_time=st, end_time=et)
_total: int = await repo.get_total_logs(search=s, start_time=st, end_time=et)
if s is None and st is None and et is None:
_total: int = await _get_total_logs_cached()
else:
_total = await repo.get_total_logs(search=s, start_time=st, end_time=et)
return {
"total": _total,
"limit": limit,

View File

@@ -1,4 +1,6 @@
from typing import Any
import asyncio
import time
from typing import Any, Optional
from fastapi import APIRouter, Depends
@@ -8,9 +10,41 @@ from decnet.web.db.models import StatsResponse
router = APIRouter()
# /stats is aggregate telemetry polled constantly by the UI and locust.
# A 5s window collapses thousands of concurrent calls — each of which
# runs SELECT count(*) FROM logs + SELECT count(DISTINCT attacker_ip) —
# into one DB hit per window.
_STATS_TTL = 5.0
_stats_cache: tuple[Optional[dict[str, Any]], float] = (None, 0.0)
_stats_lock: Optional[asyncio.Lock] = None
def _reset_stats_cache() -> None:
    """Forget the cached /stats payload and drop the lock.

    Both module globals are put back to their initial values, so the
    next cached lookup misses and creates a new ``asyncio.Lock``.
    """
    global _stats_cache, _stats_lock
    _stats_cache, _stats_lock = (None, 0.0), None
async def _get_stats_cached() -> dict[str, Any]:
    """Return the stats summary, reusing it for ``_STATS_TTL`` seconds.

    A cached summary younger than the TTL (measured with
    ``time.monotonic``) is returned without touching the repository.
    Otherwise it is fetched with ``repo.get_stats_summary()`` while
    holding a module-level lock, and the cache is stamped with the fetch
    time. The same dict object is handed to every caller within a window.
    """
    global _stats_cache, _stats_lock

    summary, stamped_at = _stats_cache
    checked_at = time.monotonic()
    if summary is not None and checked_at - stamped_at < _STATS_TTL:
        return summary

    # The lock is created on first use rather than at import time.
    if _stats_lock is None:
        _stats_lock = asyncio.Lock()

    async with _stats_lock:
        # Read the cache again: another coroutine may have refreshed it
        # while this one was waiting for the lock.
        summary, stamped_at = _stats_cache
        checked_at = time.monotonic()
        still_fresh = summary is not None and checked_at - stamped_at < _STATS_TTL
        if not still_fresh:
            summary = await repo.get_stats_summary()
            _stats_cache = (summary, time.monotonic())
        return summary
@router.get(
    "/stats",
    response_model=StatsResponse,
    tags=["Observability"],
    responses={
        401: {"description": "Could not validate credentials"},
        403: {"description": "Insufficient permissions"},
        422: {"description": "Validation error"},
    },
)
@_traced("api.get_stats")
async def get_stats(user: dict = Depends(require_viewer)) -> dict[str, Any]:
    """Serve the aggregate stats summary for the /stats endpoint.

    Args:
        user: Injected by the ``require_viewer`` dependency; not used in
            the body.

    Returns:
        The summary dict produced by ``_get_stats_cached()``.
    """
    # Always go through the TTL cache. A direct repo.get_stats_summary()
    # return placed ahead of this line would leave the cached path
    # unreachable and hit the database on every request.
    return await _get_stats_cached()