added: profiling toolchain (py-spy, pyinstrument, pytest-benchmark, memray, snakeviz)

New `profile` optional-deps group, opt-in Pyinstrument ASGI middleware
gated by DECNET_PROFILE_REQUESTS, bench marker + tests/perf/ micro-benchmarks
for repository hot paths, and scripts/profile/ helpers for py-spy/cProfile/memray.
This commit is contained in:
2026-04-17 13:13:00 -04:00
parent c1d8102253
commit 319c1dbb61
10 changed files with 278 additions and 4 deletions

View File

@@ -59,6 +59,12 @@ DECNET_SYSTEM_LOGS: str = os.environ.get("DECNET_SYSTEM_LOGS", "decnet.system.lo
# which causes events to be skipped or processed twice.
DECNET_EMBED_PROFILER: bool = os.environ.get("DECNET_EMBED_PROFILER", "").lower() == "true"
# Set to "true" to mount the Pyinstrument ASGI middleware on the FastAPI app.
# Produces per-request HTML flamegraphs under ./profiles/. Off by default so
# production and normal dev runs pay zero profiling overhead.
DECNET_PROFILE_REQUESTS: bool = os.environ.get("DECNET_PROFILE_REQUESTS", "").lower() == "true"
DECNET_PROFILE_DIR: str = os.environ.get("DECNET_PROFILE_DIR", "profiles")
# API Options
DECNET_API_HOST: str = os.environ.get("DECNET_API_HOST", "127.0.0.1")
DECNET_API_PORT: int = _port("DECNET_API_PORT", 8000)

View File

@@ -9,7 +9,14 @@ from fastapi.responses import JSONResponse
from pydantic import ValidationError
from fastapi.middleware.cors import CORSMiddleware
from decnet.env import DECNET_CORS_ORIGINS, DECNET_DEVELOPER, DECNET_EMBED_PROFILER, DECNET_INGEST_LOG_FILE
from decnet.env import (
DECNET_CORS_ORIGINS,
DECNET_DEVELOPER,
DECNET_EMBED_PROFILER,
DECNET_INGEST_LOG_FILE,
DECNET_PROFILE_DIR,
DECNET_PROFILE_REQUESTS,
)
from decnet.logging import get_logger
from decnet.web.dependencies import repo
from decnet.collector import log_collector_worker
@@ -38,6 +45,16 @@ def get_background_tasks() -> dict[str, Optional[asyncio.Task[Any]]]:
async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
global ingestion_task, collector_task, attacker_task, sniffer_task
import resource
soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
if soft < 4096:
log.warning(
"Low open-file limit detected (ulimit -n = %d). "
"High-traffic deployments may hit 'Too many open files' errors. "
"Raise it with: ulimit -n 65536 (session) or LimitNOFILE=65536 (systemd)",
soft,
)
log.info("API startup initialising database")
for attempt in range(1, 6):
try:
@@ -125,6 +142,31 @@ app.add_middleware(
allow_headers=["Authorization", "Content-Type", "Last-Event-ID"],
)
if DECNET_PROFILE_REQUESTS:
import time
from pathlib import Path
from pyinstrument import Profiler
from starlette.middleware.base import BaseHTTPMiddleware
_profile_dir = Path(DECNET_PROFILE_DIR)
_profile_dir.mkdir(parents=True, exist_ok=True)
class PyinstrumentMiddleware(BaseHTTPMiddleware):
async def dispatch(self, request: Request, call_next):
profiler = Profiler(async_mode="enabled")
profiler.start()
try:
response = await call_next(request)
finally:
profiler.stop()
slug = request.url.path.strip("/").replace("/", "_") or "root"
out = _profile_dir / f"{int(time.time() * 1000)}-{request.method}-{slug}.html"
out.write_text(profiler.output_html())
return response
app.add_middleware(PyinstrumentMiddleware)
log.info("Pyinstrument middleware mounted — flamegraphs -> %s", _profile_dir)
# Include the modular API router
app.include_router(api_router, prefix="/api/v1")

View File

@@ -16,7 +16,7 @@ dependencies = [
"fastapi>=0.110.0",
"uvicorn>=0.29.0",
"aiosqlite>=0.20.0",
"aiomysql>=0.2.0",
"asyncmy>=0.2.9",
"PyJWT>=2.8.0",
"bcrypt>=4.1.0",
"psutil>=5.9.0",
@@ -32,8 +32,15 @@ tracing = [
"opentelemetry-exporter-otlp>=1.20.0",
"opentelemetry-instrumentation-fastapi>=0.41b0",
]
profile = [
"py-spy>=0.4.1",
"pyinstrument>=4.7",
"pytest-benchmark>=4.0",
"memray>=1.14 ; sys_platform == 'linux'",
"snakeviz>=2.2",
]
dev = [
"decnet[tracing]",
"decnet[tracing,profile]",
"pytest>=9.0.3",
"ruff>=0.15.10",
"bandit>=1.9.4",
@@ -54,6 +61,8 @@ dev = [
"psycopg2-binary>=2.9.11",
"paho-mqtt>=2.1.0",
"pymongo>=4.16.0",
"locust>=2.29",
"gevent>=24.0",
]
[project.scripts]
@@ -62,11 +71,13 @@ decnet = "decnet.cli:app"
[tool.pytest.ini_options]
asyncio_mode = "auto"
asyncio_debug = "true"
addopts = "-m 'not fuzz and not live' -v -q -x -n logical --dist loadscope"
addopts = "-m 'not fuzz and not live and not stress and not bench' -v -q -x -n logical --dist loadscope"
markers = [
"fuzz: hypothesis-based fuzz tests (slow, run with -m fuzz or -m '' for all)",
"live: live subprocess service tests (run with -m live)",
"live_docker: live Docker container tests (requires DECNET_LIVE_DOCKER=1)",
"stress: locust-based stress tests (run with -m stress)",
"bench: pytest-benchmark micro-benchmarks (run with -m bench)",
]
filterwarnings = [
"ignore::pytest.PytestUnhandledThreadExceptionWarning",

17
scripts/profile/cprofile-cli.sh Executable file
View File

@@ -0,0 +1,17 @@
#!/usr/bin/env bash
# Run a `decnet` subcommand under cProfile and write a .prof file for snakeviz.
# Usage: scripts/profile/cprofile-cli.sh services
# scripts/profile/cprofile-cli.sh status
set -euo pipefail
if [[ $# -lt 1 ]]; then
echo "Usage: $0 <decnet-subcommand> [args...]" >&2
exit 1
fi
OUT="${OUT:-profiles/cprofile-$(date +%s).prof}"
mkdir -p "$(dirname "$OUT")"
python -m cProfile -o "${OUT}" -m decnet.cli "$@"
echo "Wrote ${OUT}"
echo "View with: snakeviz ${OUT}"

15
scripts/profile/memray-api.sh Executable file
View File

@@ -0,0 +1,15 @@
#!/usr/bin/env bash
# Run the DECNET API under memray to capture an allocation profile.
# Stop with Ctrl-C; then render with `memray flamegraph <bin>`.
set -euo pipefail
HOST="${DECNET_API_HOST:-127.0.0.1}"
PORT="${DECNET_API_PORT:-8000}"
OUT="${OUT:-profiles/memray-$(date +%s).bin}"
mkdir -p "$(dirname "$OUT")"
echo "Starting uvicorn under memray -> ${OUT}"
python -m memray run -o "${OUT}" -m uvicorn decnet.web.api:app \
--host "${HOST}" --port "${PORT}" --log-level warning
echo "Render with: memray flamegraph ${OUT}"

18
scripts/profile/pyspy-attach.sh Executable file
View File

@@ -0,0 +1,18 @@
#!/usr/bin/env bash
# Attach py-spy to the running DECNET uvicorn worker(s) and record a flamegraph.
# Requires sudo on Linux because of kernel.yama.ptrace_scope=1 by default.
set -euo pipefail
DURATION="${DURATION:-30}"
OUT="${OUT:-profiles/pyspy-$(date +%s).svg}"
mkdir -p "$(dirname "$OUT")"
PID="$(pgrep -f 'uvicorn decnet.web.api' | head -n 1 || true)"
if [[ -z "${PID}" ]]; then
echo "No uvicorn worker found. Start the API first (e.g. 'decnet deploy ...')." >&2
exit 1
fi
echo "Attaching py-spy to PID ${PID} for ${DURATION}s -> ${OUT}"
sudo py-spy record -o "${OUT}" -p "${PID}" -d "${DURATION}" --subprocesses
echo "Wrote ${OUT}"

69
tests/perf/README.md Normal file
View File

@@ -0,0 +1,69 @@
# DECNET Profiling
Five complementary lenses. Pick whichever answers the question you have.
## 1. Whole-process sampling — py-spy
Attach to a running API and record a flamegraph for 30s. Requires `sudo`
(Linux ptrace scope).
```bash
./scripts/profile/pyspy-attach.sh # auto-finds uvicorn pid
sudo py-spy record -o profile.svg -p <PID> -d 30 --subprocesses
```
If py-spy "doesn't work", it is almost always one of:
- Attached to the Typer CLI PID, not the uvicorn worker PID (use `pgrep -f 'uvicorn decnet.web.api'`).
- `kernel.yama.ptrace_scope=1` — run with `sudo` or `sudo sysctl kernel.yama.ptrace_scope=0`.
- The API isn't actually running (a `--dry-run` deploy starts nothing).
## 2. Per-request flamegraphs — Pyinstrument
Set the env flag, hit endpoints, find HTML flamegraphs under `./profiles/`.
```bash
DECNET_PROFILE_REQUESTS=true decnet deploy --mode unihost --deckies 1
# in another shell:
curl http://127.0.0.1:8000/api/v1/health
open profiles/*.html
```
Off by default — zero overhead when the flag is unset.
## 3. Deterministic call graph — cProfile + snakeviz
For one-shot profiling of CLI commands or scripts.
```bash
./scripts/profile/cprofile-cli.sh services # profiles `decnet services`
snakeviz profiles/cprofile.prof
```
## 4. Micro-benchmarks — pytest-benchmark
Regression-gate repository hot paths.
```bash
pytest -m bench tests/perf/ -n0 # SQLite backend (default)
DECNET_DB_TYPE=mysql pytest -m bench tests/perf/ -n0
```
Note: `-n0` disables xdist. `pytest-benchmark` refuses to measure under
parallel workers, which is the project default (`-n logical --dist loadscope`).
## 5. Memory allocation — memray
Hunt leaks and allocation hot spots in the API / workers.
```bash
./scripts/profile/memray-api.sh # runs uvicorn under memray
memray flamegraph profiles/memray.bin
```
## Load generation
Pair any of the in-process lenses (2, 5) with Locust for realistic traffic:
```bash
pytest -m stress tests/stress/
```

0
tests/perf/__init__.py Normal file
View File

36
tests/perf/conftest.py Normal file
View File

@@ -0,0 +1,36 @@
import asyncio
import pytest
from decnet.web.db.factory import get_repository
@pytest.fixture(scope="session")
def event_loop():
loop = asyncio.new_event_loop()
yield loop
loop.close()
@pytest.fixture(scope="session")
def repo(tmp_path_factory, event_loop):
path = tmp_path_factory.mktemp("perf") / "bench.db"
r = get_repository(db_path=str(path))
event_loop.run_until_complete(r.initialize())
return r
@pytest.fixture(scope="session")
def seeded_repo(repo, event_loop):
async def _seed():
for i in range(1000):
await repo.add_log({
"decky": f"decky-{i % 10:02d}",
"service": ["ssh", "ftp", "smb", "rdp"][i % 4],
"event_type": "connect",
"attacker_ip": f"10.0.{i // 256}.{i % 256}",
"raw_line": f"event {i}",
"fields": "{}",
"msg": "",
})
event_loop.run_until_complete(_seed())
return repo

View File

@@ -0,0 +1,60 @@
"""
Micro-benchmarks for the repository hot paths.
Run with:
pytest -m bench tests/perf/
These do NOT run in the default suite (see `addopts` in pyproject.toml).
"""
import pytest
pytestmark = pytest.mark.bench
def test_add_log_bench(benchmark, repo, event_loop):
payload = {
"decky": "decky-bench",
"service": "ssh",
"event_type": "connect",
"attacker_ip": "10.0.0.1",
"raw_line": "bench event",
"fields": "{}",
"msg": "",
}
def run():
event_loop.run_until_complete(repo.add_log(payload))
benchmark(run)
def test_get_logs_bench(benchmark, seeded_repo, event_loop):
def run():
return event_loop.run_until_complete(seeded_repo.get_logs(limit=50, offset=0))
result = benchmark(run)
assert len(result) == 50
def test_get_total_logs_bench(benchmark, seeded_repo, event_loop):
def run():
return event_loop.run_until_complete(seeded_repo.get_total_logs())
benchmark(run)
def test_get_logs_search_bench(benchmark, seeded_repo, event_loop):
def run():
return event_loop.run_until_complete(
seeded_repo.get_logs(limit=50, offset=0, search="service:ssh")
)
benchmark(run)
def test_get_user_by_username_bench(benchmark, seeded_repo, event_loop):
def run():
return event_loop.run_until_complete(seeded_repo.get_user_by_username("admin"))
benchmark(run)