diff --git a/decnet/env.py b/decnet/env.py index 3fe0fcf..7cb163b 100644 --- a/decnet/env.py +++ b/decnet/env.py @@ -59,6 +59,12 @@ DECNET_SYSTEM_LOGS: str = os.environ.get("DECNET_SYSTEM_LOGS", "decnet.system.lo # which causes events to be skipped or processed twice. DECNET_EMBED_PROFILER: bool = os.environ.get("DECNET_EMBED_PROFILER", "").lower() == "true" +# Set to "true" to mount the Pyinstrument ASGI middleware on the FastAPI app. +# Produces per-request HTML flamegraphs under ./profiles/. Off by default so +# production and normal dev runs pay zero profiling overhead. +DECNET_PROFILE_REQUESTS: bool = os.environ.get("DECNET_PROFILE_REQUESTS", "").lower() == "true" +DECNET_PROFILE_DIR: str = os.environ.get("DECNET_PROFILE_DIR", "profiles") + # API Options DECNET_API_HOST: str = os.environ.get("DECNET_API_HOST", "127.0.0.1") DECNET_API_PORT: int = _port("DECNET_API_PORT", 8000) diff --git a/decnet/web/api.py b/decnet/web/api.py index 9e33c77..c837941 100644 --- a/decnet/web/api.py +++ b/decnet/web/api.py @@ -9,7 +9,14 @@ from fastapi.responses import JSONResponse from pydantic import ValidationError from fastapi.middleware.cors import CORSMiddleware -from decnet.env import DECNET_CORS_ORIGINS, DECNET_DEVELOPER, DECNET_EMBED_PROFILER, DECNET_INGEST_LOG_FILE +from decnet.env import ( + DECNET_CORS_ORIGINS, + DECNET_DEVELOPER, + DECNET_EMBED_PROFILER, + DECNET_INGEST_LOG_FILE, + DECNET_PROFILE_DIR, + DECNET_PROFILE_REQUESTS, +) from decnet.logging import get_logger from decnet.web.dependencies import repo from decnet.collector import log_collector_worker @@ -38,6 +45,16 @@ def get_background_tasks() -> dict[str, Optional[asyncio.Task[Any]]]: async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]: global ingestion_task, collector_task, attacker_task, sniffer_task + import resource + soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE) + if soft < 4096: + log.warning( + "Low open-file limit detected (ulimit -n = %d). " + "High-traffic deployments may hit 'Too many open files' errors. " + "Raise it with: ulimit -n 65536 (session) or LimitNOFILE=65536 (systemd)", + soft, + ) + log.info("API startup initialising database") for attempt in range(1, 6): try: @@ -125,6 +142,31 @@ app.add_middleware( allow_headers=["Authorization", "Content-Type", "Last-Event-ID"], ) +if DECNET_PROFILE_REQUESTS: + import time + from pathlib import Path + from pyinstrument import Profiler + from starlette.middleware.base import BaseHTTPMiddleware + + _profile_dir = Path(DECNET_PROFILE_DIR) + _profile_dir.mkdir(parents=True, exist_ok=True) + + class PyinstrumentMiddleware(BaseHTTPMiddleware): + async def dispatch(self, request: Request, call_next): + profiler = Profiler(async_mode="enabled") + profiler.start() + try: + response = await call_next(request) + finally: + profiler.stop() + slug = request.url.path.strip("/").replace("/", "_") or "root" + out = _profile_dir / f"{int(time.time() * 1000)}-{request.method}-{slug}.html" + out.write_text(profiler.output_html()) + return response + + app.add_middleware(PyinstrumentMiddleware) + log.info("Pyinstrument middleware mounted — flamegraphs -> %s", _profile_dir) + # Include the modular API router app.include_router(api_router, prefix="/api/v1") diff --git a/pyproject.toml b/pyproject.toml index 8558433..c899e69 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,7 @@ dependencies = [ "fastapi>=0.110.0", "uvicorn>=0.29.0", "aiosqlite>=0.20.0", - "aiomysql>=0.2.0", + "asyncmy>=0.2.9", "PyJWT>=2.8.0", "bcrypt>=4.1.0", "psutil>=5.9.0", @@ -32,8 +32,15 @@ tracing = [ "opentelemetry-exporter-otlp>=1.20.0", "opentelemetry-instrumentation-fastapi>=0.41b0", ] +profile = [ + "py-spy>=0.4.1", + "pyinstrument>=4.7", + "pytest-benchmark>=4.0", + "memray>=1.14 ; sys_platform == 'linux'", + "snakeviz>=2.2", +] dev = [ - "decnet[tracing]", + "decnet[tracing,profile]", "pytest>=9.0.3", "ruff>=0.15.10", "bandit>=1.9.4", @@ -54,6 +61,8 @@ dev = [ "psycopg2-binary>=2.9.11", "paho-mqtt>=2.1.0", "pymongo>=4.16.0", + "locust>=2.29", + "gevent>=24.0", ] [project.scripts] @@ -62,11 +71,13 @@ decnet = "decnet.cli:app" [tool.pytest.ini_options] asyncio_mode = "auto" asyncio_debug = "true" -addopts = "-m 'not fuzz and not live' -v -q -x -n logical --dist loadscope" +addopts = "-m 'not fuzz and not live and not stress and not bench' -v -q -x -n logical --dist loadscope" markers = [ "fuzz: hypothesis-based fuzz tests (slow, run with -m fuzz or -m '' for all)", "live: live subprocess service tests (run with -m live)", "live_docker: live Docker container tests (requires DECNET_LIVE_DOCKER=1)", + "stress: locust-based stress tests (run with -m stress)", + "bench: pytest-benchmark micro-benchmarks (run with -m bench)", ] filterwarnings = [ "ignore::pytest.PytestUnhandledThreadExceptionWarning", diff --git a/scripts/profile/cprofile-cli.sh b/scripts/profile/cprofile-cli.sh new file mode 100755 index 0000000..445fb68 --- /dev/null +++ b/scripts/profile/cprofile-cli.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash +# Run a `decnet` subcommand under cProfile and write a .prof file for snakeviz. +# Usage: scripts/profile/cprofile-cli.sh services +# scripts/profile/cprofile-cli.sh status +set -euo pipefail + +if [[ $# -lt 1 ]]; then + echo "Usage: $0 [args...]" >&2 + exit 1 +fi + +OUT="${OUT:-profiles/cprofile-$(date +%s).prof}" +mkdir -p "$(dirname "$OUT")" + +python -m cProfile -o "${OUT}" -m decnet.cli "$@" +echo "Wrote ${OUT}" +echo "View with: snakeviz ${OUT}" diff --git a/scripts/profile/memray-api.sh b/scripts/profile/memray-api.sh new file mode 100755 index 0000000..8bc3b9f --- /dev/null +++ b/scripts/profile/memray-api.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash +# Run the DECNET API under memray to capture an allocation profile. +# Stop with Ctrl-C; then render with `memray flamegraph `. +set -euo pipefail + +HOST="${DECNET_API_HOST:-127.0.0.1}" +PORT="${DECNET_API_PORT:-8000}" +OUT="${OUT:-profiles/memray-$(date +%s).bin}" +mkdir -p "$(dirname "$OUT")" + +echo "Starting uvicorn under memray -> ${OUT}" +python -m memray run -o "${OUT}" -m uvicorn decnet.web.api:app \ + --host "${HOST}" --port "${PORT}" --log-level warning + +echo "Render with: memray flamegraph ${OUT}" diff --git a/scripts/profile/pyspy-attach.sh b/scripts/profile/pyspy-attach.sh new file mode 100755 index 0000000..5a61e42 --- /dev/null +++ b/scripts/profile/pyspy-attach.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash +# Attach py-spy to the running DECNET uvicorn worker(s) and record a flamegraph. +# Requires sudo on Linux because of kernel.yama.ptrace_scope=1 by default. +set -euo pipefail + +DURATION="${DURATION:-30}" +OUT="${OUT:-profiles/pyspy-$(date +%s).svg}" +mkdir -p "$(dirname "$OUT")" + +PID="$(pgrep -f 'uvicorn decnet.web.api' | head -n 1 || true)" +if [[ -z "${PID}" ]]; then + echo "No uvicorn worker found. Start the API first (e.g. 'decnet deploy ...')." >&2 + exit 1 +fi + +echo "Attaching py-spy to PID ${PID} for ${DURATION}s -> ${OUT}" +sudo py-spy record -o "${OUT}" -p "${PID}" -d "${DURATION}" --subprocesses +echo "Wrote ${OUT}" diff --git a/tests/perf/README.md b/tests/perf/README.md new file mode 100644 index 0000000..38310c4 --- /dev/null +++ b/tests/perf/README.md @@ -0,0 +1,69 @@ +# DECNET Profiling + +Five complementary lenses. Pick whichever answers the question you have. + +## 1. Whole-process sampling — py-spy + +Attach to a running API and record a flamegraph for 30s. Requires `sudo` +(Linux ptrace scope). + +```bash +./scripts/profile/pyspy-attach.sh # auto-finds uvicorn pid +sudo py-spy record -o profile.svg -p -d 30 --subprocesses +``` + +If py-spy "doesn't work", it is almost always one of: +- Attached to the Typer CLI PID, not the uvicorn worker PID (use `pgrep -f 'uvicorn decnet.web.api'`). +- `kernel.yama.ptrace_scope=1` — run with `sudo` or `sudo sysctl kernel.yama.ptrace_scope=0`. +- The API isn't actually running (a `--dry-run` deploy starts nothing). + +## 2. Per-request flamegraphs — Pyinstrument + +Set the env flag, hit endpoints, find HTML flamegraphs under `./profiles/`. + +```bash +DECNET_PROFILE_REQUESTS=true decnet deploy --mode unihost --deckies 1 +# in another shell: +curl http://127.0.0.1:8000/api/v1/health +open profiles/*.html +``` + +Off by default — zero overhead when the flag is unset. + +## 3. Deterministic call graph — cProfile + snakeviz + +For one-shot profiling of CLI commands or scripts. + +```bash +./scripts/profile/cprofile-cli.sh services # profiles `decnet services` +snakeviz profiles/cprofile.prof +``` + +## 4. Micro-benchmarks — pytest-benchmark + +Regression-gate repository hot paths. + +```bash +pytest -m bench tests/perf/ -n0 # SQLite backend (default) +DECNET_DB_TYPE=mysql pytest -m bench tests/perf/ -n0 +``` + +Note: `-n0` disables xdist. `pytest-benchmark` refuses to measure under +parallel workers, which is the project default (`-n logical --dist loadscope`). + +## 5. Memory allocation — memray + +Hunt leaks and allocation hot spots in the API / workers. + +```bash +./scripts/profile/memray-api.sh # runs uvicorn under memray +memray flamegraph profiles/memray.bin +``` + +## Load generation + +Pair any of the in-process lenses (2, 5) with Locust for realistic traffic: + +```bash +pytest -m stress tests/stress/ +``` diff --git a/tests/perf/__init__.py b/tests/perf/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/perf/conftest.py b/tests/perf/conftest.py new file mode 100644 index 0000000..da0d374 --- /dev/null +++ b/tests/perf/conftest.py @@ -0,0 +1,36 @@ +import asyncio +import pytest + +from decnet.web.db.factory import get_repository + + +@pytest.fixture(scope="session") +def event_loop(): + loop = asyncio.new_event_loop() + yield loop + loop.close() + + +@pytest.fixture(scope="session") +def repo(tmp_path_factory, event_loop): + path = tmp_path_factory.mktemp("perf") / "bench.db" + r = get_repository(db_path=str(path)) + event_loop.run_until_complete(r.initialize()) + return r + + +@pytest.fixture(scope="session") +def seeded_repo(repo, event_loop): + async def _seed(): + for i in range(1000): + await repo.add_log({ + "decky": f"decky-{i % 10:02d}", + "service": ["ssh", "ftp", "smb", "rdp"][i % 4], + "event_type": "connect", + "attacker_ip": f"10.0.{i // 256}.{i % 256}", + "raw_line": f"event {i}", + "fields": "{}", + "msg": "", + }) + event_loop.run_until_complete(_seed()) + return repo diff --git a/tests/perf/test_repo_bench.py b/tests/perf/test_repo_bench.py new file mode 100644 index 0000000..cc8c30f --- /dev/null +++ b/tests/perf/test_repo_bench.py @@ -0,0 +1,60 @@ +""" +Micro-benchmarks for the repository hot paths. + +Run with: + pytest -m bench tests/perf/ + +These do NOT run in the default suite (see `addopts` in pyproject.toml). +""" +import pytest + + +pytestmark = pytest.mark.bench + + +def test_add_log_bench(benchmark, repo, event_loop): + payload = { + "decky": "decky-bench", + "service": "ssh", + "event_type": "connect", + "attacker_ip": "10.0.0.1", + "raw_line": "bench event", + "fields": "{}", + "msg": "", + } + + def run(): + event_loop.run_until_complete(repo.add_log(payload)) + + benchmark(run) + + +def test_get_logs_bench(benchmark, seeded_repo, event_loop): + def run(): + return event_loop.run_until_complete(seeded_repo.get_logs(limit=50, offset=0)) + + result = benchmark(run) + assert len(result) == 50 + + +def test_get_total_logs_bench(benchmark, seeded_repo, event_loop): + def run(): + return event_loop.run_until_complete(seeded_repo.get_total_logs()) + + benchmark(run) + + +def test_get_logs_search_bench(benchmark, seeded_repo, event_loop): + def run(): + return event_loop.run_until_complete( + seeded_repo.get_logs(limit=50, offset=0, search="service:ssh") + ) + + benchmark(run) + + +def test_get_user_by_username_bench(benchmark, seeded_repo, event_loop): + def run(): + return event_loop.run_until_complete(seeded_repo.get_user_by_username("admin")) + + benchmark(run)