10 changed files with 278 additions and 4 deletions
--- a/decnet/env.py
+++ b/decnet/env.py
@@ -59,6 +59,12 @@ DECNET_SYSTEM_LOGS: str = os.environ.get("DECNET_SYSTEM_LOGS", "decnet.system.lo
 # which causes events to be skipped or processed twice.
 DECNET_EMBED_PROFILER: bool = os.environ.get("DECNET_EMBED_PROFILER", "").lower() == "true"
 # Set to "true" to mount the Pyinstrument ASGI middleware on the FastAPI app.
 # Produces per-request HTML flamegraphs under ./profiles/. Off by default so
 # production and normal dev runs pay zero profiling overhead.
 DECNET_PROFILE_REQUESTS: bool = os.environ.get("DECNET_PROFILE_REQUESTS", "").lower() == "true"
 DECNET_PROFILE_DIR: str = os.environ.get("DECNET_PROFILE_DIR", "profiles")
 # API Options
 DECNET_API_HOST: str = os.environ.get("DECNET_API_HOST", "127.0.0.1")
 DECNET_API_PORT: int = _port("DECNET_API_PORT", 8000)
--- a/decnet/web/api.py
+++ b/decnet/web/api.py
@@ -9,7 +9,14 @@ from fastapi.responses import JSONResponse
 from pydantic import ValidationError
 from fastapi.middleware.cors import CORSMiddleware
-from decnet.env import DECNET_CORS_ORIGINS, DECNET_DEVELOPER, DECNET_EMBED_PROFILER, DECNET_INGEST_LOG_FILE
+from decnet.env import (
    DECNET_CORS_ORIGINS,
    DECNET_DEVELOPER,
    DECNET_EMBED_PROFILER,
    DECNET_INGEST_LOG_FILE,
    DECNET_PROFILE_DIR,
    DECNET_PROFILE_REQUESTS,
 )
 from decnet.logging import get_logger
 from decnet.web.dependencies import repo
 from decnet.collector import log_collector_worker
@@ -38,6 +45,16 @@ def get_background_tasks() -> dict[str, Optional[asyncio.Task[Any]]]:
 async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
    global ingestion_task, collector_task, attacker_task, sniffer_task
    import resource
    soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
    if soft < 4096:
        log.warning(
            "Low open-file limit detected (ulimit -n = %d). "
            "High-traffic deployments may hit 'Too many open files' errors. "
            "Raise it with: ulimit -n 65536 (session) or LimitNOFILE=65536 (systemd)",
            soft,
        )
    log.info("API startup initialising database")
    for attempt in range(1, 6):
        try:
@@ -125,6 +142,31 @@ app.add_middleware(
    allow_headers=["Authorization", "Content-Type", "Last-Event-ID"],
 )
 if DECNET_PROFILE_REQUESTS:
    import time
    from pathlib import Path
    from pyinstrument import Profiler
    from starlette.middleware.base import BaseHTTPMiddleware
    _profile_dir = Path(DECNET_PROFILE_DIR)
    _profile_dir.mkdir(parents=True, exist_ok=True)
    class PyinstrumentMiddleware(BaseHTTPMiddleware):
        async def dispatch(self, request: Request, call_next):
            profiler = Profiler(async_mode="enabled")
            profiler.start()
            try:
                response = await call_next(request)
            finally:
                profiler.stop()
            slug = request.url.path.strip("/").replace("/", "_") or "root"
            out = _profile_dir / f"{int(time.time() * 1000)}-{request.method}-{slug}.html"
            out.write_text(profiler.output_html())
            return response
    app.add_middleware(PyinstrumentMiddleware)
    log.info("Pyinstrument middleware mounted — flamegraphs -> %s", _profile_dir)
 # Include the modular API router
 app.include_router(api_router, prefix="/api/v1")
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,7 +16,7 @@ dependencies = [
    "fastapi>=0.110.0",
    "uvicorn>=0.29.0",
    "aiosqlite>=0.20.0",
-    "aiomysql>=0.2.0",
+    "asyncmy>=0.2.9",
    "PyJWT>=2.8.0",
    "bcrypt>=4.1.0",
    "psutil>=5.9.0",
@@ -32,8 +32,15 @@ tracing = [
    "opentelemetry-exporter-otlp>=1.20.0",
    "opentelemetry-instrumentation-fastapi>=0.41b0",
 ]
 profile = [
    "py-spy>=0.4.1",
    "pyinstrument>=4.7",
    "pytest-benchmark>=4.0",
    "memray>=1.14 ; sys_platform == 'linux'",
    "snakeviz>=2.2",
 ]
 dev = [
-    "decnet[tracing]",
+    "decnet[tracing,profile]",
    "pytest>=9.0.3",
    "ruff>=0.15.10",
    "bandit>=1.9.4",
@@ -54,6 +61,8 @@ dev = [
    "psycopg2-binary>=2.9.11",
    "paho-mqtt>=2.1.0",
    "pymongo>=4.16.0",
    "locust>=2.29",
    "gevent>=24.0",
 ]
 [project.scripts]
@@ -62,11 +71,13 @@ decnet = "decnet.cli:app"
 [tool.pytest.ini_options]
 asyncio_mode = "auto"
 asyncio_debug = "true"
-addopts = "-m 'not fuzz and not live' -v -q -x -n logical --dist loadscope"
+addopts = "-m 'not fuzz and not live and not stress and not bench' -v -q -x -n logical --dist loadscope"
 markers = [
    "fuzz: hypothesis-based fuzz tests (slow, run with -m fuzz or -m '' for all)",
    "live: live subprocess service tests (run with -m live)",
    "live_docker: live Docker container tests (requires DECNET_LIVE_DOCKER=1)",
    "stress: locust-based stress tests (run with -m stress)",
    "bench: pytest-benchmark micro-benchmarks (run with -m bench)",
 ]
 filterwarnings = [
    "ignore::pytest.PytestUnhandledThreadExceptionWarning",
--- a/scripts/profile/cprofile-cli.sh
+++ b/scripts/profile/cprofile-cli.sh
@@ -0,0 +1,17 @@
 #!/usr/bin/env bash
 # Run a `decnet` subcommand under cProfile and write a .prof file for snakeviz.
 # Usage: scripts/profile/cprofile-cli.sh services
 #        scripts/profile/cprofile-cli.sh status
 set -euo pipefail
 if [[ $# -lt 1 ]]; then
    echo "Usage: $0 <decnet-subcommand> [args...]" >&2
    exit 1
 fi
 OUT="${OUT:-profiles/cprofile-$(date +%s).prof}"
 mkdir -p "$(dirname "$OUT")"
 python -m cProfile -o "${OUT}" -m decnet.cli "$@"
 echo "Wrote ${OUT}"
 echo "View with: snakeviz ${OUT}"
--- a/scripts/profile/memray-api.sh
+++ b/scripts/profile/memray-api.sh
@@ -0,0 +1,15 @@
 #!/usr/bin/env bash
 # Run the DECNET API under memray to capture an allocation profile.
 # Stop with Ctrl-C; then render with `memray flamegraph <bin>`.
 set -euo pipefail
 HOST="${DECNET_API_HOST:-127.0.0.1}"
 PORT="${DECNET_API_PORT:-8000}"
 OUT="${OUT:-profiles/memray-$(date +%s).bin}"
 mkdir -p "$(dirname "$OUT")"
 echo "Starting uvicorn under memray -> ${OUT}"
 python -m memray run -o "${OUT}" -m uvicorn decnet.web.api:app \
    --host "${HOST}" --port "${PORT}" --log-level warning
 echo "Render with: memray flamegraph ${OUT}"
--- a/scripts/profile/pyspy-attach.sh
+++ b/scripts/profile/pyspy-attach.sh
@@ -0,0 +1,18 @@
 #!/usr/bin/env bash
 # Attach py-spy to the running DECNET uvicorn worker(s) and record a flamegraph.
 # Requires sudo on Linux because of kernel.yama.ptrace_scope=1 by default.
 set -euo pipefail
 DURATION="${DURATION:-30}"
 OUT="${OUT:-profiles/pyspy-$(date +%s).svg}"
 mkdir -p "$(dirname "$OUT")"
 PID="$(pgrep -f 'uvicorn decnet.web.api' | head -n 1 || true)"
 if [[ -z "${PID}" ]]; then
    echo "No uvicorn worker found. Start the API first (e.g. 'decnet deploy ...')." >&2
    exit 1
 fi
 echo "Attaching py-spy to PID ${PID} for ${DURATION}s -> ${OUT}"
 sudo py-spy record -o "${OUT}" -p "${PID}" -d "${DURATION}" --subprocesses
 echo "Wrote ${OUT}"
--- a/tests/perf/README.md
+++ b/tests/perf/README.md
@@ -0,0 +1,69 @@
 # DECNET Profiling
 Five complementary lenses. Pick whichever answers the question you have.
 ## 1. Whole-process sampling — py-spy
 Attach to a running API and record a flamegraph for 30s. Requires `sudo`
 (Linux ptrace scope).
 ```bash
 ./scripts/profile/pyspy-attach.sh               # auto-finds uvicorn pid
 sudo py-spy record -o profile.svg -p <PID> -d 30 --subprocesses
 ```
 If py-spy "doesn't work", it is almost always one of:
 - Attached to the Typer CLI PID, not the uvicorn worker PID (use `pgrep -f 'uvicorn decnet.web.api'`).
 - `kernel.yama.ptrace_scope=1` — run with `sudo` or `sudo sysctl kernel.yama.ptrace_scope=0`.
 - The API isn't actually running (a `--dry-run` deploy starts nothing).
 ## 2. Per-request flamegraphs — Pyinstrument
 Set the env flag, hit endpoints, find HTML flamegraphs under `./profiles/`.
 ```bash
 DECNET_PROFILE_REQUESTS=true decnet deploy --mode unihost --deckies 1
 # in another shell:
 curl http://127.0.0.1:8000/api/v1/health
 open profiles/*.html
 ```
 Off by default — zero overhead when the flag is unset.
 ## 3. Deterministic call graph — cProfile + snakeviz
 For one-shot profiling of CLI commands or scripts.
 ```bash
 ./scripts/profile/cprofile-cli.sh services      # profiles `decnet services`
 snakeviz profiles/cprofile.prof
 ```
 ## 4. Micro-benchmarks — pytest-benchmark
 Regression-gate repository hot paths.
 ```bash
 pytest -m bench tests/perf/ -n0                 # SQLite backend (default)
 DECNET_DB_TYPE=mysql pytest -m bench tests/perf/ -n0
 ```
 Note: `-n0` disables xdist. `pytest-benchmark` refuses to measure under
 parallel workers, which is the project default (`-n logical --dist loadscope`).
 ## 5. Memory allocation — memray
 Hunt leaks and allocation hot spots in the API / workers.
 ```bash
 ./scripts/profile/memray-api.sh                 # runs uvicorn under memray
 memray flamegraph profiles/memray.bin
 ```
 ## Load generation
 Pair any of the in-process lenses (2, 5) with Locust for realistic traffic:
 ```bash
 pytest -m stress tests/stress/
 ```
--- a/tests/perf/init.py
+++ b/tests/perf/init.py
--- a/tests/perf/conftest.py
+++ b/tests/perf/conftest.py
@@ -0,0 +1,36 @@
 import asyncio
 import pytest
 from decnet.web.db.factory import get_repository
@pytest.fixture(scope="session")
 def event_loop():
    loop = asyncio.new_event_loop()
    yield loop
    loop.close()
@pytest.fixture(scope="session")
 def repo(tmp_path_factory, event_loop):
    path = tmp_path_factory.mktemp("perf") / "bench.db"
    r = get_repository(db_path=str(path))
    event_loop.run_until_complete(r.initialize())
    return r
@pytest.fixture(scope="session")
 def seeded_repo(repo, event_loop):
    async def _seed():
        for i in range(1000):
            await repo.add_log({
                "decky": f"decky-{i % 10:02d}",
                "service": ["ssh", "ftp", "smb", "rdp"][i % 4],
                "event_type": "connect",
                "attacker_ip": f"10.0.{i // 256}.{i % 256}",
                "raw_line": f"event {i}",
                "fields": "{}",
                "msg": "",
            })
    event_loop.run_until_complete(_seed())
    return repo
--- a/tests/perf/test_repo_bench.py
+++ b/tests/perf/test_repo_bench.py
@@ -0,0 +1,60 @@
 """
 Micro-benchmarks for the repository hot paths.
 Run with:
    pytest -m bench tests/perf/
 These do NOT run in the default suite (see `addopts` in pyproject.toml).
 """
 import pytest
 pytestmark = pytest.mark.bench
 def test_add_log_bench(benchmark, repo, event_loop):
    payload = {
        "decky": "decky-bench",
        "service": "ssh",
        "event_type": "connect",
        "attacker_ip": "10.0.0.1",
        "raw_line": "bench event",
        "fields": "{}",
        "msg": "",
    }
    def run():
        event_loop.run_until_complete(repo.add_log(payload))
    benchmark(run)
 def test_get_logs_bench(benchmark, seeded_repo, event_loop):
    def run():
        return event_loop.run_until_complete(seeded_repo.get_logs(limit=50, offset=0))
    result = benchmark(run)
    assert len(result) == 50
 def test_get_total_logs_bench(benchmark, seeded_repo, event_loop):
    def run():
        return event_loop.run_until_complete(seeded_repo.get_total_logs())
    benchmark(run)
 def test_get_logs_search_bench(benchmark, seeded_repo, event_loop):
    def run():
        return event_loop.run_until_complete(
            seeded_repo.get_logs(limit=50, offset=0, search="service:ssh")
        )
    benchmark(run)
 def test_get_user_by_username_bench(benchmark, seeded_repo, event_loop):
    def run():
        return event_loop.run_until_complete(seeded_repo.get_user_by_username("admin"))
    benchmark(run)