merge testing->tomerge/main #7
@@ -59,6 +59,12 @@ DECNET_SYSTEM_LOGS: str = os.environ.get("DECNET_SYSTEM_LOGS", "decnet.system.lo
|
|||||||
# which causes events to be skipped or processed twice.
|
# which causes events to be skipped or processed twice.
|
||||||
DECNET_EMBED_PROFILER: bool = os.environ.get("DECNET_EMBED_PROFILER", "").lower() == "true"
|
DECNET_EMBED_PROFILER: bool = os.environ.get("DECNET_EMBED_PROFILER", "").lower() == "true"
|
||||||
|
|
||||||
|
# Set to "true" to mount the Pyinstrument ASGI middleware on the FastAPI app.
|
||||||
|
# Produces per-request HTML flamegraphs under ./profiles/. Off by default so
|
||||||
|
# production and normal dev runs pay zero profiling overhead.
|
||||||
|
DECNET_PROFILE_REQUESTS: bool = os.environ.get("DECNET_PROFILE_REQUESTS", "").lower() == "true"
|
||||||
|
DECNET_PROFILE_DIR: str = os.environ.get("DECNET_PROFILE_DIR", "profiles")
|
||||||
|
|
||||||
# API Options
|
# API Options
|
||||||
DECNET_API_HOST: str = os.environ.get("DECNET_API_HOST", "127.0.0.1")
|
DECNET_API_HOST: str = os.environ.get("DECNET_API_HOST", "127.0.0.1")
|
||||||
DECNET_API_PORT: int = _port("DECNET_API_PORT", 8000)
|
DECNET_API_PORT: int = _port("DECNET_API_PORT", 8000)
|
||||||
|
|||||||
@@ -9,7 +9,14 @@ from fastapi.responses import JSONResponse
|
|||||||
from pydantic import ValidationError
|
from pydantic import ValidationError
|
||||||
from fastapi.middleware.cors import CORSMiddleware
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
|
|
||||||
from decnet.env import DECNET_CORS_ORIGINS, DECNET_DEVELOPER, DECNET_EMBED_PROFILER, DECNET_INGEST_LOG_FILE
|
from decnet.env import (
|
||||||
|
DECNET_CORS_ORIGINS,
|
||||||
|
DECNET_DEVELOPER,
|
||||||
|
DECNET_EMBED_PROFILER,
|
||||||
|
DECNET_INGEST_LOG_FILE,
|
||||||
|
DECNET_PROFILE_DIR,
|
||||||
|
DECNET_PROFILE_REQUESTS,
|
||||||
|
)
|
||||||
from decnet.logging import get_logger
|
from decnet.logging import get_logger
|
||||||
from decnet.web.dependencies import repo
|
from decnet.web.dependencies import repo
|
||||||
from decnet.collector import log_collector_worker
|
from decnet.collector import log_collector_worker
|
||||||
@@ -38,6 +45,16 @@ def get_background_tasks() -> dict[str, Optional[asyncio.Task[Any]]]:
|
|||||||
async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
|
async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
|
||||||
global ingestion_task, collector_task, attacker_task, sniffer_task
|
global ingestion_task, collector_task, attacker_task, sniffer_task
|
||||||
|
|
||||||
|
import resource
|
||||||
|
soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
|
||||||
|
if soft < 4096:
|
||||||
|
log.warning(
|
||||||
|
"Low open-file limit detected (ulimit -n = %d). "
|
||||||
|
"High-traffic deployments may hit 'Too many open files' errors. "
|
||||||
|
"Raise it with: ulimit -n 65536 (session) or LimitNOFILE=65536 (systemd)",
|
||||||
|
soft,
|
||||||
|
)
|
||||||
|
|
||||||
log.info("API startup initialising database")
|
log.info("API startup initialising database")
|
||||||
for attempt in range(1, 6):
|
for attempt in range(1, 6):
|
||||||
try:
|
try:
|
||||||
@@ -125,6 +142,31 @@ app.add_middleware(
|
|||||||
allow_headers=["Authorization", "Content-Type", "Last-Event-ID"],
|
allow_headers=["Authorization", "Content-Type", "Last-Event-ID"],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if DECNET_PROFILE_REQUESTS:
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
from pyinstrument import Profiler
|
||||||
|
from starlette.middleware.base import BaseHTTPMiddleware
|
||||||
|
|
||||||
|
_profile_dir = Path(DECNET_PROFILE_DIR)
|
||||||
|
_profile_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
class PyinstrumentMiddleware(BaseHTTPMiddleware):
|
||||||
|
async def dispatch(self, request: Request, call_next):
|
||||||
|
profiler = Profiler(async_mode="enabled")
|
||||||
|
profiler.start()
|
||||||
|
try:
|
||||||
|
response = await call_next(request)
|
||||||
|
finally:
|
||||||
|
profiler.stop()
|
||||||
|
slug = request.url.path.strip("/").replace("/", "_") or "root"
|
||||||
|
out = _profile_dir / f"{int(time.time() * 1000)}-{request.method}-{slug}.html"
|
||||||
|
out.write_text(profiler.output_html())
|
||||||
|
return response
|
||||||
|
|
||||||
|
app.add_middleware(PyinstrumentMiddleware)
|
||||||
|
log.info("Pyinstrument middleware mounted — flamegraphs -> %s", _profile_dir)
|
||||||
|
|
||||||
# Include the modular API router
|
# Include the modular API router
|
||||||
app.include_router(api_router, prefix="/api/v1")
|
app.include_router(api_router, prefix="/api/v1")
|
||||||
|
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ dependencies = [
|
|||||||
"fastapi>=0.110.0",
|
"fastapi>=0.110.0",
|
||||||
"uvicorn>=0.29.0",
|
"uvicorn>=0.29.0",
|
||||||
"aiosqlite>=0.20.0",
|
"aiosqlite>=0.20.0",
|
||||||
"aiomysql>=0.2.0",
|
"asyncmy>=0.2.9",
|
||||||
"PyJWT>=2.8.0",
|
"PyJWT>=2.8.0",
|
||||||
"bcrypt>=4.1.0",
|
"bcrypt>=4.1.0",
|
||||||
"psutil>=5.9.0",
|
"psutil>=5.9.0",
|
||||||
@@ -32,8 +32,15 @@ tracing = [
|
|||||||
"opentelemetry-exporter-otlp>=1.20.0",
|
"opentelemetry-exporter-otlp>=1.20.0",
|
||||||
"opentelemetry-instrumentation-fastapi>=0.41b0",
|
"opentelemetry-instrumentation-fastapi>=0.41b0",
|
||||||
]
|
]
|
||||||
|
profile = [
|
||||||
|
"py-spy>=0.4.1",
|
||||||
|
"pyinstrument>=4.7",
|
||||||
|
"pytest-benchmark>=4.0",
|
||||||
|
"memray>=1.14 ; sys_platform == 'linux'",
|
||||||
|
"snakeviz>=2.2",
|
||||||
|
]
|
||||||
dev = [
|
dev = [
|
||||||
"decnet[tracing]",
|
"decnet[tracing,profile]",
|
||||||
"pytest>=9.0.3",
|
"pytest>=9.0.3",
|
||||||
"ruff>=0.15.10",
|
"ruff>=0.15.10",
|
||||||
"bandit>=1.9.4",
|
"bandit>=1.9.4",
|
||||||
@@ -54,6 +61,8 @@ dev = [
|
|||||||
"psycopg2-binary>=2.9.11",
|
"psycopg2-binary>=2.9.11",
|
||||||
"paho-mqtt>=2.1.0",
|
"paho-mqtt>=2.1.0",
|
||||||
"pymongo>=4.16.0",
|
"pymongo>=4.16.0",
|
||||||
|
"locust>=2.29",
|
||||||
|
"gevent>=24.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.scripts]
|
[project.scripts]
|
||||||
@@ -62,11 +71,13 @@ decnet = "decnet.cli:app"
|
|||||||
[tool.pytest.ini_options]
|
[tool.pytest.ini_options]
|
||||||
asyncio_mode = "auto"
|
asyncio_mode = "auto"
|
||||||
asyncio_debug = "true"
|
asyncio_debug = "true"
|
||||||
addopts = "-m 'not fuzz and not live' -v -q -x -n logical --dist loadscope"
|
addopts = "-m 'not fuzz and not live and not stress and not bench' -v -q -x -n logical --dist loadscope"
|
||||||
markers = [
|
markers = [
|
||||||
"fuzz: hypothesis-based fuzz tests (slow, run with -m fuzz or -m '' for all)",
|
"fuzz: hypothesis-based fuzz tests (slow, run with -m fuzz or -m '' for all)",
|
||||||
"live: live subprocess service tests (run with -m live)",
|
"live: live subprocess service tests (run with -m live)",
|
||||||
"live_docker: live Docker container tests (requires DECNET_LIVE_DOCKER=1)",
|
"live_docker: live Docker container tests (requires DECNET_LIVE_DOCKER=1)",
|
||||||
|
"stress: locust-based stress tests (run with -m stress)",
|
||||||
|
"bench: pytest-benchmark micro-benchmarks (run with -m bench)",
|
||||||
]
|
]
|
||||||
filterwarnings = [
|
filterwarnings = [
|
||||||
"ignore::pytest.PytestUnhandledThreadExceptionWarning",
|
"ignore::pytest.PytestUnhandledThreadExceptionWarning",
|
||||||
|
|||||||
17
scripts/profile/cprofile-cli.sh
Executable file
17
scripts/profile/cprofile-cli.sh
Executable file
@@ -0,0 +1,17 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
# Run a `decnet` subcommand under cProfile and write a .prof file for snakeviz.
|
||||||
|
# Usage: scripts/profile/cprofile-cli.sh services
|
||||||
|
# scripts/profile/cprofile-cli.sh status
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
if [[ $# -lt 1 ]]; then
|
||||||
|
echo "Usage: $0 <decnet-subcommand> [args...]" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
OUT="${OUT:-profiles/cprofile-$(date +%s).prof}"
|
||||||
|
mkdir -p "$(dirname "$OUT")"
|
||||||
|
|
||||||
|
python -m cProfile -o "${OUT}" -m decnet.cli "$@"
|
||||||
|
echo "Wrote ${OUT}"
|
||||||
|
echo "View with: snakeviz ${OUT}"
|
||||||
15
scripts/profile/memray-api.sh
Executable file
15
scripts/profile/memray-api.sh
Executable file
@@ -0,0 +1,15 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
# Run the DECNET API under memray to capture an allocation profile.
|
||||||
|
# Stop with Ctrl-C; then render with `memray flamegraph <bin>`.
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
HOST="${DECNET_API_HOST:-127.0.0.1}"
|
||||||
|
PORT="${DECNET_API_PORT:-8000}"
|
||||||
|
OUT="${OUT:-profiles/memray-$(date +%s).bin}"
|
||||||
|
mkdir -p "$(dirname "$OUT")"
|
||||||
|
|
||||||
|
echo "Starting uvicorn under memray -> ${OUT}"
|
||||||
|
python -m memray run -o "${OUT}" -m uvicorn decnet.web.api:app \
|
||||||
|
--host "${HOST}" --port "${PORT}" --log-level warning
|
||||||
|
|
||||||
|
echo "Render with: memray flamegraph ${OUT}"
|
||||||
18
scripts/profile/pyspy-attach.sh
Executable file
18
scripts/profile/pyspy-attach.sh
Executable file
@@ -0,0 +1,18 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
# Attach py-spy to the running DECNET uvicorn worker(s) and record a flamegraph.
|
||||||
|
# Requires sudo on Linux because of kernel.yama.ptrace_scope=1 by default.
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
DURATION="${DURATION:-30}"
|
||||||
|
OUT="${OUT:-profiles/pyspy-$(date +%s).svg}"
|
||||||
|
mkdir -p "$(dirname "$OUT")"
|
||||||
|
|
||||||
|
PID="$(pgrep -f 'uvicorn decnet.web.api' | head -n 1 || true)"
|
||||||
|
if [[ -z "${PID}" ]]; then
|
||||||
|
echo "No uvicorn worker found. Start the API first (e.g. 'decnet deploy ...')." >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "Attaching py-spy to PID ${PID} for ${DURATION}s -> ${OUT}"
|
||||||
|
sudo py-spy record -o "${OUT}" -p "${PID}" -d "${DURATION}" --subprocesses
|
||||||
|
echo "Wrote ${OUT}"
|
||||||
69
tests/perf/README.md
Normal file
69
tests/perf/README.md
Normal file
@@ -0,0 +1,69 @@
|
|||||||
|
# DECNET Profiling
|
||||||
|
|
||||||
|
Five complementary lenses. Pick whichever answers the question you have.
|
||||||
|
|
||||||
|
## 1. Whole-process sampling — py-spy
|
||||||
|
|
||||||
|
Attach to a running API and record a flamegraph for 30s. Requires `sudo`
|
||||||
|
(Linux ptrace scope).
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./scripts/profile/pyspy-attach.sh # auto-finds uvicorn pid
|
||||||
|
sudo py-spy record -o profile.svg -p <PID> -d 30 --subprocesses
|
||||||
|
```
|
||||||
|
|
||||||
|
If py-spy "doesn't work", it is almost always one of:
|
||||||
|
- Attached to the Typer CLI PID, not the uvicorn worker PID (use `pgrep -f 'uvicorn decnet.web.api'`).
|
||||||
|
- `kernel.yama.ptrace_scope=1` — run with `sudo` or `sudo sysctl kernel.yama.ptrace_scope=0`.
|
||||||
|
- The API isn't actually running (a `--dry-run` deploy starts nothing).
|
||||||
|
|
||||||
|
## 2. Per-request flamegraphs — Pyinstrument
|
||||||
|
|
||||||
|
Set the env flag, hit endpoints, find HTML flamegraphs under `./profiles/`.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
DECNET_PROFILE_REQUESTS=true decnet deploy --mode unihost --deckies 1
|
||||||
|
# in another shell:
|
||||||
|
curl http://127.0.0.1:8000/api/v1/health
|
||||||
|
open profiles/*.html
|
||||||
|
```
|
||||||
|
|
||||||
|
Off by default — zero overhead when the flag is unset.
|
||||||
|
|
||||||
|
## 3. Deterministic call graph — cProfile + snakeviz
|
||||||
|
|
||||||
|
For one-shot profiling of CLI commands or scripts.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./scripts/profile/cprofile-cli.sh services # profiles `decnet services`
|
||||||
|
snakeviz profiles/cprofile.prof
|
||||||
|
```
|
||||||
|
|
||||||
|
## 4. Micro-benchmarks — pytest-benchmark
|
||||||
|
|
||||||
|
Regression-gate repository hot paths.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pytest -m bench tests/perf/ -n0 # SQLite backend (default)
|
||||||
|
DECNET_DB_TYPE=mysql pytest -m bench tests/perf/ -n0
|
||||||
|
```
|
||||||
|
|
||||||
|
Note: `-n0` disables xdist. `pytest-benchmark` refuses to measure under
|
||||||
|
parallel workers, which is the project default (`-n logical --dist loadscope`).
|
||||||
|
|
||||||
|
## 5. Memory allocation — memray
|
||||||
|
|
||||||
|
Hunt leaks and allocation hot spots in the API / workers.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./scripts/profile/memray-api.sh # runs uvicorn under memray
|
||||||
|
memray flamegraph profiles/memray.bin
|
||||||
|
```
|
||||||
|
|
||||||
|
## Load generation
|
||||||
|
|
||||||
|
Pair any of the in-process lenses (2, 5) with Locust for realistic traffic:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pytest -m stress tests/stress/
|
||||||
|
```
|
||||||
0
tests/perf/__init__.py
Normal file
0
tests/perf/__init__.py
Normal file
36
tests/perf/conftest.py
Normal file
36
tests/perf/conftest.py
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
import asyncio
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from decnet.web.db.factory import get_repository
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
|
||||||
|
def event_loop():
|
||||||
|
loop = asyncio.new_event_loop()
|
||||||
|
yield loop
|
||||||
|
loop.close()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
|
||||||
|
def repo(tmp_path_factory, event_loop):
|
||||||
|
path = tmp_path_factory.mktemp("perf") / "bench.db"
|
||||||
|
r = get_repository(db_path=str(path))
|
||||||
|
event_loop.run_until_complete(r.initialize())
|
||||||
|
return r
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
|
||||||
|
def seeded_repo(repo, event_loop):
|
||||||
|
async def _seed():
|
||||||
|
for i in range(1000):
|
||||||
|
await repo.add_log({
|
||||||
|
"decky": f"decky-{i % 10:02d}",
|
||||||
|
"service": ["ssh", "ftp", "smb", "rdp"][i % 4],
|
||||||
|
"event_type": "connect",
|
||||||
|
"attacker_ip": f"10.0.{i // 256}.{i % 256}",
|
||||||
|
"raw_line": f"event {i}",
|
||||||
|
"fields": "{}",
|
||||||
|
"msg": "",
|
||||||
|
})
|
||||||
|
event_loop.run_until_complete(_seed())
|
||||||
|
return repo
|
||||||
60
tests/perf/test_repo_bench.py
Normal file
60
tests/perf/test_repo_bench.py
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
"""
|
||||||
|
Micro-benchmarks for the repository hot paths.
|
||||||
|
|
||||||
|
Run with:
|
||||||
|
pytest -m bench tests/perf/
|
||||||
|
|
||||||
|
These do NOT run in the default suite (see `addopts` in pyproject.toml).
|
||||||
|
"""
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
pytestmark = pytest.mark.bench
|
||||||
|
|
||||||
|
|
||||||
|
def test_add_log_bench(benchmark, repo, event_loop):
|
||||||
|
payload = {
|
||||||
|
"decky": "decky-bench",
|
||||||
|
"service": "ssh",
|
||||||
|
"event_type": "connect",
|
||||||
|
"attacker_ip": "10.0.0.1",
|
||||||
|
"raw_line": "bench event",
|
||||||
|
"fields": "{}",
|
||||||
|
"msg": "",
|
||||||
|
}
|
||||||
|
|
||||||
|
def run():
|
||||||
|
event_loop.run_until_complete(repo.add_log(payload))
|
||||||
|
|
||||||
|
benchmark(run)
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_logs_bench(benchmark, seeded_repo, event_loop):
|
||||||
|
def run():
|
||||||
|
return event_loop.run_until_complete(seeded_repo.get_logs(limit=50, offset=0))
|
||||||
|
|
||||||
|
result = benchmark(run)
|
||||||
|
assert len(result) == 50
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_total_logs_bench(benchmark, seeded_repo, event_loop):
|
||||||
|
def run():
|
||||||
|
return event_loop.run_until_complete(seeded_repo.get_total_logs())
|
||||||
|
|
||||||
|
benchmark(run)
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_logs_search_bench(benchmark, seeded_repo, event_loop):
|
||||||
|
def run():
|
||||||
|
return event_loop.run_until_complete(
|
||||||
|
seeded_repo.get_logs(limit=50, offset=0, search="service:ssh")
|
||||||
|
)
|
||||||
|
|
||||||
|
benchmark(run)
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_user_by_username_bench(benchmark, seeded_repo, event_loop):
|
||||||
|
def run():
|
||||||
|
return event_loop.run_until_complete(seeded_repo.get_user_by_username("admin"))
|
||||||
|
|
||||||
|
benchmark(run)
|
||||||
Reference in New Issue
Block a user