merge: testing → main (reconcile 2-week divergence)

This commit is contained in:
2026-04-28 18:36:00 -04:00
parent 499836c9e4
commit 862e4dbb31
1235 changed files with 160255 additions and 7996 deletions

View File

@@ -0,0 +1,192 @@
#!/usr/bin/env python3
"""
Aggregate pyinstrument request profiles from ./profiles/*.html.
The PyinstrumentMiddleware writes one HTML per request. After a Locust run
there are hundreds of them — reading one by one is useless. This rolls
everything up into two views:
1. Per-endpoint summary (count, mean/p50/p95/max wall-time)
2. Top hot functions by cumulative self-time across ALL requests
Usage:
scripts/profile/aggregate_requests.py # ./profiles/
scripts/profile/aggregate_requests.py --dir PATH
scripts/profile/aggregate_requests.py --top 30 # show top 30 funcs
scripts/profile/aggregate_requests.py --endpoint login # filter
Self-time of a frame = frame.time - sum(child.time) — i.e. time spent
executing the function's own code, excluding descendants. That's the
right signal for "where is the CPU actually going".
"""
from __future__ import annotations
import argparse
import json
import re
import statistics
from collections import defaultdict
from pathlib import Path
# Expected profile filename shape: `<digits>-<METHOD>-<slug>.html`.
# Named groups: `ts` (leading digits — presumably a timestamp, confirm against
# the middleware that writes the files), `method` (upper-case letters only) and
# `slug` (everything up to the `.html` suffix).
_FILENAME_RE = re.compile(r"^(?P<ts>\d+)-(?P<method>[A-Z]+)-(?P<slug>.+)\.html$")
# Captures the JSON object assigned to `sessionData` in the rendered HTML.
# The lazy `.*?` is anchored on the `pyinstrumentHTMLRenderer` token that
# follows the assignment, so the capture runs to the closing brace of the
# whole object rather than stopping at the first nested `}`.
_SESSION_RE = re.compile(r"const sessionData = (\{.*?\});\s*\n\s*pyinstrumentHTMLRenderer", re.DOTALL)


def load_session(path: Path) -> tuple[dict, dict] | None:
    """Extract the embedded pyinstrument payload from one profile HTML.

    Args:
        path: HTML file to read.

    Returns:
        ``(session_summary, frame_tree_root)``, or ``None`` when the file
        cannot be read, contains no ``sessionData`` blob, or the blob is not
        valid JSON carrying both ``session`` and ``frame_tree`` keys.
    """
    try:
        # Decode explicitly as UTF-8 and replace undecodable bytes: relying on
        # the locale encoding made one odd byte raise UnicodeDecodeError, which
        # is not an OSError and therefore aborted the whole aggregation run.
        text = path.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return None

    match = _SESSION_RE.search(text)
    if match is None:
        return None

    try:
        payload = json.loads(match.group(1))
        return payload["session"], payload["frame_tree"]
    except (json.JSONDecodeError, KeyError):
        return None
_SYNTHETIC = {"[self]", "[await]"}
def _is_synthetic(identifier: str) -> bool:
"""Pyinstrument leaf markers: `[self]` / `[await]` carry no file/line."""
return identifier in _SYNTHETIC or identifier.startswith(("[self]", "[await]"))
def walk_self_time(frame: dict | None, acc: dict[str, float], parent_ident: str | None = None) -> None:
    """
    Add the self-time of *frame* and all of its descendants into *acc*.

    *acc* maps frame identifier -> seconds and is updated in place. A frame's
    self-time is its own `time` minus the summed `time` of its children; only
    positive values are recorded. Synthetic `[self]` / `[await]` leaves are not
    given a bucket of their own: their full time is credited to the enclosing
    real frame (*parent_ident*), falling back to the synthetic identifier when
    there is no parent. Frames that are empty or lack an identifier are ignored
    together with their subtree.
    """
    if not frame:
        return
    ident = frame.get("identifier")
    if not ident:
        return

    frame_time = frame.get("time", 0.0)
    kids = frame.get("children") or []
    own_time = frame_time - sum(kid.get("time", 0.0) for kid in kids)

    if _is_synthetic(ident):
        # Synthetic leaves are charged to the real function that owns them.
        bucket = parent_ident or ident
        acc[bucket] = acc.get(bucket, 0.0) + frame_time
        return

    if own_time > 0:
        acc[ident] = acc.get(ident, 0.0) + own_time

    for kid in kids:
        walk_self_time(kid, acc, parent_ident=ident)
def short_ident(identifier: str) -> str:
    """Compact a NUL-separated `func / abs path / line` identifier for display.

    A three-part identifier becomes the function name padded to 30 columns,
    then `basename:line`. Anything else is returned truncated to 80 characters.
    """
    pieces = identifier.split("\x00")
    if len(pieces) != 3:
        return identifier[:80]
    func, path, line = pieces
    filename = Path(path).name
    return f"{func:30s} {filename}:{line}"
def percentile(values: list[float], p: float) -> float:
    """Return the *p* quantile (fraction, e.g. 0.95) of *values*.

    Uses linear interpolation between the two nearest ranks of the sorted
    data. An empty input yields 0.0. The caller's list is not modified.
    """
    if not values:
        return 0.0
    ordered = sorted(values)
    last = len(ordered) - 1
    rank = last * p
    below = int(rank)
    above = min(below + 1, last)
    if below == above:
        # Rank falls on the final element; nothing to interpolate towards.
        return ordered[below]
    gap = ordered[above] - ordered[below]
    return ordered[below] + gap * (rank - below)
def main() -> None:
    """Aggregate every profile HTML under --dir and print three reports.

    Reports, in order: per-endpoint wall-time statistics, the --top hottest
    functions by summed self-time over all parsed requests, and the three
    hottest functions for each endpoint. Exits with a message when the
    directory holds no `*.html` files.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--dir", default="profiles")
    parser.add_argument("--top", type=int, default=20)
    parser.add_argument("--endpoint", default=None, help="substring filter on endpoint slug")
    args = parser.parse_args()

    root = Path(args.dir)
    files = sorted(root.glob("*.html"))
    if not files:
        raise SystemExit(f"no HTMLs in {root}/")

    per_endpoint: dict[str, list[float]] = defaultdict(list)
    global_self: dict[str, float] = {}
    per_endpoint_self: dict[str, dict[str, float]] = defaultdict(dict)
    parsed = 0
    skipped = 0

    for html in files:
        name_match = _FILENAME_RE.match(html.name)
        if name_match is None:
            skipped += 1
            continue
        # Underscores in the slug are turned back into path separators.
        route = name_match["slug"].replace("_", "/")
        endpoint = f"{name_match['method']} /{route}"
        if args.endpoint and args.endpoint not in endpoint:
            # Filtered-out endpoints count as neither parsed nor skipped.
            continue
        loaded = load_session(html)
        if loaded is None:
            skipped += 1
            continue
        session, root_frame = loaded
        per_endpoint[endpoint].append(session.get("duration", 0.0))
        walk_self_time(root_frame, global_self)
        walk_self_time(root_frame, per_endpoint_self[endpoint])
        parsed += 1

    heavy_rule = "=" * 100
    light_rule = "-" * 100

    print(f"parsed: {parsed} skipped: {skipped} from {root}/\n")

    # --- Report 1: wall-time per endpoint, heaviest total time first. ---
    print(heavy_rule)
    print("PER-ENDPOINT WALL-TIME")
    print(heavy_rule)
    print(f"{'endpoint':<55} {'n':>6} {'mean':>9} {'p50':>9} {'p95':>9} {'max':>9}")
    print(light_rule)
    rows = sorted(per_endpoint.items(), key=lambda kv: -statistics.mean(kv[1]) * len(kv[1]))
    for ep, durations in rows:
        mean_ms = statistics.mean(durations) * 1000
        p50_ms = percentile(durations, 0.50) * 1000
        p95_ms = percentile(durations, 0.95) * 1000
        max_ms = max(durations) * 1000
        print(f"{ep[:55]:<55} {len(durations):>6} "
              f"{mean_ms:>8.1f}ms "
              f"{p50_ms:>8.1f}ms "
              f"{p95_ms:>8.1f}ms "
              f"{max_ms:>8.1f}ms")

    # --- Report 2: hottest functions over all requests. ---
    print()
    print(heavy_rule)
    print(f"TOP {args.top} HOT FUNCTIONS BY CUMULATIVE SELF-TIME (across {parsed} requests)")
    print(heavy_rule)
    # `or 1.0` avoids dividing by zero when nothing was accumulated.
    total_self = sum(global_self.values()) or 1.0
    ranked = sorted(global_self.items(), key=lambda kv: -kv[1])
    print(f"{'fn file:line':<70} {'self':>10} {'share':>8}")
    print(light_rule)
    for ident, t in ranked[: args.top]:
        share = t / total_self * 100
        print(f"{short_ident(ident):<70} {t*1000:>8.1f}ms {share:>6.1f}%")

    # --- Report 3: three hottest functions for each endpoint. ---
    print()
    print(heavy_rule)
    print("TOP 3 HOT FUNCTIONS PER ENDPOINT")
    print(heavy_rule)
    endpoints_by_cost = sorted(per_endpoint_self, key=lambda e: -sum(per_endpoint_self[e].values()))
    for ep in endpoints_by_cost:
        acc = per_endpoint_self[ep]
        ep_total = sum(acc.values()) or 1.0
        print(f"\n{ep} ({len(per_endpoint[ep])} samples, {ep_total*1000:.0f}ms total self)")
        hottest = sorted(acc.items(), key=lambda kv: -kv[1])[:3]
        for ident, t in hottest:
            print(f" {short_ident(ident):<70} {t*1000:>7.1f}ms ({t/ep_total*100:>4.1f}%)")
# Run the aggregation only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    main()

105
scripts/profile/classify_usage.py Executable file
View File

@@ -0,0 +1,105 @@
#!/usr/bin/env python3
"""
Classify the shape of a memray usage_over_time.csv as plateau, climb,
or climb-and-drop. Operates on the `memory_size_bytes` column.
Usage:
scripts/profile/classify_usage.py profiles/usage_over_time.csv
scripts/profile/classify_usage.py # newest *.csv in ./profiles/
"""
from __future__ import annotations
import csv
import statistics
import sys
from pathlib import Path
def _mb(n: float) -> str:
return f"{n / (1024 * 1024):.1f} MB"
def load(path: Path) -> list[tuple[int, int]]:
    """Read `(timestamp, memory_size_bytes)` samples from a usage CSV.

    Args:
        path: CSV file with a header row naming at least the `timestamp` and
            `memory_size_bytes` columns.

    Returns:
        The usable samples as integer pairs, sorted by timestamp.

    Rows that lack either column, or whose values are not integers, are
    skipped. Exits through ``sys.exit`` when no row is usable.
    """
    samples: list[tuple[int, int]] = []
    # newline="" is what the csv module requires of file objects it reads.
    with path.open(newline="") as f:
        for row in csv.DictReader(f):
            try:
                samples.append((int(row["timestamp"]), int(row["memory_size_bytes"])))
            except (KeyError, ValueError, TypeError):
                # TypeError: DictReader pads short rows with None (e.g. a last
                # line truncated mid-write), and int(None) is not a ValueError.
                continue
    if not samples:
        sys.exit(f"no usable rows in {path}")
    samples.sort(key=lambda sample: sample[0])
    return samples
def classify(series: list[tuple[int, int]]) -> None:
    """Print summary statistics and a verdict for a memory-usage series.

    *series* is a list of `(timestamp, memory_size_bytes)` pairs; only the
    memory values are used, in the order given. The verdict depends solely on
    how far the final plateau sits below the peak.
    """
    mem = [size for _, size in series]
    n = len(mem)
    # Both the baseline and the plateau are medians over a 10% window
    # (at least one sample) at the start and end of the capture.
    window = max(1, n // 10)

    peak = max(mem)
    peak_idx = mem.index(peak)
    baseline = statistics.median(mem[:window])
    plateau = statistics.median(mem[-window:])

    # Amount released after the peak, absolute and relative to the peak.
    tail_drop = peak - plateau
    tail_drop_pct = (tail_drop / peak * 100) if peak else 0.0

    # End-of-run level compared with the start-of-run level.
    net_growth = plateau - baseline
    net_growth_pct = (net_growth / baseline * 100) if baseline else 0.0

    # Position of the first peak sample as a fraction of the timeline.
    peak_position = peak_idx / (n - 1) if n > 1 else 0.0

    print(f"samples: {n}")
    print(f"baseline (first 10%): {_mb(baseline)}")
    print(f"peak: {_mb(peak)} at {peak_position:.0%} of run")
    print(f"plateau (last 10%): {_mb(plateau)}")
    print(f"tail drop: {_mb(tail_drop)} ({tail_drop_pct:+.1f}% vs peak)")
    print(f"net growth: {_mb(net_growth)} ({net_growth_pct:+.1f}% vs baseline)")
    print()

    # Without a post-load rest period, the release after the peak is the only
    # dependable leak signal: growth against a cold interpreter always shows
    # up under an active workload and therefore says nothing.
    #
    # If the workload was still running when the capture ended, staying at the
    # peak is inconclusive rather than proof of a leak; a re-run with a rest
    # period after the load settles it.
    if tail_drop_pct >= 10:
        verdict = (
            "verdict: CLIMB-AND-DROP — memory released after peak.",
            " → no leak. Profile CPU next (pyinstrument).",
        )
    elif tail_drop_pct >= 3:
        verdict = (
            "verdict: MOSTLY-RELEASED — partial release after peak.",
            " → likely healthy; re-run with a rest period after load",
            " to confirm (memray should capture post-workload idle).",
        )
    else:
        verdict = (
            "verdict: SUSTAINED-AT-PEAK — memory held near peak at end of capture.",
            " → AMBIGUOUS: could be a leak, or the workload was still",
            " running when memray stopped. Re-run with a rest period",
            " after load, then check: memray flamegraph --leaks <bin>",
        )
    for line in verdict:
        print(line)
def main() -> None:
    """Classify the CSV named on the command line, or the newest one in ./profiles/.

    With no argument, the `*.csv` file in `profiles` with the latest
    modification time is used; the script exits with a message if there is
    none. Arguments after the first are ignored.
    """
    cli_args = sys.argv[1:]
    if cli_args:
        target = Path(cli_args[0])
    else:
        candidates = sorted(
            Path("profiles").glob("*.csv"),
            key=lambda p: p.stat().st_mtime,
        )
        if not candidates:
            sys.exit("no CSV found; pass a path or put one in ./profiles/")
        # Sorted oldest to newest, so the last entry is the most recent.
        target = candidates[-1]
    print(f"analyzing {target}\n")
    classify(load(target))
# Run the classifier only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    main()

17
scripts/profile/cprofile-cli.sh Executable file
View File

@@ -0,0 +1,17 @@
#!/usr/bin/env bash
# Profile a `decnet` subcommand with cProfile and save a .prof file that
# snakeviz can open.
#
# Usage: scripts/profile/cprofile-cli.sh services
#        scripts/profile/cprofile-cli.sh status
#
# Set OUT to override the output path (default: profiles/cprofile-<epoch>.prof).
set -euo pipefail

if (( $# < 1 )); then
    echo "Usage: $0 <decnet-subcommand> [args...]" >&2
    exit 1
fi

# Assign the default only when OUT is unset or empty.
: "${OUT:=profiles/cprofile-$(date +%s).prof}"
mkdir -p "$(dirname "$OUT")"

# All script arguments are forwarded to decnet.cli unchanged.
python -m cProfile -o "${OUT}" -m decnet.cli "$@"

echo "Wrote ${OUT}"
echo "View with: snakeviz ${OUT}"

16
scripts/profile/memray-api.sh Executable file
View File

@@ -0,0 +1,16 @@
#!/usr/bin/env bash
# Run the DECNET API under memray to capture an allocation profile.
# Stop with Ctrl-C; then render with `memray flamegraph <bin>`.
#
# Environment overrides:
#   DECNET_API_HOST  bind address (default: 127.0.0.1)
#   DECNET_API_PORT  bind port    (default: 8000)
#   OUT              capture file (default: profiles/memray-<epoch>.bin)
set -euo pipefail

HOST="${DECNET_API_HOST:-127.0.0.1}"
PORT="${DECNET_API_PORT:-8000}"
OUT="${OUT:-profiles/memray-$(date +%s).bin}"
mkdir -p "$(dirname "$OUT")"

# The run is normally ended with Ctrl-C, so the memray command finishes with
# a signal or non-zero status and, under `set -e`, nothing placed after it
# would execute. Emit the render hint from an EXIT trap instead, and only when
# a capture file was actually written.
print_render_hint() {
    if [[ -f "${OUT}" ]]; then
        echo "Render with: memray flamegraph ${OUT}"
    fi
}
trap print_render_hint EXIT

echo "Starting uvicorn under memray -> ${OUT}"
python -m memray run --trace-python-allocators --follow-fork \
    -o "${OUT}" -m uvicorn decnet.web.api:app \
    --host "${HOST}" --port "${PORT}" --log-level warning

32
scripts/profile/pyspy-attach.sh Executable file
View File

@@ -0,0 +1,32 @@
#!/usr/bin/env bash
# Attach py-spy to the running DECNET uvicorn worker(s) and record a flamegraph.
# Requires sudo on Linux because of kernel.yama.ptrace_scope=1 by default.
#
# Environment overrides:
#   DURATION  seconds to record (default: 30)
#   OUT       output SVG path   (default: profiles/pyspy-<epoch>.svg)
set -euo pipefail

DURATION="${DURATION:-30}"
OUT="${OUT:-profiles/pyspy-$(date +%s).svg}"
mkdir -p "$(dirname "$OUT")"

# `|| true` keeps errexit/pipefail from aborting when pgrep matches nothing,
# so the explicit message below is shown instead.
PID="$(pgrep -f 'uvicorn decnet.web.api' | head -n 1 || true)"
if [[ -z "${PID}" ]]; then
    echo "No uvicorn worker found. Start the API first (e.g. 'decnet deploy ...')." >&2
    exit 1
fi

PY_VER="$(python -c 'import sys; print(f"{sys.version_info[0]}.{sys.version_info[1]}")')"
PY_MAJOR="${PY_VER%%.*}"
PY_MINOR="${PY_VER#*.}"
# Compare the version numerically. `[[ a > b ]]` compares strings
# lexicographically, so "3.9" sorted above "3.14" and supported interpreters
# (3.9 and older) were rejected.
if (( PY_MAJOR > 3 || (PY_MAJOR == 3 && PY_MINOR >= 14) )); then
    cat >&2 <<EOF
WARNING: py-spy 0.4.1 (latest on PyPI) does not yet support Python ${PY_VER}.
Attaching will fail with "No python processes found in process <pid>".
Use one of the other lenses for now:
DECNET_PROFILE_REQUESTS=true # pyinstrument, per-request flamegraphs
scripts/profile/memray-api.sh # memory allocation profiling
scripts/profile/cprofile-cli.sh <cmd> # deterministic CLI profiling
Track upstream: https://github.com/benfred/py-spy/releases
EOF
    exit 2
fi

echo "Attaching py-spy to PID ${PID} for ${DURATION}s -> ${OUT}"
sudo .venv/bin/py-spy record -o "${OUT}" -p "${PID}" -d "${DURATION}" --subprocesses
echo "Wrote ${OUT}"

69
scripts/profile/view.sh Executable file
View File

@@ -0,0 +1,69 @@
#!/usr/bin/env bash
# Open a profile artifact in the viewer that matches its file type.
#
# Usage:
#   scripts/profile/view.sh               # newest file in ./profiles/
#   scripts/profile/view.sh <file>        # explicit path
#   scripts/profile/view.sh cprofile      # newest .prof
#   scripts/profile/view.sh memray        # newest memray .bin
#   scripts/profile/view.sh pyspy         # newest .svg
#   scripts/profile/view.sh pyinstrument  # newest pyinstrument .html
#
# Environment:
#   DIR   directory searched for artifacts (default: profiles)
#   VIEW  memray reporter: flamegraph|table|tree|stats|summary
#         (default: flamegraph), or `leaks` for a flamegraph with --leaks
set -euo pipefail

DIR="${DIR:-profiles}"
VIEW="${VIEW:-flamegraph}"

if ! [[ -d "${DIR}" ]]; then
    echo "No ${DIR}/ directory yet — run one of the profile scripts first." >&2
    exit 1
fi

# pick_newest PATTERN [EXTRA_FIND_TESTS...]
# Print the most recently modified regular file directly inside DIR whose name
# matches PATTERN; extra arguments are passed to find as additional tests.
pick_newest() {
    local pattern="$1"
    shift
    find "${DIR}" -maxdepth 1 -type f -name "${pattern}" "$@" -printf '%T@ %p\n' 2>/dev/null \
        | sort -n | tail -n 1 | cut -d' ' -f2-
}

# Resolve the first argument: a known keyword selects by artifact type,
# anything else is taken as a literal path.
selector="${1:-}"
TARGET=""
case "${selector}" in
    "")           TARGET="$(pick_newest '*')" ;;
    cprofile)     TARGET="$(pick_newest '*.prof')" ;;
    memray)       TARGET="$(pick_newest 'memray-*.bin')" ;;
    pyspy)        TARGET="$(pick_newest 'pyspy-*.svg')" ;;
    # HTML files named memray-* are excluded from the pyinstrument lookup.
    pyinstrument) TARGET="$(pick_newest '*.html' ! -name 'memray-*')" ;;
    *)            TARGET="${selector}" ;;
esac

if ! [[ -n "${TARGET}" && -f "${TARGET}" ]]; then
    echo "No matching profile artifact found." >&2
    exit 1
fi

echo "Opening ${TARGET}"

# Dispatch on the file extension; each viewer replaces this shell via exec.
case "${TARGET}" in
    *.prof)
        exec snakeviz "${TARGET}"
        ;;
    *.bin)
        case "${VIEW}" in
            leaks)
                exec memray flamegraph --leaks -f "${TARGET}"
                ;;
            flamegraph|table)
                exec memray "${VIEW}" -f "${TARGET}"
                ;;
            tree|stats|summary)
                exec memray "${VIEW}" "${TARGET}"
                ;;
            *)
                echo "Unknown VIEW=${VIEW}" >&2
                exit 1
                ;;
        esac
        ;;
    *.svg|*.html)
        exec xdg-open "${TARGET}"
        ;;
    *)
        echo "Don't know how to view ${TARGET}" >&2
        exit 1
        ;;
esac