merge: testing → main (reconcile 2-week divergence)

2026-04-28 18:36:00 -04:00
parent 499836c9e4
commit 862e4dbb31
1235 changed files with 160255 additions and 7996 deletions
--- a/scripts/profile/aggregate_requests.py
+++ b/scripts/profile/aggregate_requests.py
@@ -0,0 +1,192 @@
+#!/usr/bin/env python3
+"""
+Aggregate pyinstrument request profiles from ./profiles/*.html.
+
+The PyinstrumentMiddleware writes one HTML per request. After a Locust run
+there are hundreds of them — reading one by one is useless. This rolls
+everything up into two views:
+
+    1. Per-endpoint summary (count, mean/p50/p95/max wall-time)
+    2. Top hot functions by cumulative self-time across ALL requests
+
+Usage:
+    scripts/profile/aggregate_requests.py               # ./profiles/
+    scripts/profile/aggregate_requests.py --dir PATH
+    scripts/profile/aggregate_requests.py --top 30      # show top 30 funcs
+    scripts/profile/aggregate_requests.py --endpoint login   # filter
+
+Self-time of a frame = frame.time - sum(child.time) — i.e. time spent
+executing the function's own code, excluding descendants. That's the
+right signal for "where is the CPU actually going".
+"""
+from __future__ import annotations
+
+import argparse
+import json
+import re
+import statistics
+from collections import defaultdict
+from pathlib import Path
+
+
+# Profile file names look like `<digits>-<METHOD>-<slug>.html`.
+_FILENAME_RE = re.compile(r"^(?P<ts>\d+)-(?P<method>[A-Z]+)-(?P<slug>.+)\.html$")
+# Captures the JSON object assigned to `sessionData`; the trailing
+# `pyinstrumentHTMLRenderer` anchor keeps the lazy `.*?` from stopping at an
+# inner closing brace.
+_SESSION_RE = re.compile(r"const sessionData = (\{.*?\});\s*\n\s*pyinstrumentHTMLRenderer", re.DOTALL)
+
+
+def load_session(path: Path) -> tuple[dict, dict | None] | None:
+    """Extract the embedded session payload from one profile HTML file.
+
+    Returns ``(session_summary, frame_tree_root)``, or ``None`` when the file
+    cannot be read, carries no ``sessionData`` block, or the block is not the
+    expected JSON shape. A bad file must never abort the whole aggregation,
+    so every failure mode maps to ``None``.
+    """
+    try:
+        # Decode as UTF-8 with replacement instead of relying on the locale
+        # default: a stray undecodable byte would otherwise raise
+        # UnicodeDecodeError (not an OSError) and kill the run.
+        text = path.read_text(encoding="utf-8", errors="replace")
+    except OSError:
+        return None
+
+    match = _SESSION_RE.search(text)
+    if match is None:
+        return None
+
+    try:
+        payload = json.loads(match.group(1))
+        session = payload["session"]
+        frame_tree = payload["frame_tree"]
+    except (json.JSONDecodeError, KeyError):
+        return None
+
+    # Callers call `.get()` on the summary; reject anything that is not a dict
+    # (e.g. `"session": null`) here rather than crashing later.
+    if not isinstance(session, dict):
+        return None
+    return session, frame_tree
+
+
+_SYNTHETIC = {"[self]", "[await]"}
+# Tuple form for str.startswith, which does not accept a set.
+_SYNTHETIC_PREFIXES = tuple(sorted(_SYNTHETIC))
+
+
+def _is_synthetic(identifier: str) -> bool:
+    """Pyinstrument leaf markers: `[self]` / `[await]` carry no file/line."""
+    # A prefix test also covers exact matches, so no separate membership check.
+    return identifier.startswith(_SYNTHETIC_PREFIXES)
+
+
+def walk_self_time(frame: dict | None, acc: dict[str, float], parent_ident: str | None = None) -> None:
+    """
+    Accumulate self-time by frame identifier into ``acc`` (mutated in place).
+
+    Self-time of a real frame is its ``time`` minus the sum of its children's
+    ``time``; only positive values are recorded. Synthetic `[self]` /
+    `[await]` leaves are credited with their full time to the enclosing real
+    function (``parent_ident``), or to their own identifier when there is no
+    parent, so the output is per-function hotspots rather than one global
+    `[self]` bucket. Frames without an identifier are skipped together with
+    their subtree.
+
+    The walk uses an explicit stack instead of recursion so that very deep
+    call trees cannot raise RecursionError. Children are pushed in reverse so
+    frames are still visited in the same pre-order as a recursive walk, which
+    keeps the floating-point accumulation order unchanged.
+    """
+    pending: list[tuple[dict | None, str | None]] = [(frame, parent_ident)]
+    while pending:
+        node, owner = pending.pop()
+        if not node:
+            continue
+        ident = node.get("identifier")
+        if not ident:
+            continue
+        total = node.get("time", 0.0)
+
+        if _is_synthetic(ident):
+            # Reattribute synthetic self-time to the enclosing real function.
+            key = owner if owner else ident
+            acc[key] = acc.get(key, 0.0) + total
+            continue
+
+        children = node.get("children") or []
+        self_time = total - sum(c.get("time", 0.0) for c in children)
+        if self_time > 0:
+            acc[ident] = acc.get(ident, 0.0) + self_time
+        pending.extend((child, ident) for child in reversed(children))
+
+
+def short_ident(identifier: str) -> str:
+    """Render a frame identifier compactly for the report tables.
+
+    A three-field identifier separated by NUL characters (`\\x00`), i.e.
+    function, absolute path, line, becomes `func  file.py:LINE` with the
+    function name padded to 30 columns. Anything else is returned truncated
+    to 80 characters.
+    """
+    fields = identifier.split("\x00")
+    if len(fields) != 3:
+        return identifier[:80]
+    name, filename, lineno = fields
+    basename = Path(filename).name
+    return f"{name:30s}  {basename}:{lineno}"
+
+
+def percentile(values: list[float], p: float) -> float:
+    """Return the ``p`` quantile (``p`` as a fraction, e.g. 0.95) of ``values``.
+
+    Uses linear interpolation between the two closest ranks of the sorted
+    data. An empty list yields 0.0. The input list is not modified.
+    """
+    if not values:
+        return 0.0
+    ordered = sorted(values)
+    last = len(ordered) - 1
+    rank = last * p
+    lower = int(rank)
+    upper = min(lower + 1, last)
+    if lower == upper:
+        # Rank landed on the final element; nothing to interpolate towards.
+        return ordered[lower]
+    fraction = rank - lower
+    return ordered[lower] + (ordered[upper] - ordered[lower]) * fraction
+
+
+def main() -> None:
+    """Parse CLI options, scan the profile directory, and print three reports.
+
+    Reports, in order: per-endpoint wall-time statistics, the top-N functions
+    by summed self-time over every parsed request, and the top three
+    functions for each endpoint. Exits via ``SystemExit`` when the directory
+    contains no ``*.html`` files.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--dir", default="profiles")
+    parser.add_argument("--top", type=int, default=20)
+    parser.add_argument("--endpoint", default=None, help="substring filter on endpoint slug")
+    args = parser.parse_args()
+
+    root = Path(args.dir)
+    html_files = sorted(root.glob("*.html"))
+    if not html_files:
+        raise SystemExit(f"no HTMLs in {root}/")
+
+    durations_by_endpoint: dict[str, list[float]] = defaultdict(list)
+    self_by_endpoint: dict[str, dict[str, float]] = defaultdict(dict)
+    self_overall: dict[str, float] = {}
+    parsed = skipped = 0
+
+    for html in html_files:
+        name_match = _FILENAME_RE.match(html.name)
+        if name_match is None:
+            skipped += 1
+            continue
+        method = name_match["method"]
+        route = name_match["slug"].replace("_", "/")
+        endpoint = f"{method} /{route}"
+        # Files excluded by --endpoint are neither parsed nor counted as skipped.
+        if args.endpoint and args.endpoint not in endpoint:
+            continue
+
+        loaded = load_session(html)
+        if not loaded:
+            skipped += 1
+            continue
+        session, root_frame = loaded
+
+        durations_by_endpoint[endpoint].append(session.get("duration", 0.0))
+        # Same tree, two accumulators: the global view and this endpoint's view.
+        for bucket in (self_overall, self_by_endpoint[endpoint]):
+            walk_self_time(root_frame, bucket)
+        parsed += 1
+
+    heavy_rule = "=" * 100
+    light_rule = "-" * 100
+
+    print(f"parsed: {parsed}  skipped: {skipped}  from {root}/\n")
+
+    print(heavy_rule)
+    print("PER-ENDPOINT WALL-TIME")
+    print(heavy_rule)
+    print(f"{'endpoint':<55} {'n':>6} {'mean':>9} {'p50':>9} {'p95':>9} {'max':>9}")
+    print(light_rule)
+    # Heaviest endpoints first: mean * count is the endpoint's total wall-time.
+    by_total_time = sorted(
+        durations_by_endpoint.items(),
+        key=lambda item: statistics.mean(item[1]) * len(item[1]),
+        reverse=True,
+    )
+    for ep, durations in by_total_time:
+        mean_ms = statistics.mean(durations) * 1000
+        p50_ms = percentile(durations, 0.50) * 1000
+        p95_ms = percentile(durations, 0.95) * 1000
+        max_ms = max(durations) * 1000
+        print(
+            f"{ep[:55]:<55} {len(durations):>6} "
+            f"{mean_ms:>8.1f}ms {p50_ms:>8.1f}ms {p95_ms:>8.1f}ms {max_ms:>8.1f}ms"
+        )
+
+    print()
+    print(heavy_rule)
+    print(f"TOP {args.top} HOT FUNCTIONS BY CUMULATIVE SELF-TIME (across {parsed} requests)")
+    print(heavy_rule)
+    # `or 1.0` avoids dividing by zero when nothing was accumulated.
+    total_self = sum(self_overall.values()) or 1.0
+    hottest = sorted(self_overall.items(), key=lambda item: item[1], reverse=True)[: args.top]
+    print(f"{'fn  file:line':<70} {'self':>10} {'share':>8}")
+    print(light_rule)
+    for ident, seconds in hottest:
+        share = seconds / total_self * 100
+        print(f"{short_ident(ident):<70} {seconds*1000:>8.1f}ms {share:>6.1f}%")
+
+    print()
+    print(heavy_rule)
+    print("TOP 3 HOT FUNCTIONS PER ENDPOINT")
+    print(heavy_rule)
+    endpoints_by_self = sorted(
+        self_by_endpoint,
+        key=lambda e: sum(self_by_endpoint[e].values()),
+        reverse=True,
+    )
+    for ep in endpoints_by_self:
+        bucket = self_by_endpoint[ep]
+        ep_total = sum(bucket.values()) or 1.0
+        print(f"\n{ep}   ({len(durations_by_endpoint[ep])} samples, {ep_total*1000:.0f}ms total self)")
+        top3 = sorted(bucket.items(), key=lambda item: item[1], reverse=True)[:3]
+        for ident, seconds in top3:
+            print(f"  {short_ident(ident):<70} {seconds*1000:>7.1f}ms  ({seconds/ep_total*100:>4.1f}%)")
+
+
+if __name__ == "__main__":
+    main()
--- a/scripts/profile/classify_usage.py
+++ b/scripts/profile/classify_usage.py
@@ -0,0 +1,105 @@
+#!/usr/bin/env python3
+"""
+Classify the shape of a memray usage_over_time.csv as plateau, climb,
+or climb-and-drop. Operates on the `memory_size_bytes` column.
+
+Usage:
+    scripts/profile/classify_usage.py profiles/usage_over_time.csv
+    scripts/profile/classify_usage.py                  # newest *.csv in ./profiles/
+"""
+from __future__ import annotations
+
+import csv
+import statistics
+import sys
+from pathlib import Path
+
+
+def _mb(n: float) -> str:
+    """Format a byte count as mebibytes with one decimal, e.g. ``1.5 MB``."""
+    mebibytes = n / (1024 * 1024)
+    return "{:.1f} MB".format(mebibytes)
+
+
+def load(path: Path) -> list[tuple[int, int]]:
+    """Read ``(timestamp, memory_size_bytes)`` samples from a CSV file.
+
+    Rows that lack either column or hold non-integer values are skipped.
+    The result is sorted by timestamp. Exits the process with a message when
+    no usable row is found.
+    """
+    out: list[tuple[int, int]] = []
+    # newline="" lets the csv module do its own line-ending handling.
+    with path.open(newline="") as f:
+        for row in csv.DictReader(f):
+            try:
+                out.append((int(row["timestamp"]), int(row["memory_size_bytes"])))
+            except (KeyError, ValueError, TypeError):
+                # TypeError: a short (e.g. truncated final) row makes
+                # DictReader fill the missing field with None, and int(None)
+                # is a TypeError rather than a ValueError.
+                continue
+    if not out:
+        sys.exit(f"no usable rows in {path}")
+    out.sort(key=lambda sample: sample[0])
+    return out
+
+
+def classify(series: list[tuple[int, int]]) -> None:
+    """Print summary figures and a verdict for a memory-over-time series.
+
+    ``series`` holds ``(timestamp, memory_size_bytes)`` pairs in time order;
+    only the memory values are used. Baseline and plateau are the medians of
+    the first and last 10% of samples (at least one sample each). The verdict
+    depends solely on how far the plateau sits below the peak.
+    """
+    mem = [size for _ts, size in series]
+    n = len(mem)
+    window = max(1, n // 10)
+
+    peak = max(mem)
+    peak_idx = mem.index(peak)
+    # Fractional position of the (first) peak sample within the run.
+    peak_position = peak_idx / (n - 1) if n > 1 else 0.0
+
+    baseline = statistics.median(mem[:window])   # where the run started
+    plateau = statistics.median(mem[-window:])   # where the run settled
+
+    # Memory given back after the peak, absolute and relative to the peak.
+    tail_drop = peak - plateau
+    if peak:
+        tail_drop_pct = tail_drop / peak * 100
+    else:
+        tail_drop_pct = 0.0
+
+    # End-of-run level versus start-of-run level.
+    net_growth = plateau - baseline
+    if baseline:
+        net_growth_pct = net_growth / baseline * 100
+    else:
+        net_growth_pct = 0.0
+
+    print(f"samples: {n}")
+    print(f"baseline (first 10%): {_mb(baseline)}")
+    print(f"peak:                 {_mb(peak)}  at {peak_position:.0%} of run")
+    print(f"plateau (last 10%):   {_mb(plateau)}")
+    print(f"tail drop:            {_mb(tail_drop)}  ({tail_drop_pct:+.1f}% vs peak)")
+    print(f"net growth:           {_mb(net_growth)}  ({net_growth_pct:+.1f}% vs baseline)")
+    print()
+
+    # Why tail drop and not net growth: an active workload always ends above a
+    # cold interpreter, so growth-vs-start says little. Without a post-load
+    # rest period, the release after the peak is the only usable leak signal.
+    #
+    # If the workload was still running when capture stopped, a result of
+    # "sustained-at-peak" is inconclusive rather than proof of a leak; re-run
+    # with a rest period after the load for a definitive answer.
+    if tail_drop_pct >= 10:
+        verdict_lines = (
+            "verdict: CLIMB-AND-DROP — memory released after peak.",
+            "         → no leak. Profile CPU next (pyinstrument).",
+        )
+    elif tail_drop_pct >= 3:
+        verdict_lines = (
+            "verdict: MOSTLY-RELEASED — partial release after peak.",
+            "         → likely healthy; re-run with a rest period after load",
+            "           to confirm (memray should capture post-workload idle).",
+        )
+    else:
+        verdict_lines = (
+            "verdict: SUSTAINED-AT-PEAK — memory held near peak at end of capture.",
+            "         → AMBIGUOUS: could be a leak, or the workload was still",
+            "           running when memray stopped. Re-run with a rest period",
+            "           after load, then check: memray flamegraph --leaks <bin>",
+        )
+    for line in verdict_lines:
+        print(line)
+
+
+def main() -> None:
+    """Pick the CSV to analyze and print its classification.
+
+    Uses the first command-line argument as the path when given; otherwise
+    takes the most recently modified ``*.csv`` in ``./profiles/`` and exits
+    with a message if there is none.
+    """
+    cli_args = sys.argv[1:]
+    if cli_args:
+        target = Path(cli_args[0])
+    else:
+        by_mtime = sorted(Path("profiles").glob("*.csv"), key=lambda p: p.stat().st_mtime)
+        if not by_mtime:
+            sys.exit("no CSV found; pass a path or put one in ./profiles/")
+        # Oldest first, so the newest file is the last entry.
+        target = by_mtime[-1]
+
+    print(f"analyzing {target}\n")
+    samples = load(target)
+    classify(samples)
+
+
+if __name__ == "__main__":
+    main()
--- a/scripts/profile/cprofile-cli.sh
+++ b/scripts/profile/cprofile-cli.sh
@@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+# Profile a `decnet` subcommand with cProfile, producing a .prof file that
+# snakeviz can open. Output path defaults to profiles/cprofile-<epoch>.prof
+# and can be overridden through the OUT environment variable.
+# Usage: scripts/profile/cprofile-cli.sh services
+#        scripts/profile/cprofile-cli.sh status
+set -euo pipefail
+
+# At least the subcommand name is required.
+if (( $# == 0 )); then
+    printf 'Usage: %s <decnet-subcommand> [args...]\n' "$0" >&2
+    exit 1
+fi
+
+OUT="${OUT:-profiles/cprofile-$(date +%s).prof}"
+out_dir="$(dirname "${OUT}")"
+mkdir -p "${out_dir}"
+
+python -m cProfile -o "${OUT}" -m decnet.cli "$@"
+printf '%s\n' "Wrote ${OUT}" "View with: snakeviz ${OUT}"
--- a/scripts/profile/memray-api.sh
+++ b/scripts/profile/memray-api.sh
@@ -0,0 +1,16 @@
+#!/usr/bin/env bash
+# Run the DECNET API under memray to capture an allocation profile.
+# Stop with Ctrl-C; then render with `memray flamegraph <bin>`.
+#
+# Environment overrides:
+#   DECNET_API_HOST  bind address   (default: 127.0.0.1)
+#   DECNET_API_PORT  bind port      (default: 8000)
+#   OUT              capture file   (default: profiles/memray-<epoch>.bin)
+set -euo pipefail
+
+HOST="${DECNET_API_HOST:-127.0.0.1}"
+PORT="${DECNET_API_PORT:-8000}"
+OUT="${OUT:-profiles/memray-$(date +%s).bin}"
+mkdir -p "$(dirname "$OUT")"
+
+# Print the render hint from an EXIT trap rather than after the server
+# command: the documented way to stop is Ctrl-C, and under `set -e` a
+# non-zero or signal-terminated server would end the script before a
+# trailing echo could run. Only print it when a capture file exists.
+print_render_hint() {
+    if [[ -f "${OUT}" ]]; then
+        echo "Render with: memray flamegraph ${OUT}"
+    fi
+}
+trap print_render_hint EXIT
+
+echo "Starting uvicorn under memray -> ${OUT}"
+python -m memray run --trace-python-allocators --follow-fork \
+    -o "${OUT}" -m uvicorn decnet.web.api:app \
+    --host "${HOST}" --port "${PORT}" --log-level warning
--- a/scripts/profile/pyspy-attach.sh
+++ b/scripts/profile/pyspy-attach.sh
@@ -0,0 +1,32 @@
+#!/usr/bin/env bash
+# Attach py-spy to the running DECNET uvicorn worker(s) and record a flamegraph.
+# Requires sudo on Linux because of kernel.yama.ptrace_scope=1 by default.
+#
+# Environment overrides:
+#   DURATION  seconds to record  (default: 30)
+#   OUT       output SVG path    (default: profiles/pyspy-<epoch>.svg)
+set -euo pipefail
+
+DURATION="${DURATION:-30}"
+OUT="${OUT:-profiles/pyspy-$(date +%s).svg}"
+mkdir -p "$(dirname "$OUT")"
+
+# First matching process only; `|| true` keeps pipefail from aborting when
+# pgrep finds nothing, so the friendly message below can be shown instead.
+PID="$(pgrep -f 'uvicorn decnet.web.api' | head -n 1 || true)"
+if [[ -z "${PID}" ]]; then
+    echo "No uvicorn worker found. Start the API first (e.g. 'decnet deploy ...')." >&2
+    exit 1
+fi
+
+PY_VER="$(python -c 'import sys; print(f"{sys.version_info[0]}.{sys.version_info[1]}")')"
+# Compare versions numerically inside Python. A bash `[[ a > b ]]` test is a
+# lexicographic string comparison, under which "3.9" sorts after "3.14" and
+# older interpreters would be rejected by mistake.
+if python -c 'import sys; sys.exit(0 if sys.version_info >= (3, 14) else 1)'; then
+    cat >&2 <<EOF
+WARNING: py-spy 0.4.1 (latest on PyPI) does not yet support Python ${PY_VER}.
+Attaching will fail with "No python processes found in process <pid>".
+Use one of the other lenses for now:
+    DECNET_PROFILE_REQUESTS=true   # pyinstrument, per-request flamegraphs
+    scripts/profile/memray-api.sh  # memory allocation profiling
+    scripts/profile/cprofile-cli.sh <cmd>  # deterministic CLI profiling
+Track upstream: https://github.com/benfred/py-spy/releases
+EOF
+    exit 2
+fi
+
+echo "Attaching py-spy to PID ${PID} for ${DURATION}s -> ${OUT}"
+sudo .venv/bin/py-spy record -o "${OUT}" -p "${PID}" -d "${DURATION}" --subprocesses
+echo "Wrote ${OUT}"
--- a/scripts/profile/view.sh
+++ b/scripts/profile/view.sh
@@ -0,0 +1,69 @@
+#!/usr/bin/env bash
+# Open the newest profile artifact in the right viewer.
+#
+# Usage:
+#   scripts/profile/view.sh                 # newest file in ./profiles/
+#   scripts/profile/view.sh <file>          # explicit path
+#   scripts/profile/view.sh cprofile        # newest .prof
+#   scripts/profile/view.sh memray          # newest memray .bin
+#   scripts/profile/view.sh pyspy           # newest .svg
+#   scripts/profile/view.sh pyinstrument    # newest pyinstrument .html
+#
+# Memray viewer override:
+#   VIEW=flamegraph|table|tree|stats|summary  (default: flamegraph)
+#   VIEW=leaks   (render flamegraph with --leaks filter)
+#
+# The artifact directory defaults to ./profiles and can be changed with DIR.
+set -euo pipefail
+
+DIR="${DIR:-profiles}"
+VIEW="${VIEW:-flamegraph}"
+
+if ! [[ -d "${DIR}" ]]; then
+    echo "No ${DIR}/ directory yet — run one of the profile scripts first." >&2
+    exit 1
+fi
+
+# pick_newest PATTERN [EXTRA_FIND_PREDICATES...]
+# Print the most recently modified regular file directly inside DIR whose
+# name matches PATTERN; extra arguments are passed to find as additional
+# predicates. Prints nothing when no file matches.
+pick_newest() {
+    local pattern="$1"
+    shift
+    find "${DIR}" -maxdepth 1 -type f -name "${pattern}" "$@" -printf '%T@ %p\n' 2>/dev/null \
+        | sort -n | tail -n 1 | cut -d' ' -f2-
+}
+
+# Resolve the first argument: a known keyword selects by file type, anything
+# else is taken as an explicit path.
+selector="${1:-}"
+TARGET=""
+case "${selector}" in
+    "")           TARGET="$(pick_newest '*')" ;;
+    cprofile)     TARGET="$(pick_newest '*.prof')" ;;
+    memray)       TARGET="$(pick_newest 'memray-*.bin')" ;;
+    pyspy)        TARGET="$(pick_newest 'pyspy-*.svg')" ;;
+    # Skip memray-* HTML so only the remaining .html files are candidates.
+    pyinstrument) TARGET="$(pick_newest '*.html' ! -name 'memray-*')" ;;
+    *)            TARGET="${selector}" ;;
+esac
+
+if [[ -z "${TARGET}" ]] || [[ ! -f "${TARGET}" ]]; then
+    echo "No matching profile artifact found." >&2
+    exit 1
+fi
+
+echo "Opening ${TARGET}"
+
+# Dispatch on file extension.
+case "${TARGET}" in
+    *.prof)
+        exec snakeviz "${TARGET}"
+        ;;
+    *.bin)
+        case "${VIEW}" in
+            leaks)              exec memray flamegraph --leaks -f "${TARGET}" ;;
+            flamegraph|table)   exec memray "${VIEW}" -f "${TARGET}" ;;
+            tree|stats|summary) exec memray "${VIEW}" "${TARGET}" ;;
+            *) echo "Unknown VIEW=${VIEW}" >&2; exit 1 ;;
+        esac
+        ;;
+    *.svg|*.html)
+        exec xdg-open "${TARGET}"
+        ;;
+    *)
+        echo "Don't know how to view ${TARGET}" >&2
+        exit 1
+        ;;
+esac