added: scripts/profile/classify_usage.py — classify memray usage_over_time.csv
Reads the memray usage CSV and emits a verdict based on tail-drop-from-peak: CLIMB-AND-DROP, MOSTLY-RELEASED, or SUSTAINED-AT-PEAK. Deliberately ignores net-growth-vs-baseline, since any active workload grows vs. a cold interpreter — that metric is misleading as a leak signal.
This commit is contained in:
105
scripts/profile/classify_usage.py
Executable file
105
scripts/profile/classify_usage.py
Executable file
@@ -0,0 +1,105 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Classify the shape of a memray usage_over_time.csv as plateau, climb,
|
||||
or climb-and-drop. Operates on the `memory_size_bytes` column.
|
||||
|
||||
Usage:
|
||||
scripts/profile/classify_usage.py profiles/usage_over_time.csv
|
||||
scripts/profile/classify_usage.py # newest *.csv in ./profiles/
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
import statistics
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def _mb(n: float) -> str:
|
||||
return f"{n / (1024 * 1024):.1f} MB"
|
||||
|
||||
|
||||
def load(path: Path) -> list[tuple[int, int]]:
    """Parse a memray usage CSV into (timestamp, memory_size_bytes) pairs.

    Rows missing either column, or with non-integer values, are skipped.
    Exits the process if no usable rows remain. The result is sorted by
    timestamp (stable, so rows sharing a timestamp keep file order).
    """
    samples: list[tuple[int, int]] = []
    with path.open() as handle:
        for record in csv.DictReader(handle):
            try:
                pair = (int(record["timestamp"]), int(record["memory_size_bytes"]))
            except (KeyError, ValueError):
                # Malformed or incomplete row — ignore it.
                continue
            samples.append(pair)

    if not samples:
        sys.exit(f"no usable rows in {path}")

    samples.sort(key=lambda pair: pair[0])
    return samples
|
||||
|
||||
|
||||
def classify(series: list[tuple[int, int]]) -> None:
    """Print summary statistics for a (timestamp, bytes) series plus a verdict.

    The only leak signal used is the tail drop — how much memory was
    released AFTER the peak. Net growth vs. the cold-start baseline is
    printed for context but deliberately not used to classify: an active
    workload always grows vs. a cold interpreter, so that metric is
    misleading as a leak signal.

    Caveat: if the workload was still running when memray stopped,
    SUSTAINED-AT-PEAK is inconclusive (not necessarily a leak). Re-run
    with a rest period after the scan for a definitive answer.
    """

    def fmt(nbytes: float) -> str:
        # Same rendering as the module-level _mb(); inlined so this
        # function is self-contained.
        return f"{nbytes / (1024 * 1024):.1f} MB"

    sizes = [size for _, size in series]
    count = len(sizes)
    window = max(1, count // 10)  # 10% of the samples, at least one

    peak = max(sizes)
    peak_idx = sizes.index(peak)  # first occurrence of the maximum

    # Pre-peak baseline = first 10% of samples; plateau = last 10%
    # (what we settle to).
    baseline = statistics.median(sizes[:window])
    plateau = statistics.median(sizes[-window:])

    # "Tail drop" — how much we released after the peak.
    tail_drop = peak - plateau
    tail_drop_pct = tail_drop / peak * 100 if peak else 0.0

    # "Growth during run" — end vs beginning (context only, see docstring).
    net_growth = plateau - baseline
    net_growth_pct = net_growth / baseline * 100 if baseline else 0.0

    # Where is the peak in the timeline? 0.0 (start) .. 1.0 (end).
    peak_position = peak_idx / (count - 1) if count > 1 else 0.0

    report = [
        f"samples: {count}",
        f"baseline (first 10%): {fmt(baseline)}",
        f"peak: {fmt(peak)} at {peak_position:.0%} of run",
        f"plateau (last 10%): {fmt(plateau)}",
        f"tail drop: {fmt(tail_drop)} ({tail_drop_pct:+.1f}% vs peak)",
        f"net growth: {fmt(net_growth)} ({net_growth_pct:+.1f}% vs baseline)",
        "",
    ]

    if tail_drop_pct >= 10:
        report += [
            "verdict: CLIMB-AND-DROP — memory released after peak.",
            " → no leak. Profile CPU next (pyinstrument).",
        ]
    elif tail_drop_pct >= 3:
        report += [
            "verdict: MOSTLY-RELEASED — partial release after peak.",
            " → likely healthy; re-run with a rest period after load",
            " to confirm (memray should capture post-workload idle).",
        ]
    else:
        report += [
            "verdict: SUSTAINED-AT-PEAK — memory held near peak at end of capture.",
            " → AMBIGUOUS: could be a leak, or the workload was still",
            " running when memray stopped. Re-run with a rest period",
            " after load, then check: memray flamegraph --leaks <bin>",
        ]

    for line in report:
        print(line)
|
||||
|
||||
|
||||
def main() -> None:
    """Entry point: classify the CSV named in argv[1], or the newest in ./profiles/."""
    if len(sys.argv) > 1:
        target = Path(sys.argv[1])
    else:
        # No argument: fall back to the most recently modified CSV under
        # ./profiles/ (sorted ascending by mtime, then take the last, so
        # mtime ties resolve the same way glob order would).
        candidates = sorted(
            Path("profiles").glob("*.csv"),
            key=lambda p: p.stat().st_mtime,
        )
        if not candidates:
            sys.exit("no CSV found; pass a path or put one in ./profiles/")
        target = candidates[-1]

    print(f"analyzing {target}\n")
    classify(load(target))
|
||||
|
||||
|
||||
# Script entry point: run only when executed directly, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user