From e22d057e68355fd2f25cc1f54c16b9eb8ca69f97 Mon Sep 17 00:00:00 2001
From: anti
Date: Fri, 17 Apr 2026 14:48:59 -0400
Subject: [PATCH] =?UTF-8?q?added:=20scripts/profile/aggregate=5Frequests.p?=
 =?UTF-8?q?y=20=E2=80=94=20roll=20up=20pyinstrument=20request=20profiles?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Parses every HTML in profiles/, reattributes [self]/[await] synthetic
leaves to their parent function, and reports per-endpoint wall-time
(mean/p50/p95/max) plus top hot functions by cumulative self-time.

Makes post-locust profile dirs actually readable — otherwise they're
just a pile of hundred-plus HTML files.
---
 scripts/profile/aggregate_requests.py | 188 ++++++++++++++++++++++++++
 1 file changed, 188 insertions(+)
 create mode 100755 scripts/profile/aggregate_requests.py

diff --git a/scripts/profile/aggregate_requests.py b/scripts/profile/aggregate_requests.py
new file mode 100755
index 0000000..b636fcb
--- /dev/null
+++ b/scripts/profile/aggregate_requests.py
@@ -0,0 +1,188 @@
+#!/usr/bin/env python3
+"""
+Aggregate pyinstrument request profiles from ./profiles/*.html.
+
+The PyinstrumentMiddleware writes one HTML per request. After a Locust run
+there are hundreds of them — reading one by one is useless. This rolls
+everything up into two views:
+
+    1. Per-endpoint summary (count, mean/p50/p95/max wall-time)
+    2. Top hot functions by cumulative self-time across ALL requests
+
+Usage:
+    scripts/profile/aggregate_requests.py                   # ./profiles/
+    scripts/profile/aggregate_requests.py --dir PATH
+    scripts/profile/aggregate_requests.py --top 30          # show top 30 funcs
+    scripts/profile/aggregate_requests.py --endpoint login  # filter
+
+Self-time of a frame = frame.time - sum(child.time) — i.e. time spent
+executing the function's own code, excluding descendants. That's the
+right signal for "where is the CPU actually going".
+"""
+from __future__ import annotations
+
+import argparse
+import json
+import re
+import statistics
+from collections import defaultdict
+from pathlib import Path
+
+
+_FILENAME_RE = re.compile(r"^(?P<ts>\d+)-(?P<method>[A-Z]+)-(?P<slug>.+)\.html$")
+_SESSION_RE = re.compile(r"const sessionData = (\{.*?\});\s*\n\s*pyinstrumentHTMLRenderer", re.DOTALL)
+
+
+def load_session(path: Path) -> tuple[dict, dict] | None:
+    """Return (session_summary, frame_tree_root) or None."""
+    try:
+        text = path.read_text()
+    except OSError:
+        return None
+    m = _SESSION_RE.search(text)
+    if not m:
+        return None
+    try:
+        payload = json.loads(m.group(1))
+        return payload["session"], payload["frame_tree"]
+    except (json.JSONDecodeError, KeyError):
+        return None
+
+
+_SYNTHETIC = {"[self]", "[await]"}
+
+
+def _is_synthetic(identifier: str) -> bool:
+    """Pyinstrument leaf markers: `[self]` / `[await]` carry no file/line."""
+    return identifier in _SYNTHETIC or identifier.startswith(("[self]", "[await]"))
+
+
+def walk_self_time(frame: dict, acc: dict[str, float], parent_ident: str | None = None) -> None:
+    """
+    Accumulate self-time by frame identifier.
+
+    Pyinstrument attaches `[self]` / `[await]` synthetic leaves for non-sampled
+    execution time. Rolling them into their parent ("self-time of X" vs. a
+    global `[self]` bucket) is what gives us actionable per-function hotspots.
+    """
+    ident = frame["identifier"]
+    total = frame.get("time", 0.0)
+    children = frame.get("children") or []
+    child_total = sum(c.get("time", 0.0) for c in children)
+    self_time = total - child_total
+
+    if _is_synthetic(ident):
+        # Reattribute synthetic self-time to the enclosing real function.
+        key = parent_ident if parent_ident else ident
+        acc[key] = acc.get(key, 0.0) + total
+        return
+
+    if self_time > 0:
+        acc[ident] = acc.get(ident, 0.0) + self_time
+    for c in children:
+        walk_self_time(c, acc, parent_ident=ident)
+
+
+def short_ident(identifier: str) -> str:
+    """`func\\x00/abs/path.py\\x00LINE` -> `func path.py:LINE`."""
+    parts = identifier.split("\x00")
+    if len(parts) == 3:
+        func, path, line = parts
+        return f"{func:30s} {Path(path).name}:{line}"
+    return identifier[:80]
+
+
+def percentile(values: list[float], p: float) -> float:
+    if not values:
+        return 0.0
+    values = sorted(values)
+    k = (len(values) - 1) * p
+    lo, hi = int(k), min(int(k) + 1, len(values) - 1)
+    if lo == hi:
+        return values[lo]
+    return values[lo] + (values[hi] - values[lo]) * (k - lo)
+
+
+def main() -> None:
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--dir", default="profiles")
+    ap.add_argument("--top", type=int, default=20)
+    ap.add_argument("--endpoint", default=None, help="substring filter on endpoint slug")
+    args = ap.parse_args()
+
+    root = Path(args.dir)
+    files = sorted(root.glob("*.html"))
+    if not files:
+        raise SystemExit(f"no HTMLs in {root}/")
+
+    per_endpoint: dict[str, list[float]] = defaultdict(list)
+    global_self: dict[str, float] = {}
+    per_endpoint_self: dict[str, dict[str, float]] = defaultdict(dict)
+    parsed = 0
+    skipped = 0
+
+    for f in files:
+        m = _FILENAME_RE.match(f.name)
+        if not m:
+            skipped += 1
+            continue
+        endpoint = f"{m['method']} /{m['slug'].replace('_', '/')}"
+        if args.endpoint and args.endpoint not in endpoint:
+            continue
+
+        loaded = load_session(f)
+        if not loaded:
+            skipped += 1
+            continue
+        session, root_frame = loaded
+
+        duration = session.get("duration", 0.0)
+        per_endpoint[endpoint].append(duration)
+
+        walk_self_time(root_frame, global_self)
+        walk_self_time(root_frame, per_endpoint_self[endpoint])
+
+        parsed += 1
+
+    print(f"parsed: {parsed} skipped: {skipped} from {root}/\n")
+
+    print("=" * 100)
+    print("PER-ENDPOINT WALL-TIME")
+    print("=" * 100)
+    print(f"{'endpoint':<55} {'n':>6} {'mean':>9} {'p50':>9} {'p95':>9} {'max':>9}")
+    print("-" * 100)
+    rows = sorted(per_endpoint.items(), key=lambda kv: -statistics.mean(kv[1]) * len(kv[1]))
+    for ep, durations in rows:
+        print(f"{ep[:55]:<55} {len(durations):>6} "
+              f"{statistics.mean(durations)*1000:>8.1f}ms "
+              f"{percentile(durations,0.50)*1000:>8.1f}ms "
+              f"{percentile(durations,0.95)*1000:>8.1f}ms "
+              f"{max(durations)*1000:>8.1f}ms")
+
+    print()
+    print("=" * 100)
+    print(f"TOP {args.top} HOT FUNCTIONS BY CUMULATIVE SELF-TIME (across {parsed} requests)")
+    print("=" * 100)
+    total_self = sum(global_self.values()) or 1.0
+    top = sorted(global_self.items(), key=lambda kv: -kv[1])[: args.top]
+    print(f"{'fn file:line':<70} {'self':>10} {'share':>8}")
+    print("-" * 100)
+    for ident, t in top:
+        share = t / total_self * 100
+        print(f"{short_ident(ident):<70} {t*1000:>8.1f}ms {share:>6.1f}%")
+
+    print()
+    print("=" * 100)
+    print("TOP 3 HOT FUNCTIONS PER ENDPOINT")
+    print("=" * 100)
+    for ep in sorted(per_endpoint_self, key=lambda e: -sum(per_endpoint_self[e].values())):
+        acc = per_endpoint_self[ep]
+        ep_total = sum(acc.values()) or 1.0
+        print(f"\n{ep} ({len(per_endpoint[ep])} samples, {ep_total*1000:.0f}ms total self)")
+        top3 = sorted(acc.items(), key=lambda kv: -kv[1])[:3]
+        for ident, t in top3:
+            print(f"  {short_ident(ident):<70} {t*1000:>7.1f}ms ({t/ep_total*100:>4.1f}%)")
+
+
+if __name__ == "__main__":
+    main()