merge: testing → main (reconcile 2-week divergence)
This commit is contained in:
192
scripts/profile/aggregate_requests.py
Executable file
192
scripts/profile/aggregate_requests.py
Executable file
@@ -0,0 +1,192 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Aggregate pyinstrument request profiles from ./profiles/*.html.
|
||||
|
||||
The PyinstrumentMiddleware writes one HTML per request. After a Locust run
|
||||
there are hundreds of them — reading one by one is useless. This rolls
|
||||
everything up into two views:
|
||||
|
||||
1. Per-endpoint summary (count, mean/p50/p95/max wall-time)
|
||||
2. Top hot functions by cumulative self-time across ALL requests
|
||||
|
||||
Usage:
|
||||
scripts/profile/aggregate_requests.py # ./profiles/
|
||||
scripts/profile/aggregate_requests.py --dir PATH
|
||||
scripts/profile/aggregate_requests.py --top 30 # show top 30 funcs
|
||||
scripts/profile/aggregate_requests.py --endpoint login # filter
|
||||
|
||||
Self-time of a frame = frame.time - sum(child.time) — i.e. time spent
|
||||
executing the function's own code, excluding descendants. That's the
|
||||
right signal for "where is the CPU actually going".
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
import statistics
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
_FILENAME_RE = re.compile(r"^(?P<ts>\d+)-(?P<method>[A-Z]+)-(?P<slug>.+)\.html$")
|
||||
_SESSION_RE = re.compile(r"const sessionData = (\{.*?\});\s*\n\s*pyinstrumentHTMLRenderer", re.DOTALL)
|
||||
|
||||
|
||||
def load_session(path: Path) -> tuple[dict, dict] | None:
|
||||
"""Return (session_summary, frame_tree_root) or None."""
|
||||
try:
|
||||
text = path.read_text()
|
||||
except OSError:
|
||||
return None
|
||||
m = _SESSION_RE.search(text)
|
||||
if not m:
|
||||
return None
|
||||
try:
|
||||
payload = json.loads(m.group(1))
|
||||
return payload["session"], payload["frame_tree"]
|
||||
except (json.JSONDecodeError, KeyError):
|
||||
return None
|
||||
|
||||
|
||||
_SYNTHETIC = {"[self]", "[await]"}
|
||||
|
||||
|
||||
def _is_synthetic(identifier: str) -> bool:
|
||||
"""Pyinstrument leaf markers: `[self]` / `[await]` carry no file/line."""
|
||||
return identifier in _SYNTHETIC or identifier.startswith(("[self]", "[await]"))
|
||||
|
||||
|
||||
def walk_self_time(frame: dict | None, acc: dict[str, float], parent_ident: str | None = None) -> None:
|
||||
"""
|
||||
Accumulate self-time by frame identifier.
|
||||
|
||||
Pyinstrument attaches `[self]` / `[await]` synthetic leaves for non-sampled
|
||||
execution time. Rolling them into their parent ("self-time of X" vs. a
|
||||
global `[self]` bucket) is what gives us actionable per-function hotspots.
|
||||
"""
|
||||
if not frame:
|
||||
return
|
||||
ident = frame.get("identifier")
|
||||
if not ident:
|
||||
return
|
||||
total = frame.get("time", 0.0)
|
||||
children = frame.get("children") or []
|
||||
child_total = sum(c.get("time", 0.0) for c in children)
|
||||
self_time = total - child_total
|
||||
|
||||
if _is_synthetic(ident):
|
||||
# Reattribute synthetic self-time to the enclosing real function.
|
||||
key = parent_ident if parent_ident else ident
|
||||
acc[key] = acc.get(key, 0.0) + total
|
||||
return
|
||||
|
||||
if self_time > 0:
|
||||
acc[ident] = acc.get(ident, 0.0) + self_time
|
||||
for c in children:
|
||||
walk_self_time(c, acc, parent_ident=ident)
|
||||
|
||||
|
||||
def short_ident(identifier: str) -> str:
    """Render a frame identifier compactly for the report tables.

    An identifier shaped like `func\\x00/abs/path.py\\x00LINE` becomes the
    function name left-justified to 30 columns, then `path.py:LINE`. Anything
    that does not split into exactly three NUL-separated fields is returned
    truncated to 80 characters.
    """
    fields = identifier.split("\x00")
    if len(fields) != 3:
        return identifier[:80]
    name, file_path, lineno = fields
    basename = Path(file_path).name
    return f"{name:30s} {basename}:{lineno}"
|
||||
|
||||
|
||||
def percentile(values: list[float], p: float) -> float:
    """Return the linearly interpolated percentile of ``values``.

    Args:
        values: Samples in any order; the input list is not modified.
        p: Fraction of the way through the sorted samples (e.g. 0.95).

    Returns:
        The interpolated value, or 0.0 when ``values`` is empty.
    """
    if not values:
        return 0.0
    ordered = sorted(values)
    last = len(ordered) - 1
    rank = last * p
    below = int(rank)
    above = min(below + 1, last)
    if below == above:
        return ordered[below]
    # Blend the two neighbouring samples by the fractional part of the rank.
    return ordered[below] + (ordered[above] - ordered[below]) * (rank - below)
|
||||
|
||||
|
||||
def main() -> None:
    """Aggregate every ``*.html`` profile in ``--dir`` and print three reports.

    Reports: per-endpoint wall-time statistics, the top ``--top`` functions by
    cumulative self-time across all parsed requests, and the top three
    functions per endpoint. ``--endpoint`` keeps only endpoints whose label
    contains the given substring.

    Raises:
        SystemExit: when the directory contains no ``*.html`` files.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--dir", default="profiles")
    ap.add_argument("--top", type=int, default=20)
    ap.add_argument("--endpoint", default=None, help="substring filter on endpoint slug")
    args = ap.parse_args()

    root = Path(args.dir)
    files = sorted(root.glob("*.html"))
    if not files:
        raise SystemExit(f"no HTMLs in {root}/")

    per_endpoint: dict[str, list[float]] = defaultdict(list)
    global_self: dict[str, float] = {}
    per_endpoint_self: dict[str, dict[str, float]] = defaultdict(dict)
    parsed = 0
    skipped = 0

    for f in files:
        m = _FILENAME_RE.match(f.name)
        if not m:
            skipped += 1
            continue
        endpoint = f"{m['method']} /{m['slug'].replace('_', '/')}"
        # Filtered-out endpoints are neither parsed nor counted as skipped.
        if args.endpoint and args.endpoint not in endpoint:
            continue

        loaded = load_session(f)
        if not loaded:
            skipped += 1
            continue
        session, root_frame = loaded

        per_endpoint[endpoint].append(session.get("duration", 0.0))

        # Walk the frame tree once, then fold the result into both roll-ups,
        # instead of traversing the same tree twice.
        request_self: dict[str, float] = {}
        walk_self_time(root_frame, request_self)
        endpoint_self = per_endpoint_self[endpoint]
        for ident, t in request_self.items():
            global_self[ident] = global_self.get(ident, 0.0) + t
            endpoint_self[ident] = endpoint_self.get(ident, 0.0) + t

        parsed += 1

    print(f"parsed: {parsed} skipped: {skipped} from {root}/\n")

    print("=" * 100)
    print("PER-ENDPOINT WALL-TIME")
    print("=" * 100)
    print(f"{'endpoint':<55} {'n':>6} {'mean':>9} {'p50':>9} {'p95':>9} {'max':>9}")
    print("-" * 100)
    # mean * count == total wall-time, so the costliest endpoints come first.
    rows = sorted(per_endpoint.items(), key=lambda kv: -statistics.mean(kv[1]) * len(kv[1]))
    for ep, durations in rows:
        print(f"{ep[:55]:<55} {len(durations):>6} "
              f"{statistics.mean(durations)*1000:>8.1f}ms "
              f"{percentile(durations,0.50)*1000:>8.1f}ms "
              f"{percentile(durations,0.95)*1000:>8.1f}ms "
              f"{max(durations)*1000:>8.1f}ms")

    print()
    print("=" * 100)
    print(f"TOP {args.top} HOT FUNCTIONS BY CUMULATIVE SELF-TIME (across {parsed} requests)")
    print("=" * 100)
    # `or 1.0` only guards the share division against a zero total.
    total_self = sum(global_self.values()) or 1.0
    top = sorted(global_self.items(), key=lambda kv: -kv[1])[: args.top]
    print(f"{'fn file:line':<70} {'self':>10} {'share':>8}")
    print("-" * 100)
    for ident, t in top:
        share = t / total_self * 100
        print(f"{short_ident(ident):<70} {t*1000:>8.1f}ms {share:>6.1f}%")

    print()
    print("=" * 100)
    print("TOP 3 HOT FUNCTIONS PER ENDPOINT")
    print("=" * 100)
    for ep in sorted(per_endpoint_self, key=lambda e: -sum(per_endpoint_self[e].values())):
        acc = per_endpoint_self[ep]
        ep_total = sum(acc.values())
        # Keep the displayed total separate from the division guard, so an
        # endpoint with zero self-time is not reported as "1000ms total self".
        ep_divisor = ep_total or 1.0
        print(f"\n{ep} ({len(per_endpoint[ep])} samples, {ep_total*1000:.0f}ms total self)")
        top3 = sorted(acc.items(), key=lambda kv: -kv[1])[:3]
        for ident, t in top3:
            print(f" {short_ident(ident):<70} {t*1000:>7.1f}ms ({t/ep_divisor*100:>4.1f}%)")


if __name__ == "__main__":
    main()
|
||||
105
scripts/profile/classify_usage.py
Executable file
105
scripts/profile/classify_usage.py
Executable file
@@ -0,0 +1,105 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Classify the shape of a memray usage_over_time.csv as climb-and-drop,
|
||||
mostly-released, or sustained-at-peak, based on how much memory was released after the peak. Operates on the `memory_size_bytes` column.
|
||||
|
||||
Usage:
|
||||
scripts/profile/classify_usage.py profiles/usage_over_time.csv
|
||||
scripts/profile/classify_usage.py # newest *.csv in ./profiles/
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
import statistics
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def _mb(n: float) -> str:
|
||||
return f"{n / (1024 * 1024):.1f} MB"
|
||||
|
||||
|
||||
def load(path: Path) -> list[tuple[int, int]]:
    """Read ``(timestamp, memory_size_bytes)`` pairs from a CSV file.

    Rows that lack either column, or whose values are not integers, are
    skipped. The result is sorted by timestamp.

    Args:
        path: CSV file with ``timestamp`` and ``memory_size_bytes`` columns.

    Returns:
        Non-empty list of ``(timestamp, memory_size_bytes)`` tuples.

    Raises:
        SystemExit: when no row could be used.
    """
    out: list[tuple[int, int]] = []
    # newline="" lets the csv module handle line endings itself.
    with path.open(newline="") as f:
        for r in csv.DictReader(f):
            try:
                out.append((int(r["timestamp"]), int(r["memory_size_bytes"])))
            except (KeyError, ValueError, TypeError):
                # TypeError: DictReader fills the missing columns of a short
                # row with None, and int(None) is not a ValueError.
                continue
    if not out:
        sys.exit(f"no usable rows in {path}")
    out.sort(key=lambda t: t[0])
    return out
|
||||
|
||||
|
||||
def classify(series: list[tuple[int, int]]) -> None:
    """Print summary statistics and a verdict for a memory-usage series.

    Args:
        series: ``(timestamp, memory_size_bytes)`` pairs; only the memory
            values are used, in the order given. Must be non-empty.
    """
    readings = [size for _, size in series]
    count = len(readings)
    # Size of the leading/trailing window: 10% of the samples, at least one.
    window = max(1, count // 10)

    peak = max(readings)
    # Position of the first occurrence of the peak, as a fraction of the run.
    peak_position = readings.index(peak) / (count - 1) if count > 1 else 0.0

    baseline = statistics.median(readings[:window])
    plateau = statistics.median(readings[-window:])

    # How much was released after the peak.
    tail_drop = peak - plateau
    tail_drop_pct = (tail_drop / peak * 100) if peak else 0.0

    # End of the run versus the beginning.
    net_growth = plateau - baseline
    net_growth_pct = (net_growth / baseline * 100) if baseline else 0.0

    print(f"samples: {count}")
    print(f"baseline (first 10%): {_mb(baseline)}")
    print(f"peak: {_mb(peak)} at {peak_position:.0%} of run")
    print(f"plateau (last 10%): {_mb(plateau)}")
    print(f"tail drop: {_mb(tail_drop)} ({tail_drop_pct:+.1f}% vs peak)")
    print(f"net growth: {_mb(net_growth)} ({net_growth_pct:+.1f}% vs baseline)")
    print()

    # Heuristic: without a post-load rest period, the only reliable leak
    # signal is how much memory was released AFTER the peak. Net growth versus
    # a cold start is not useful, because an active workload always grows
    # relative to a cold interpreter.
    #
    # Caveat: if the workload was still running when memray stopped,
    # "sustained-at-peak" is inconclusive (not necessarily a leak). Re-run
    # with a rest period after the scan for a definitive answer.
    if tail_drop_pct >= 10:
        verdict_lines = (
            "verdict: CLIMB-AND-DROP — memory released after peak.",
            " → no leak. Profile CPU next (pyinstrument).",
        )
    elif tail_drop_pct >= 3:
        verdict_lines = (
            "verdict: MOSTLY-RELEASED — partial release after peak.",
            " → likely healthy; re-run with a rest period after load",
            " to confirm (memray should capture post-workload idle).",
        )
    else:
        verdict_lines = (
            "verdict: SUSTAINED-AT-PEAK — memory held near peak at end of capture.",
            " → AMBIGUOUS: could be a leak, or the workload was still",
            " running when memray stopped. Re-run with a rest period",
            " after load, then check: memray flamegraph --leaks <bin>",
        )
    for line in verdict_lines:
        print(line)
|
||||
|
||||
|
||||
def main() -> None:
    """Classify the CSV named on the command line, or the newest one in ./profiles/.

    Raises:
        SystemExit: when no path is given and ./profiles/ holds no ``*.csv``.
    """
    cli_args = sys.argv[1:]
    if cli_args:
        target = Path(cli_args[0])
    else:
        candidates = list(Path("profiles").glob("*.csv"))
        if not candidates:
            sys.exit("no CSV found; pass a path or put one in ./profiles/")
        # Oldest first by modification time, so the last entry is the newest.
        candidates.sort(key=lambda p: p.stat().st_mtime)
        target = candidates[-1]

    print(f"analyzing {target}\n")
    series = load(target)
    classify(series)


if __name__ == "__main__":
    main()
|
||||
17
scripts/profile/cprofile-cli.sh
Executable file
17
scripts/profile/cprofile-cli.sh
Executable file
@@ -0,0 +1,17 @@
|
||||
#!/usr/bin/env bash
# Profile a `decnet` subcommand with cProfile and save a .prof file for snakeviz.
#
# Usage: scripts/profile/cprofile-cli.sh services
#        scripts/profile/cprofile-cli.sh status
#
# Set OUT to override the output path (default: profiles/cprofile-<epoch>.prof).
set -euo pipefail

if (( $# < 1 )); then
    echo "Usage: $0 <decnet-subcommand> [args...]" >&2
    exit 1
fi

# Keep a caller-supplied OUT; otherwise use a timestamped file under profiles/.
: "${OUT:=profiles/cprofile-$(date +%s).prof}"
mkdir -p "$(dirname "$OUT")"

# All script arguments are forwarded to the decnet CLI unchanged.
python -m cProfile -o "${OUT}" -m decnet.cli "$@"

echo "Wrote ${OUT}"
echo "View with: snakeviz ${OUT}"
|
||||
16
scripts/profile/memray-api.sh
Executable file
16
scripts/profile/memray-api.sh
Executable file
@@ -0,0 +1,16 @@
|
||||
#!/usr/bin/env bash
# Run the DECNET API under memray to capture an allocation profile.
# Stop with Ctrl-C; then render with `memray flamegraph <bin>`.
#
# Environment overrides:
#   DECNET_API_HOST  bind address (default: 127.0.0.1)
#   DECNET_API_PORT  bind port    (default: 8000)
#   OUT              capture file (default: profiles/memray-<epoch>.bin)
set -euo pipefail

HOST="${DECNET_API_HOST:-127.0.0.1}"
PORT="${DECNET_API_PORT:-8000}"
OUT="${OUT:-profiles/memray-$(date +%s).bin}"
mkdir -p "$(dirname "$OUT")"

# The run is normally ended with Ctrl-C. That may make the server exit
# non-zero, or interrupt this shell as well; under `set -e` a trailing echo
# would then never run. Print the hint from an EXIT trap instead, and only
# when a capture file was actually written.
print_render_hint() {
    if [[ -e "${OUT}" ]]; then
        echo "Render with: memray flamegraph ${OUT}"
    fi
}
trap print_render_hint EXIT

echo "Starting uvicorn under memray -> ${OUT}"
python -m memray run --trace-python-allocators --follow-fork \
    -o "${OUT}" -m uvicorn decnet.web.api:app \
    --host "${HOST}" --port "${PORT}" --log-level warning
|
||||
32
scripts/profile/pyspy-attach.sh
Executable file
32
scripts/profile/pyspy-attach.sh
Executable file
@@ -0,0 +1,32 @@
|
||||
#!/usr/bin/env bash
# Attach py-spy to the running DECNET uvicorn worker(s) and record a flamegraph.
# Requires sudo on Linux because of kernel.yama.ptrace_scope=1 by default.
#
# Environment overrides:
#   DURATION  seconds to record (default: 30)
#   OUT       output SVG path   (default: profiles/pyspy-<epoch>.svg)
set -euo pipefail

DURATION="${DURATION:-30}"
OUT="${OUT:-profiles/pyspy-$(date +%s).svg}"
mkdir -p "$(dirname "$OUT")"

# First process whose command line matches; `|| true` keeps `set -e` from
# aborting before the friendlier message below when nothing matches.
PID="$(pgrep -f 'uvicorn decnet.web.api' | head -n 1 || true)"
if [[ -z "${PID}" ]]; then
    echo "No uvicorn worker found. Start the API first (e.g. 'decnet deploy ...')." >&2
    exit 1
fi

# NOTE: this is the version of the `python` on PATH, which is assumed to be
# the interpreter the server runs under.
PY_VER="$(python -c 'import sys; print(f"{sys.version_info[0]}.{sys.version_info[1]}")')"
PY_MAJOR="${PY_VER%%.*}"
PY_MINOR="${PY_VER#*.}"
# Compare numerically: as strings, "3.9" sorts after "3.14", which would
# wrongly reject interpreters that py-spy supports.
if (( PY_MAJOR > 3 || (PY_MAJOR == 3 && PY_MINOR >= 14) )); then
    cat >&2 <<EOF
WARNING: py-spy 0.4.1 (latest on PyPI) does not yet support Python ${PY_VER}.
Attaching will fail with "No python processes found in process <pid>".
Use one of the other lenses for now:
DECNET_PROFILE_REQUESTS=true # pyinstrument, per-request flamegraphs
scripts/profile/memray-api.sh # memory allocation profiling
scripts/profile/cprofile-cli.sh <cmd> # deterministic CLI profiling
Track upstream: https://github.com/benfred/py-spy/releases
EOF
    exit 2
fi

echo "Attaching py-spy to PID ${PID} for ${DURATION}s -> ${OUT}"
sudo .venv/bin/py-spy record -o "${OUT}" -p "${PID}" -d "${DURATION}" --subprocesses
echo "Wrote ${OUT}"
|
||||
69
scripts/profile/view.sh
Executable file
69
scripts/profile/view.sh
Executable file
@@ -0,0 +1,69 @@
|
||||
#!/usr/bin/env bash
# Open the newest profile artifact in the right viewer.
#
# Usage:
#   scripts/profile/view.sh               # newest file in ./profiles/
#   scripts/profile/view.sh <file>        # explicit path
#   scripts/profile/view.sh cprofile      # newest .prof
#   scripts/profile/view.sh memray        # newest memray .bin
#   scripts/profile/view.sh pyspy         # newest .svg
#   scripts/profile/view.sh pyinstrument  # newest pyinstrument .html
#
# Memray viewer override:
#   VIEW=flamegraph|table|tree|stats|summary  (default: flamegraph)
#   VIEW=leaks                                (render flamegraph with --leaks filter)
#
# DIR overrides the artifact directory (default: profiles).
set -euo pipefail

DIR="${DIR:-profiles}"
VIEW="${VIEW:-flamegraph}"

if [[ ! -d "${DIR}" ]]; then
    echo "No ${DIR}/ directory yet — run one of the profile scripts first." >&2
    exit 1
fi

# pick_newest NAME_GLOB [extra find predicates...]
# Print the most recently modified regular file directly inside DIR whose name
# matches NAME_GLOB and any extra predicates. Prints nothing when none match.
pick_newest() {
    local name_glob="$1"
    shift
    find "${DIR}" -maxdepth 1 -type f -name "${name_glob}" "$@" -printf '%T@ %p\n' 2>/dev/null \
        | sort -n | tail -n 1 | cut -d' ' -f2-
}

SELECTOR="${1:-}"
TARGET=""
case "${SELECTOR}" in
    "")           TARGET="$(pick_newest '*')" ;;
    cprofile)     TARGET="$(pick_newest '*.prof')" ;;
    memray)       TARGET="$(pick_newest 'memray-*.bin')" ;;
    pyspy)        TARGET="$(pick_newest 'pyspy-*.svg')" ;;
    # Any .html whose name does not start with "memray-".
    pyinstrument) TARGET="$(pick_newest '*.html' ! -name 'memray-*')" ;;
    *)            TARGET="${SELECTOR}" ;;
esac

if [[ -z "${TARGET}" ]] || [[ ! -f "${TARGET}" ]]; then
    echo "No matching profile artifact found." >&2
    exit 1
fi

echo "Opening ${TARGET}"

# Dispatch on the file extension.
case "${TARGET}" in
    *.prof)
        exec snakeviz "${TARGET}"
        ;;
    *.bin)
        case "${VIEW}" in
            leaks)              exec memray flamegraph --leaks -f "${TARGET}" ;;
            flamegraph|table)   exec memray "${VIEW}" -f "${TARGET}" ;;
            tree|stats|summary) exec memray "${VIEW}" "${TARGET}" ;;
            *)                  echo "Unknown VIEW=${VIEW}" >&2; exit 1 ;;
        esac
        ;;
    *.svg|*.html)
        exec xdg-open "${TARGET}"
        ;;
    *)
        echo "Don't know how to view ${TARGET}" >&2
        exit 1
        ;;
esac
|
||||
Reference in New Issue
Block a user