merge: testing → main (reconcile 2-week divergence)

This commit is contained in:
2026-04-28 18:36:00 -04:00
parent 499836c9e4
commit 862e4dbb31
1235 changed files with 160255 additions and 7996 deletions

44
scripts/bus/pub.py Executable file
View File

@@ -0,0 +1,44 @@
#!/usr/bin/env python3
"""Publish a single event to the local DECNET bus.
Usage: scripts/bus/pub.py <topic> [json-payload] [--type EVENT_TYPE]
Examples:
scripts/bus/pub.py topology.abc.status '{"state": "active"}'
scripts/bus/pub.py topology.abc.mutation.applied '{"id": 1}' --type applied
"""
from __future__ import annotations
import argparse
import asyncio
import json
import os
from decnet.bus.unix_client import UnixSocketBus
async def main(topic: str, payload: dict, event_type: str) -> None:
    """Connect to the local bus, publish one event, then disconnect.

    The socket path is read from ``DECNET_BUS_SOCKET`` and falls back to
    ``/tmp/decnet-bus.sock``. A one-line confirmation is printed after the
    publish call returns.
    """
    socket_path = os.environ.get("DECNET_BUS_SOCKET", "/tmp/decnet-bus.sock")
    bus = UnixSocketBus(socket_path, client_name="scripts-pub")
    await bus.connect()
    try:
        await bus.publish(topic, payload, event_type=event_type)
        print(f"pub: {topic} type={event_type!r} payload={payload}")
    finally:
        # Close the client even when publish() raises.
        await bus.close()
if __name__ == "__main__":
    # CLI: <topic> [json-payload] [--type EVENT_TYPE]
    parser = argparse.ArgumentParser()
    parser.add_argument("topic")
    parser.add_argument("payload", nargs="?", default="{}", help="JSON object (default {})")
    parser.add_argument("--type", dest="event_type", default="", help="optional event_type tag")
    cli = parser.parse_args()

    # Validate the payload before opening any bus connection: it must parse
    # as JSON and the top-level value must be an object.
    try:
        decoded = json.loads(cli.payload)
    except json.JSONDecodeError as exc:
        raise SystemExit(f"pub: payload is not valid JSON: {exc}")
    if not isinstance(decoded, dict):
        raise SystemExit("pub: payload must be a JSON object")

    asyncio.run(main(cli.topic, decoded, cli.event_type))

90
scripts/bus/smoke-mutator.sh Executable file
View File

@@ -0,0 +1,90 @@
#!/usr/bin/env bash
# Mutator-family topic smoke test: boots a bus worker, subscribes to
# `topology.>`, publishes one event per mutation-lifecycle state
# (enqueued → applying → applied) plus a topology.status transition,
# and verifies each lands on the subscriber.
#
# This is a cheap E2E for the topic hierarchy wired into the mutator
# and SSE route — the full DB + mutator + API loop is exercised by the
# pytest suite under tests/topology/ and tests/api/topology/.
#
# Usage: scripts/bus/smoke-mutator.sh
set -euo pipefail

# Per-run scratch locations. `mktemp -u` only generates a name; the socket
# itself is expected to be created by the bus worker started further down.
SOCK="$(mktemp -u -t decnet-bus-mut-smoke.XXXXXX.sock)"
export DECNET_BUS_SOCKET="${SOCK}"
LOGDIR="$(mktemp -d -t decnet-bus-mut-smoke.XXXXXX)"
HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TID="smoke-$(date +%s)"

# Stop background processes and remove scratch files on every exit path.
cleanup() {
  local pid
  # Signal only PIDs that were actually recorded. Falling back to PID 0 would
  # make `kill` target every process in this script's own process group.
  for pid in "${SUB_PID:-}" "${WORKER_PID:-}"; do
    if [[ -n "${pid}" ]]; then
      kill "${pid}" 2>/dev/null || true
    fi
  done
  wait 2>/dev/null || true
  rm -f "${SOCK}"
  rm -rf "${LOGDIR}"
}
trap cleanup EXIT
echo "smoke-mutator: socket=${SOCK} topology=${TID}"

# Bus worker in the background; its output goes to worker.log for diagnostics.
decnet bus --socket "${SOCK}" --group "" --heartbeat 5 \
  > "${LOGDIR}/worker.log" 2>&1 &
WORKER_PID=$!

# Poll for the socket: at most 40 sleeps of 50 ms.
attempts=0
until [[ -S "${SOCK}" ]] || (( attempts >= 40 )); do
  sleep 0.05
  attempts=$(( attempts + 1 ))
done

if [[ ! -S "${SOCK}" ]]; then
  echo "smoke-mutator: FAIL — bus worker never created ${SOCK}" >&2
  cat "${LOGDIR}/worker.log" >&2
  exit 1
fi

# Subscriber in the background, capturing everything it prints to sub.log.
python "${HERE}/sub.py" 'topology.>' > "${LOGDIR}/sub.log" 2>&1 &
SUB_PID=$!

# Short pause so the subscription is in place before the first publish.
sleep 0.3
# Publish one event through pub.py, discarding its confirmation line.
publish() {
  local topic="$1" payload="$2"
  python "${HERE}/pub.py" "${topic}" "${payload}" >/dev/null
}

publish "topology.${TID}.mutation.enqueued" '{"mutation_id": "m1", "op": "add_lan"}'
publish "topology.${TID}.mutation.applying" '{"mutation_id": "m1", "op": "add_lan"}'
publish "topology.${TID}.mutation.applied" '{"mutation_id": "m1", "op": "add_lan"}'
publish "topology.${TID}.status" '{"state": "degraded", "reason": "smoke"}'

expected=(
  "topology.${TID}.mutation.enqueued"
  "topology.${TID}.mutation.applying"
  "topology.${TID}.mutation.applied"
  "topology.${TID}.status"
)

# Print the first expected topic that is not yet in sub.log; print nothing
# once every topic has been seen.
first_missing() {
  local topic
  for topic in "${expected[@]}"; do
    # -F: topic names contain dots, which must match literally rather than
    # as regex "any character" wildcards.
    if ! grep -qF -- "${topic}" "${LOGDIR}/sub.log"; then
      printf '%s\n' "${topic}"
      return 0
    fi
  done
}

# Wait for all events to land: at most 60 polls, 50 ms apart.
for _ in {1..60}; do
  [[ -z "$(first_missing)" ]] && break
  sleep 0.05
done

# Final check after the wait; report the first topic still missing.
missing="$(first_missing)"
if [[ -n "${missing}" ]]; then
  echo "smoke-mutator: FAIL — missing ${missing}" >&2
  echo "--- worker.log ---" >&2; cat "${LOGDIR}/worker.log" >&2
  echo "--- sub.log ---" >&2; cat "${LOGDIR}/sub.log" >&2
  exit 1
fi

echo "smoke-mutator: OK — all ${#expected[@]} mutator-family events delivered"
grep -E 'mutation|status' "${LOGDIR}/sub.log" || true

57
scripts/bus/smoke.sh Executable file
View File

@@ -0,0 +1,57 @@
#!/usr/bin/env bash
# End-to-end bus smoke test: boots a worker, subscribes, publishes,
# verifies the event lands, tears everything down. Exits non-zero if
# anything misbehaves.
#
# Usage: scripts/bus/smoke.sh
set -euo pipefail

SOCK="$(mktemp -u -t decnet-bus-smoke.XXXXXX.sock)"
export DECNET_BUS_SOCKET="${SOCK}"
LOGDIR="$(mktemp -d -t decnet-bus-smoke.XXXXXX)"
HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# One cleanup handler for every stage of the script: it signals only the
# background PIDs recorded so far, then removes the socket and the log dir.
cleanup() {
  local pid
  for pid in "${SUB_PID:-}" "${WORKER_PID:-}"; do
    if [[ -n "${pid}" ]]; then
      kill "${pid}" 2>/dev/null || true
    fi
  done
  wait 2>/dev/null || true
  rm -f "${SOCK}"
  rm -rf "${LOGDIR}"
}
trap cleanup EXIT

echo "smoke: socket=${SOCK}"

decnet bus --socket "${SOCK}" --group "" --heartbeat 1 \
  > "${LOGDIR}/worker.log" 2>&1 &
WORKER_PID=$!

# Wait for the socket to exist.
for _ in {1..40}; do
  [[ -S "${SOCK}" ]] && break
  sleep 0.05
done
if [[ ! -S "${SOCK}" ]]; then
  echo "smoke: FAIL — worker never created ${SOCK}" >&2
  cat "${LOGDIR}/worker.log" >&2
  exit 1
fi

# Subscriber in the background, redirected to a file we can inspect.
python "${HERE}/sub.py" 'topology.>' > "${LOGDIR}/sub.log" 2>&1 &
SUB_PID=$!

# Give the SUB frame a tick to register.
sleep 0.3

python "${HERE}/pub.py" topology.abc.status '{"state": "active"}' >/dev/null

# Wait up to 2s for the event to show up.
for _ in {1..40}; do
  # -F: match the topic literally; its dots are not regex wildcards.
  if grep -qF 'topology.abc.status' "${LOGDIR}/sub.log"; then
    echo "smoke: OK — subscriber received event"
    grep -F 'topology.abc.status' "${LOGDIR}/sub.log"
    exit 0
  fi
  sleep 0.05
done

echo "smoke: FAIL — subscriber never saw the event" >&2
echo "--- worker.log ---" >&2; cat "${LOGDIR}/worker.log" >&2
echo "--- sub.log ---" >&2; cat "${LOGDIR}/sub.log" >&2
exit 1

11
scripts/bus/start.sh Executable file
View File

@@ -0,0 +1,11 @@
#!/usr/bin/env bash
# Start a local `decnet bus` worker for manual smoke-testing.
# Uses /tmp so it works without root and without the `decnet` POSIX group.
# Usage: scripts/bus/start.sh [heartbeat-seconds]
set -euo pipefail

# Socket path: honour DECNET_BUS_SOCKET, otherwise use the /tmp default.
SOCKET_PATH="${DECNET_BUS_SOCKET:-/tmp/decnet-bus.sock}"
# Optional first argument: heartbeat seconds (default 3).
BEAT_SECONDS="${1:-3}"

printf 'bus: socket=%s heartbeat=%ss (Ctrl-C to stop)\n' "${SOCKET_PATH}" "${BEAT_SECONDS}"

# exec replaces this shell with the worker, so no wrapper process lingers.
exec decnet bus --socket "${SOCKET_PATH}" --group "" --heartbeat "${BEAT_SECONDS}"

38
scripts/bus/sub.py Executable file
View File

@@ -0,0 +1,38 @@
#!/usr/bin/env python3
"""Subscribe to a pattern on the local DECNET bus and print events.
Usage: scripts/bus/sub.py 'topology.>'
scripts/bus/sub.py 'system.bus.health'
DECNET_BUS_SOCKET=/tmp/decnet-bus.sock scripts/bus/sub.py 'topology.*.status'
"""
from __future__ import annotations
import asyncio
import os
import sys
from decnet.bus.unix_client import UnixSocketBus
async def main(pattern: str) -> None:
    """Subscribe to ``pattern`` on the local bus and print each event received.

    The socket path is read from ``DECNET_BUS_SOCKET`` and falls back to
    ``/tmp/decnet-bus.sock``. Once ``connect()`` has succeeded the client is
    closed on every exit path, including a failure while subscribing.

    Args:
        pattern: Subscription pattern, passed unchanged to ``client.subscribe``.
    """
    sock = os.environ.get("DECNET_BUS_SOCKET", "/tmp/decnet-bus.sock")
    client = UnixSocketBus(sock, client_name="scripts-sub")
    await client.connect()
    try:
        # Subscribe inside the try block so close() still runs if
        # subscribe() or the banner print raises.
        sub = client.subscribe(pattern)
        print(f"sub: pattern={pattern!r} socket={sock} (Ctrl-C to stop)", flush=True)
        async with sub:
            async for ev in sub:
                # flush=True keeps output timely when stdout is a file or pipe.
                print(f"{ev.topic} type={ev.type!r} payload={ev.payload}", flush=True)
    finally:
        await client.close()
if __name__ == "__main__":
    # Exactly one positional argument is accepted: the subscription pattern.
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print("Usage: sub.py <pattern>", file=sys.stderr)
        sys.exit(2)
    (wanted_pattern,) = cli_args
    try:
        asyncio.run(main(wanted_pattern))
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop; exit without a traceback.
        pass

3
scripts/decnet-init.sh Executable file
View File

@@ -0,0 +1,3 @@
#!/bin/bash
# Run `decnet init --force` for the current directory under sudo.
# The defaults reproduce the previous hard-coded invocation; override them
# through the environment when the binary path or owning account differs:
#   DECNET_BIN    path to the decnet executable (default: .311/bin/decnet)
#   DECNET_USER   value for --user              (default: anti)
#   DECNET_GROUP  value for --group             (default: same as DECNET_USER)
DECNET_BIN="${DECNET_BIN:-.311/bin/decnet}"
DECNET_USER="${DECNET_USER:-anti}"
DECNET_GROUP="${DECNET_GROUP:-${DECNET_USER}}"
sudo "${DECNET_BIN}" init --force --install-dir "$PWD" --user "${DECNET_USER}" --group "${DECNET_GROUP}"

201
scripts/mock-webhook-receiver.py Executable file
View File

@@ -0,0 +1,201 @@
#!/usr/bin/env python3
"""Mock webhook receiver for local DECNET testing.
Listens on a local port, accepts POSTs from the `decnet webhook`
worker (or the `/api/v1/webhooks/{uuid}/test` admin endpoint), and
pretty-prints each delivery with HMAC verification status.
Usage:
# Start a receiver on port 8765, skip HMAC verification (unverified badge)
scripts/mock-webhook-receiver.py
# Verify HMAC against a known secret — reads DECNET_MOCK_SECRET env or --secret
scripts/mock-webhook-receiver.py --secret deadbeefdeadbeef
# Bind a different port / host
scripts/mock-webhook-receiver.py --host 0.0.0.0 --port 9000
# Simulate SIEM downtime — return a failure status for every POST so the
# worker's retry/backoff path can be exercised end-to-end.
scripts/mock-webhook-receiver.py --fail 503
Once running, create a webhook in DECNET pointing at the URL printed on
startup (e.g. http://localhost:8765/). The receiver accepts any path
— it's a catch-all — so the URL path after the host is yours to pick.
Pure stdlib. No dependencies to install.
"""
from __future__ import annotations
import argparse
import hashlib
import hmac
import json
import os
import sys
from datetime import datetime
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
# ANSI escape codes. Each constant collapses to "" when stdout is not a TTY,
# so redirected or piped output carries no control sequences.
_ISATTY = sys.stdout.isatty()


def _c(code: str) -> str:
    """Return ``code`` when stdout is a terminal, otherwise the empty string."""
    if _ISATTY:
        return code
    return ""


RESET = _c("\033[0m")
DIM = _c("\033[2m")
BOLD = _c("\033[1m")
GREEN = _c("\033[32m")
RED = _c("\033[31m")
YELLOW = _c("\033[33m")
CYAN = _c("\033[36m")
MAGENTA = _c("\033[35m")
GRAY = _c("\033[90m")
def _verify_hmac(secret: str, body: bytes, sig_header: str) -> bool:
    """Return True iff ``sig_header`` carries the HMAC-SHA256 of ``body``.

    Args:
        secret: Shared secret; UTF-8 encoded to form the HMAC key.
        body: Raw request body exactly as received.
        sig_header: Signature header value, expected as ``sha256=<hex digest>``.

    Returns:
        True when the hex digest after the ``sha256=`` prefix equals the
        locally recomputed digest; False when the prefix is missing or the
        digests differ.
    """
    prefix = "sha256="
    if not sig_header.startswith(prefix):
        return False
    received = sig_header[len(prefix):]
    expected = hmac.new(secret.encode("utf-8"), body, hashlib.sha256).hexdigest()
    # compare_digest() raises TypeError for str arguments that contain
    # non-ASCII characters. The header value comes straight from the request,
    # so compare bytes: a malformed signature is then an ordinary mismatch.
    return hmac.compare_digest(
        received.encode("utf-8", errors="replace"),
        expected.encode("ascii"),
    )
class WebhookHandler(BaseHTTPRequestHandler):
    """Catch-all HTTP handler that prints every webhook delivery to stdout.

    GET answers with a short plain-text health reply. POST prints the request
    path, the DECNET event headers, the signature check result and the body,
    then answers 200 or the configured failure status.
    """

    # Class-level config injected by `main`.
    # Shared secret used to check X-DECNET-Signature; None skips verification.
    secret: str | None = None
    # When not None, every POST is answered with this status instead of 200.
    fail_status: int | None = None

    # Silence the default noisy per-request log line — we print our own.
    def log_message(self, format, *args):  # noqa: A002,N802 — BaseHTTPRequestHandler API
        return

    def do_GET(self):  # noqa: N802 — BaseHTTPRequestHandler API
        """Friendly health check so you can `curl http://localhost:8765/`."""
        body = (
            b"DECNET mock webhook receiver.\n"
            b"POST to any path to test delivery.\n"
        )
        self.send_response(200)
        self.send_header("Content-Type", "text/plain; charset=utf-8")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def _read_body(self) -> bytes | None:
        """Read the request body as announced by Content-Length.

        Returns:
            The raw body (``b""`` when the header is absent or zero), or None
            after a 400 reply when the header is not a non-negative integer.
        """
        raw_length = self.headers.get("Content-Length") or "0"
        try:
            length = int(raw_length)
        except ValueError:
            length = -1
        if length < 0:
            # A negative size would make rfile.read() wait for EOF, and a
            # non-numeric one cannot be honoured at all — reject both.
            print(
                f"{RED}rejected POST {self.path}: invalid Content-Length {raw_length!r}{RESET}",
                flush=True,
            )
            self.send_error(400, "Invalid Content-Length")
            return None
        return self.rfile.read(length) if length else b""

    def do_POST(self):  # noqa: N802 — BaseHTTPRequestHandler API
        """Print one delivery (headers, signature badge, body) and reply."""
        raw_body = self._read_body()
        if raw_body is None:
            return
        sig = self.headers.get("X-DECNET-Signature", "")
        event_id = self.headers.get("X-DECNET-Event-Id", "")
        topic = self.headers.get("X-DECNET-Event-Topic", "")
        ts_hdr = self.headers.get("X-DECNET-Timestamp", "")

        # Signature verification
        if self.secret is None:
            sig_badge = f"{YELLOW}UNVERIFIED{RESET}"
        elif not sig:
            sig_badge = f"{RED}NO SIGNATURE{RESET}"
        elif _verify_hmac(self.secret, raw_body, sig):
            sig_badge = f"{GREEN}HMAC OK{RESET}"
        else:
            sig_badge = f"{RED}HMAC MISMATCH{RESET}"

        # Decode the body — print as JSON when possible, raw otherwise.
        try:
            payload = json.loads(raw_body.decode("utf-8") or "{}")
            body_text = json.dumps(payload, indent=2, sort_keys=True)
        except (ValueError, UnicodeDecodeError):
            body_text = raw_body.decode("utf-8", errors="replace")

        now = datetime.now().strftime("%H:%M:%S")
        print(
            f"{DIM}{now}{RESET} "
            f"{BOLD}{MAGENTA}[POST {self.path}]{RESET} "
            f"{sig_badge} "
            f"{CYAN}topic={topic}{RESET} "
            f"{GRAY}event_id={event_id}{RESET}"
            f"{(' ' + GRAY + 'ts=' + ts_hdr + RESET) if ts_hdr else ''}",
            flush=True,
        )
        # `or [""]` still prints one (empty) line for an empty body.
        for line in body_text.splitlines() or [""]:
            print(f" {line}", flush=True)
        print("", flush=True)

        # Response — success by default; configurable for retry-path testing.
        if self.fail_status is not None:
            status = self.fail_status
            reason = f"mock failure (--fail {self.fail_status})"
        else:
            status = 200
            reason = "ok"
        resp = json.dumps({"received": True, "reason": reason}).encode()
        self.send_response(status)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(resp)))
        self.end_headers()
        self.wfile.write(resp)
def main() -> None:
    """Parse CLI options, configure `WebhookHandler`, and serve until Ctrl-C.

    Options: ``--host`` / ``--port`` select the bind address, ``--secret``
    (or ``$DECNET_MOCK_SECRET``) enables HMAC verification, and ``--fail``
    makes every POST return the given status.
    """
    ap = argparse.ArgumentParser(
        description="Mock HTTP receiver for DECNET webhook testing.",
    )
    ap.add_argument("--host", default="127.0.0.1", help="Bind host (default: 127.0.0.1)")
    ap.add_argument("--port", type=int, default=8765, help="Bind port (default: 8765)")
    ap.add_argument(
        "--secret",
        default=os.environ.get("DECNET_MOCK_SECRET"),
        help="Webhook secret — HMAC is verified against received body when provided. "
        "Falls back to $DECNET_MOCK_SECRET. Omit to skip verification.",
    )
    ap.add_argument(
        "--fail",
        type=int,
        metavar="STATUS",
        help="Return this HTTP status for every POST instead of 200. "
        "Useful for exercising the worker's retry backoff "
        "(try --fail 503 or --fail 429).",
    )
    args = ap.parse_args()

    # An empty secret (e.g. DECNET_MOCK_SECRET="") counts as "not configured".
    # The handler checks `secret is None` while the banner checks truthiness;
    # normalising here keeps the two in agreement.
    secret = args.secret or None
    WebhookHandler.secret = secret
    WebhookHandler.fail_status = args.fail

    verify_note = (
        f"{GREEN}HMAC verification ENABLED{RESET}"
        if secret
        else f"{YELLOW}HMAC verification OFF (pass --secret to enable){RESET}"
    )
    fail_note = (
        f"\n {RED}RESPONSE MODE: failing every request with {args.fail}{RESET}"
        if args.fail is not None
        else ""
    )
    url = f"http://{args.host}:{args.port}/"
    banner = (
        f"\n{BOLD}{CYAN}DECNET mock webhook receiver{RESET}\n"
        f" listening on {BOLD}{url}{RESET}\n"
        f" {verify_note}{fail_note}\n"
        f" POST to any path; GET / for a health reply.\n"
        f" Ctrl-C to stop.\n"
    )
    print(banner, flush=True)

    # The server's context manager calls server_close() on every exit path,
    # not only after a KeyboardInterrupt.
    with ThreadingHTTPServer((args.host, args.port), WebhookHandler) as server:
        try:
            server.serve_forever()
        except KeyboardInterrupt:
            print(f"\n{DIM}receiver stopped.{RESET}", flush=True)


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,192 @@
#!/usr/bin/env python3
"""
Aggregate pyinstrument request profiles from ./profiles/*.html.
The PyinstrumentMiddleware writes one HTML per request. After a Locust run
there are hundreds of them — reading one by one is useless. This rolls
everything up into two views:
1. Per-endpoint summary (count, mean/p50/p95/max wall-time)
2. Top hot functions by cumulative self-time across ALL requests
Usage:
scripts/profile/aggregate_requests.py # ./profiles/
scripts/profile/aggregate_requests.py --dir PATH
scripts/profile/aggregate_requests.py --top 30 # show top 30 funcs
scripts/profile/aggregate_requests.py --endpoint login # filter
Self-time of a frame = frame.time - sum(child.time) — i.e. time spent
executing the function's own code, excluding descendants. That's the
right signal for "where is the CPU actually going".
"""
from __future__ import annotations
import argparse
import json
import re
import statistics
from collections import defaultdict
from pathlib import Path
# Profile file names are expected to look like "<digits>-<METHOD>-<slug>.html".
_FILENAME_RE = re.compile(r"^(?P<ts>\d+)-(?P<method>[A-Z]+)-(?P<slug>.+)\.html$")
# Captures the JSON object assigned to `sessionData`, up to the `;` that is
# followed on the next line by `pyinstrumentHTMLRenderer`.
_SESSION_RE = re.compile(r"const sessionData = (\{.*?\});\s*\n\s*pyinstrumentHTMLRenderer", re.DOTALL)


def load_session(path: Path) -> tuple[dict, dict] | None:
    """Return (session_summary, frame_tree_root) or None.

    Args:
        path: HTML profile to parse.

    Returns:
        The ``session`` and ``frame_tree`` values from the embedded
        ``sessionData`` JSON, or None when the file cannot be read or
        decoded, the marker is absent, the JSON is invalid, or either key
        is missing.
    """
    try:
        # Decode explicitly as UTF-8 rather than with the locale default, and
        # treat an undecodable file like an unreadable one: skip it.
        text = path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        return None
    match = _SESSION_RE.search(text)
    if match is None:
        return None
    try:
        payload = json.loads(match.group(1))
        return payload["session"], payload["frame_tree"]
    except (json.JSONDecodeError, KeyError):
        return None
_SYNTHETIC = {"[self]", "[await]"}


def _is_synthetic(identifier: str) -> bool:
    """Pyinstrument leaf markers: `[self]` / `[await]` carry no file/line."""
    return identifier in _SYNTHETIC or identifier.startswith(("[self]", "[await]"))


def walk_self_time(frame: dict | None, acc: dict[str, float], parent_ident: str | None = None) -> None:
    """
    Accumulate self-time by frame identifier into ``acc`` (mutated in place).

    Pyinstrument attaches `[self]` / `[await]` synthetic leaves for non-sampled
    execution time. Rolling them into their parent ("self-time of X" vs. a
    global `[self]` bucket) is what gives us actionable per-function hotspots.

    Args:
        frame: Root of the (sub)tree to walk; a falsy value is a no-op.
        acc: Mapping of identifier -> accumulated seconds.
        parent_ident: Identifier of the frame enclosing ``frame``, used when
            ``frame`` itself is synthetic. A synthetic frame without a parent
            is accumulated under its own identifier.

    The tree is walked with an explicit stack instead of recursion, so a
    deeply nested frame tree cannot hit the interpreter's recursion limit.
    Nodes are visited in the same pre-order a recursive walk would use.
    """
    if not frame:
        return
    pending: list[tuple[dict | None, str | None]] = [(frame, parent_ident)]
    while pending:
        node, parent = pending.pop()
        if not node:
            continue
        ident = node.get("identifier")
        if not ident:
            # Frames without an identifier are skipped together with their subtree.
            continue
        total = node.get("time", 0.0)
        if _is_synthetic(ident):
            # Reattribute synthetic self-time to the enclosing real function.
            key = parent if parent else ident
            acc[key] = acc.get(key, 0.0) + total
            continue
        children = node.get("children") or []
        self_time = total - sum(c.get("time", 0.0) for c in children)
        if self_time > 0:
            acc[ident] = acc.get(ident, 0.0) + self_time
        # Push in reverse so the first child is popped (visited) first.
        pending.extend((child, ident) for child in reversed(children))
def short_ident(identifier: str) -> str:
    """Render `func\\x00/abs/path.py\\x00LINE` as `func path.py:LINE`.

    Identifiers that do not split into exactly three NUL-separated fields
    are returned unchanged, truncated to 80 characters.
    """
    fields = identifier.split("\x00")
    if len(fields) != 3:
        return identifier[:80]
    func, path, line = fields
    file_name = Path(path).name
    return f"{func:30s} {file_name}:{line}"
def percentile(values: list[float], p: float) -> float:
    """Return the linearly interpolated percentile of ``values``.

    ``p`` is a fraction (0.95 for p95). An empty list yields 0.0. The input
    list is not modified.
    """
    if not values:
        return 0.0
    ordered = sorted(values)
    last = len(ordered) - 1
    rank = last * p
    lower = int(rank)
    upper = min(lower + 1, last)
    if lower == upper:
        return ordered[lower]
    span = ordered[upper] - ordered[lower]
    return ordered[lower] + span * (rank - lower)
def main() -> None:
    """Aggregate every ``*.html`` profile under ``--dir`` and print a report.

    The report has three sections: per-endpoint wall-time statistics, the
    ``--top`` hottest functions by self-time across all parsed requests, and
    the top three hot functions for each endpoint. Files whose name does not
    match the expected pattern, or whose session data cannot be loaded, are
    counted as skipped.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--dir", default="profiles")
    parser.add_argument("--top", type=int, default=20)
    parser.add_argument("--endpoint", default=None, help="substring filter on endpoint slug")
    opts = parser.parse_args()

    root = Path(opts.dir)
    html_files = sorted(root.glob("*.html"))
    if not html_files:
        raise SystemExit(f"no HTMLs in {root}/")

    durations_by_ep: dict[str, list[float]] = defaultdict(list)
    self_by_ep: dict[str, dict[str, float]] = defaultdict(dict)
    global_self: dict[str, float] = {}
    parsed = skipped = 0

    for html in html_files:
        name_match = _FILENAME_RE.match(html.name)
        if name_match is None:
            skipped += 1
            continue
        # Underscores in the slug are shown as path separators.
        endpoint = f"{name_match['method']} /{name_match['slug'].replace('_', '/')}"
        if opts.endpoint and opts.endpoint not in endpoint:
            # Filtered out by --endpoint: counted as neither parsed nor skipped.
            continue
        loaded = load_session(html)
        if not loaded:
            skipped += 1
            continue
        session, root_frame = loaded
        durations_by_ep[endpoint].append(session.get("duration", 0.0))
        walk_self_time(root_frame, global_self)
        walk_self_time(root_frame, self_by_ep[endpoint])
        parsed += 1

    heavy_rule = "=" * 100
    light_rule = "-" * 100

    print(f"parsed: {parsed} skipped: {skipped} from {root}/\n")

    # Section 1: wall-time per endpoint, largest total (mean * count) first.
    print(heavy_rule)
    print("PER-ENDPOINT WALL-TIME")
    print(heavy_rule)
    print(f"{'endpoint':<55} {'n':>6} {'mean':>9} {'p50':>9} {'p95':>9} {'max':>9}")
    print(light_rule)
    by_total = sorted(
        durations_by_ep.items(),
        key=lambda kv: -statistics.mean(kv[1]) * len(kv[1]),
    )
    for ep, durations in by_total:
        mean_ms = statistics.mean(durations) * 1000
        p50_ms = percentile(durations, 0.50) * 1000
        p95_ms = percentile(durations, 0.95) * 1000
        max_ms = max(durations) * 1000
        print(f"{ep[:55]:<55} {len(durations):>6} "
              f"{mean_ms:>8.1f}ms "
              f"{p50_ms:>8.1f}ms "
              f"{p95_ms:>8.1f}ms "
              f"{max_ms:>8.1f}ms")

    # Section 2: hottest functions over all parsed requests.
    print()
    print(heavy_rule)
    print(f"TOP {opts.top} HOT FUNCTIONS BY CUMULATIVE SELF-TIME (across {parsed} requests)")
    print(heavy_rule)
    # `or 1.0` avoids a division by zero when nothing was accumulated.
    total_self = sum(global_self.values()) or 1.0
    hottest = sorted(global_self.items(), key=lambda kv: -kv[1])[: opts.top]
    print(f"{'fn file:line':<70} {'self':>10} {'share':>8}")
    print(light_rule)
    for ident, seconds in hottest:
        share = seconds / total_self * 100
        print(f"{short_ident(ident):<70} {seconds*1000:>8.1f}ms {share:>6.1f}%")

    # Section 3: top three functions per endpoint, heaviest endpoint first.
    print()
    print(heavy_rule)
    print("TOP 3 HOT FUNCTIONS PER ENDPOINT")
    print(heavy_rule)
    for ep in sorted(self_by_ep, key=lambda e: -sum(self_by_ep[e].values())):
        ep_acc = self_by_ep[ep]
        ep_total = sum(ep_acc.values()) or 1.0
        print(f"\n{ep} ({len(durations_by_ep[ep])} samples, {ep_total*1000:.0f}ms total self)")
        for ident, seconds in sorted(ep_acc.items(), key=lambda kv: -kv[1])[:3]:
            print(f" {short_ident(ident):<70} {seconds*1000:>7.1f}ms ({seconds/ep_total*100:>4.1f}%)")


if __name__ == "__main__":
    main()

105
scripts/profile/classify_usage.py Executable file
View File

@@ -0,0 +1,105 @@
#!/usr/bin/env python3
"""
Classify the shape of a memray usage_over_time.csv as plateau, climb,
or climb-and-drop. Operates on the `memory_size_bytes` column.
Usage:
scripts/profile/classify_usage.py profiles/usage_over_time.csv
scripts/profile/classify_usage.py # newest *.csv in ./profiles/
"""
from __future__ import annotations
import csv
import statistics
import sys
from pathlib import Path
def _mb(n: float) -> str:
    """Format a byte count as ``<value> MB`` (1 MB = 1024 * 1024 bytes), one decimal."""
    mebibytes = n / (1024 * 1024)
    return f"{mebibytes:.1f} MB"
def load(path: Path) -> list[tuple[int, int]]:
    """Read ``(timestamp, memory_size_bytes)`` samples from a CSV file.

    Rows that lack either column, or whose values are not integers, are
    skipped. The result is sorted by timestamp.

    Args:
        path: CSV file with ``timestamp`` and ``memory_size_bytes`` columns.

    Returns:
        Non-empty list of ``(timestamp, memory_size_bytes)`` tuples.

    Raises:
        SystemExit: When the file contains no usable row.
    """
    samples: list[tuple[int, int]] = []
    # newline="" is the file mode the csv module documents for its readers.
    with path.open(newline="") as f:
        for row in csv.DictReader(f):
            try:
                samples.append((int(row["timestamp"]), int(row["memory_size_bytes"])))
            except (KeyError, ValueError, TypeError):
                # TypeError: a short row leaves the missing column as None.
                continue
    if not samples:
        sys.exit(f"no usable rows in {path}")
    samples.sort(key=lambda t: t[0])
    return samples
def classify(series: list[tuple[int, int]]) -> None:
    """Print summary statistics and a verdict for a memory-usage series.

    ``series`` holds ``(timestamp, memory_size_bytes)`` pairs; only the
    memory values are used, in the order given. The verdict is driven by how
    much memory was released between the peak and the final 10% of samples.
    """
    mem = [size for _, size in series]
    n = len(mem)
    window = max(1, n // 10)

    peak = max(mem)
    peak_idx = mem.index(peak)
    # Pre-peak baseline = median of the first 10% of samples.
    baseline = statistics.median(mem[:window])
    # Plateau = median of the last 10% of samples (what we settle to).
    plateau = statistics.median(mem[-window:])

    # "Tail drop" — how much was released after the peak.
    tail_drop = peak - plateau
    tail_drop_pct = (tail_drop / peak * 100) if peak else 0.0
    # "Growth during run" — end vs beginning.
    net_growth = plateau - baseline
    net_growth_pct = (net_growth / baseline * 100) if baseline else 0.0
    # Position of the peak in the timeline, as a fraction of the run.
    peak_position = peak_idx / (n - 1) if n > 1 else 0.0

    print(f"samples: {n}")
    print(f"baseline (first 10%): {_mb(baseline)}")
    print(f"peak: {_mb(peak)} at {peak_position:.0%} of run")
    print(f"plateau (last 10%): {_mb(plateau)}")
    print(f"tail drop: {_mb(tail_drop)} ({tail_drop_pct:+.1f}% vs peak)")
    print(f"net growth: {_mb(net_growth)} ({net_growth_pct:+.1f}% vs baseline)")
    print()

    # Heuristic: without a post-load rest period, the only reliable leak
    # signal is how much memory was released AFTER the peak. Net growth vs.
    # cold start is not useful — an active workload always grows vs. a cold
    # interpreter.
    #
    # Caveat: if the workload was still running when memray stopped,
    # "sustained-at-peak" is inconclusive (not necessarily a leak). Re-run
    # with a rest period after the scan for a definitive answer.
    if tail_drop_pct >= 10:
        verdict_lines = (
            "verdict: CLIMB-AND-DROP — memory released after peak.",
            " → no leak. Profile CPU next (pyinstrument).",
        )
    elif tail_drop_pct >= 3:
        verdict_lines = (
            "verdict: MOSTLY-RELEASED — partial release after peak.",
            " → likely healthy; re-run with a rest period after load",
            " to confirm (memray should capture post-workload idle).",
        )
    else:
        verdict_lines = (
            "verdict: SUSTAINED-AT-PEAK — memory held near peak at end of capture.",
            " → AMBIGUOUS: could be a leak, or the workload was still",
            " running when memray stopped. Re-run with a rest period",
            " after load, then check: memray flamegraph --leaks <bin>",
        )
    for text in verdict_lines:
        print(text)
def main() -> None:
    """Classify the CSV named on the command line, or the newest one in ./profiles/."""
    cli_args = sys.argv[1:]
    if cli_args:
        target = Path(cli_args[0])
    else:
        # No argument: take the most recently modified CSV under ./profiles/.
        by_mtime = sorted(Path("profiles").glob("*.csv"), key=lambda p: p.stat().st_mtime)
        if not by_mtime:
            sys.exit("no CSV found; pass a path or put one in ./profiles/")
        target = by_mtime[-1]
    print(f"analyzing {target}\n")
    samples = load(target)
    classify(samples)


if __name__ == "__main__":
    main()

17
scripts/profile/cprofile-cli.sh Executable file
View File

@@ -0,0 +1,17 @@
#!/usr/bin/env bash
# Run a `decnet` subcommand under cProfile and write a .prof file for snakeviz.
# Usage: scripts/profile/cprofile-cli.sh services
# scripts/profile/cprofile-cli.sh status
set -euo pipefail

# A subcommand is mandatory; it and every following argument are forwarded
# to decnet.cli unchanged.
if (( $# < 1 )); then
  printf 'Usage: %s <decnet-subcommand> [args...]\n' "$0" >&2
  exit 1
fi

# OUT may be preset by the caller; otherwise a timestamped file under profiles/.
OUT="${OUT:-profiles/cprofile-$(date +%s).prof}"
mkdir -p "$(dirname "$OUT")"

python -m cProfile -o "${OUT}" -m decnet.cli "$@"

printf 'Wrote %s\n' "${OUT}"
printf 'View with: snakeviz %s\n' "${OUT}"

16
scripts/profile/memray-api.sh Executable file
View File

@@ -0,0 +1,16 @@
#!/usr/bin/env bash
# Run the DECNET API under memray to capture an allocation profile.
# Stop with Ctrl-C; then render with `memray flamegraph <bin>`.
set -euo pipefail

HOST="${DECNET_API_HOST:-127.0.0.1}"
PORT="${DECNET_API_PORT:-8000}"
# OUT may be preset by the caller; otherwise a timestamped file under profiles/.
OUT="${OUT:-profiles/memray-$(date +%s).bin}"
mkdir -p "$(dirname "$OUT")"

# Print the render hint from an EXIT trap instead of a trailing echo. The
# documented way to stop is Ctrl-C, and a non-zero memray exit under `set -e`
# ends the script too — either way a statement placed after the memray
# command may never run. The hint is only shown when a capture file exists.
render_hint() {
  if [[ -f "${OUT}" ]]; then
    echo "Render with: memray flamegraph ${OUT}"
  fi
}
trap render_hint EXIT
# Turn Ctrl-C into a regular exit (status 130) so the EXIT trap above runs.
trap 'exit 130' INT

echo "Starting uvicorn under memray -> ${OUT}"
python -m memray run --trace-python-allocators --follow-fork \
  -o "${OUT}" -m uvicorn decnet.web.api:app \
  --host "${HOST}" --port "${PORT}" --log-level warning

32
scripts/profile/pyspy-attach.sh Executable file
View File

@@ -0,0 +1,32 @@
#!/usr/bin/env bash
# Attach py-spy to the running DECNET uvicorn worker(s) and record a flamegraph.
# Requires sudo on Linux because of kernel.yama.ptrace_scope=1 by default.
set -euo pipefail

# Recording length in seconds and output file; both overridable via the environment.
DURATION="${DURATION:-30}"
OUT="${OUT:-profiles/pyspy-$(date +%s).svg}"
mkdir -p "$(dirname "$OUT")"

# First PID whose command line matches the uvicorn API invocation.
# `|| true` keeps `set -e`/pipefail from aborting when nothing matches.
PID="$(pgrep -f 'uvicorn decnet.web.api' | head -n 1 || true)"
if [[ -z "${PID}" ]]; then
  echo "No uvicorn worker found. Start the API first (e.g. 'decnet deploy ...')." >&2
  exit 1
fi

PY_VER="$(python -c 'import sys; print(f"{sys.version_info[0]}.{sys.version_info[1]}")')"
# Compare major and minor numerically. A string comparison orders "3.9"
# after "3.14", which would wrongly flag older interpreters as unsupported.
PY_MAJOR="${PY_VER%%.*}"
PY_MINOR="${PY_VER#*.}"
if (( PY_MAJOR > 3 || (PY_MAJOR == 3 && PY_MINOR >= 14) )); then
  cat >&2 <<EOF
WARNING: py-spy 0.4.1 (latest on PyPI) does not yet support Python ${PY_VER}.
Attaching will fail with "No python processes found in process <pid>".
Use one of the other lenses for now:
DECNET_PROFILE_REQUESTS=true # pyinstrument, per-request flamegraphs
scripts/profile/memray-api.sh # memory allocation profiling
scripts/profile/cprofile-cli.sh <cmd> # deterministic CLI profiling
Track upstream: https://github.com/benfred/py-spy/releases
EOF
  exit 2
fi

echo "Attaching py-spy to PID ${PID} for ${DURATION}s -> ${OUT}"
sudo .venv/bin/py-spy record -o "${OUT}" -p "${PID}" -d "${DURATION}" --subprocesses
echo "Wrote ${OUT}"

69
scripts/profile/view.sh Executable file
View File

@@ -0,0 +1,69 @@
#!/usr/bin/env bash
# Open the newest profile artifact in the right viewer.
#
# Usage:
# scripts/profile/view.sh # newest file in ./profiles/
# scripts/profile/view.sh <file> # explicit path
# scripts/profile/view.sh cprofile # newest .prof
# scripts/profile/view.sh memray # newest memray .bin
# scripts/profile/view.sh pyspy # newest .svg
# scripts/profile/view.sh pyinstrument # newest pyinstrument .html
#
# Memray viewer override:
# VIEW=flamegraph|table|tree|stats|summary (default: flamegraph)
# VIEW=leaks (render flamegraph with --leaks filter)
set -euo pipefail

DIR="${DIR:-profiles}"
VIEW="${VIEW:-flamegraph}"

if [[ ! -d "${DIR}" ]]; then
  echo "No ${DIR}/ directory yet — run one of the profile scripts first." >&2
  exit 1
fi

# pick_newest PATTERN [extra find tests...]
# Print the most recently modified regular file directly under ${DIR} whose
# name matches PATTERN (and any extra tests); print nothing when none match.
pick_newest() {
  local pattern="$1"
  shift
  find "${DIR}" -maxdepth 1 -type f -name "${pattern}" "$@" -printf '%T@ %p\n' 2>/dev/null \
    | sort -n | tail -n 1 | cut -d' ' -f2-
}

# Render a memray capture with the viewer selected through VIEW.
view_memray() {
  case "${VIEW}" in
    leaks)              exec memray flamegraph --leaks -f "${TARGET}" ;;
    flamegraph|table)   exec memray "${VIEW}" -f "${TARGET}" ;;
    tree|stats|summary) exec memray "${VIEW}" "${TARGET}" ;;
    *)                  echo "Unknown VIEW=${VIEW}" >&2; exit 1 ;;
  esac
}

# Resolve the first argument: a known keyword picks the newest artifact of
# that kind, no argument picks the newest file overall, anything else is
# taken as an explicit path.
selector="${1:-}"
case "${selector}" in
  "")           TARGET="$(pick_newest '*')" ;;
  cprofile)     TARGET="$(pick_newest '*.prof')" ;;
  memray)       TARGET="$(pick_newest 'memray-*.bin')" ;;
  pyspy)        TARGET="$(pick_newest 'pyspy-*.svg')" ;;
  pyinstrument) TARGET="$(pick_newest '*.html' ! -name 'memray-*')" ;;
  *)            TARGET="${selector}" ;;
esac

if [[ -z "${TARGET}" || ! -f "${TARGET}" ]]; then
  echo "No matching profile artifact found." >&2
  exit 1
fi

echo "Opening ${TARGET}"

# Dispatch on the file extension.
if [[ "${TARGET}" == *.prof ]]; then
  exec snakeviz "${TARGET}"
elif [[ "${TARGET}" == *.bin ]]; then
  view_memray
elif [[ "${TARGET}" == *.svg || "${TARGET}" == *.html ]]; then
  exec xdg-open "${TARGET}"
else
  echo "Don't know how to view ${TARGET}" >&2
  exit 1
fi

View File

@@ -0,0 +1,22 @@
"""Vulture whitelist — names that look unused but aren't.
Run via:
vulture decnet vulture_whitelist.py --min-confidence 80
Each entry suppresses a known false positive. Add a comment with the
file:line and the reason so future-you can revisit.
"""
# NOTE: every bare name below is a deliberate expression statement. This file
# is meant to be passed to vulture alongside the package (see the command in
# the module docstring), not imported or executed — running it would raise
# NameError on the first name.

# FastAPI auth dependencies — `Depends()` runs for the side effect
# (auth/RBAC enforcement) even when the injected value is unused inside
# the handler body. Vulture can't see that.
viewer # decnet/web/router/canary/api_tokens.py:176, 198, 284 — Depends(require_viewer)
admin # any handler with admin: dict = Depends(require_admin) where the body doesn't read it
user # any handler with user: dict = Depends(require_user) where the body doesn't read it
# IMAP stub — UID SEARCH vs sequence SEARCH is a real protocol
# differentiator, but in this honeypot stub UID == seq number (see the
# "UID == sequence number" comment at the top of the email fixtures), so
# the parameter is intentionally a no-op.
uid_mode # decnet/templates/imap/server.py:646