merge: testing → main (reconcile 2-week divergence)

2026-04-28 18:36:00 -04:00
parent 499836c9e4
commit 862e4dbb31
1235 changed files with 160255 additions and 7996 deletions
--- a/scripts/bus/pub.py
+++ b/scripts/bus/pub.py
@@ -0,0 +1,44 @@
+#!/usr/bin/env python3
+"""Publish a single event to the local DECNET bus.
+
+Usage: scripts/bus/pub.py <topic> [json-payload] [--type EVENT_TYPE]
+Examples:
+    scripts/bus/pub.py topology.abc.status '{"state": "active"}'
+    scripts/bus/pub.py topology.abc.mutation.applied '{"id": 1}' --type applied
+"""
+from __future__ import annotations
+
+import argparse
+import asyncio
+import json
+import os
+
+from decnet.bus.unix_client import UnixSocketBus
+
+
+async def main(topic: str, payload: dict, event_type: str) -> None:
+    """Connect to the bus socket, publish one event, and always close the client.
+
+    The socket path is read from ``$DECNET_BUS_SOCKET`` and falls back to
+    ``/tmp/decnet-bus.sock``. ``topic``, ``payload`` and ``event_type`` are
+    handed to ``UnixSocketBus.publish`` unchanged.
+    """
+    sock = os.environ.get("DECNET_BUS_SOCKET", "/tmp/decnet-bus.sock")
+    client = UnixSocketBus(sock, client_name="scripts-pub")
+    await client.connect()
+    try:
+        await client.publish(topic, payload, event_type=event_type)
+        # Echo the published event on stdout once publish() has returned.
+        print(f"pub: {topic}  type={event_type!r}  payload={payload}")
+    finally:
+        # Close the connection whether or not the publish succeeded.
+        await client.close()
+
+
+if __name__ == "__main__":
+    # CLI entry point: parse arguments, validate the payload, publish once.
+    ap = argparse.ArgumentParser()
+    ap.add_argument("topic")
+    ap.add_argument("payload", nargs="?", default="{}", help="JSON object (default {})")
+    ap.add_argument("--type", dest="event_type", default="", help="optional event_type tag")
+    args = ap.parse_args()
+
+    # Malformed JSON exits with a one-line message instead of a traceback.
+    try:
+        payload = json.loads(args.payload)
+    except json.JSONDecodeError as exc:
+        raise SystemExit(f"pub: payload is not valid JSON: {exc}")
+    # Valid JSON that is not an object (list, number, string, ...) is rejected too.
+    if not isinstance(payload, dict):
+        raise SystemExit("pub: payload must be a JSON object")
+
+    asyncio.run(main(args.topic, payload, args.event_type))
--- a/scripts/bus/smoke-mutator.sh
+++ b/scripts/bus/smoke-mutator.sh
@@ -0,0 +1,90 @@
+#!/usr/bin/env bash
+# Mutator-family topic smoke test: boots a bus worker, subscribes to
+# `topology.>`, publishes one event per mutation-lifecycle state
+# (enqueued → applying → applied) plus a topology.status transition,
+# and verifies each lands on the subscriber.
+#
+# This is a cheap E2E for the topic hierarchy wired into the mutator
+# and SSE route — the full DB + mutator + API loop is exercised by the
+# pytest suite under tests/topology/ and tests/api/topology/.
+#
+# Usage: scripts/bus/smoke-mutator.sh
+set -euo pipefail
+
+SOCK="$(mktemp -u -t decnet-bus-mut-smoke.XXXXXX.sock)"
+export DECNET_BUS_SOCKET="${SOCK}"
+LOGDIR="$(mktemp -d -t decnet-bus-mut-smoke.XXXXXX)"
+
+HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+TID="smoke-$(date +%s)"
+
+cleanup() {
+    # Runs on every exit path, including failures that happen before the
+    # worker or the subscriber has been started. Only signal PIDs that were
+    # actually recorded: falling back to PID 0 would make `kill` signal the
+    # whole process group (this script and whatever launched it).
+    if [[ -n "${SUB_PID:-}" ]]; then
+        kill "${SUB_PID}" 2>/dev/null || true
+    fi
+    if [[ -n "${WORKER_PID:-}" ]]; then
+        kill "${WORKER_PID}" 2>/dev/null || true
+    fi
+    # Reap the background jobs so nothing is still writing into LOGDIR.
+    wait 2>/dev/null || true
+    rm -f "${SOCK}"
+    rm -rf "${LOGDIR}"
+}
+trap cleanup EXIT
+
+echo "smoke-mutator: socket=${SOCK} topology=${TID}"
+
+decnet bus --socket "${SOCK}" --group "" --heartbeat 5 \
+    > "${LOGDIR}/worker.log" 2>&1 &
+WORKER_PID=$!
+
+for _ in {1..40}; do
+    [[ -S "${SOCK}" ]] && break
+    sleep 0.05
+done
+if [[ ! -S "${SOCK}" ]]; then
+    echo "smoke-mutator: FAIL — bus worker never created ${SOCK}" >&2
+    cat "${LOGDIR}/worker.log" >&2
+    exit 1
+fi
+
+python "${HERE}/sub.py" 'topology.>' > "${LOGDIR}/sub.log" 2>&1 &
+SUB_PID=$!
+
+sleep 0.3
+
+publish() {
+    local topic="$1" payload="$2"
+    python "${HERE}/pub.py" "${topic}" "${payload}" >/dev/null
+}
+
+publish "topology.${TID}.mutation.enqueued"  '{"mutation_id": "m1", "op": "add_lan"}'
+publish "topology.${TID}.mutation.applying"  '{"mutation_id": "m1", "op": "add_lan"}'
+publish "topology.${TID}.mutation.applied"   '{"mutation_id": "m1", "op": "add_lan"}'
+publish "topology.${TID}.status"             '{"state": "degraded", "reason": "smoke"}'
+
+expected=(
+    "topology.${TID}.mutation.enqueued"
+    "topology.${TID}.mutation.applying"
+    "topology.${TID}.mutation.applied"
+    "topology.${TID}.status"
+)
+
+# Poll the subscriber log (up to 60 x 50ms) until every expected topic has
+# appeared. Topics are matched as fixed strings (-F): they contain dots,
+# which grep would otherwise interpret as "any character".
+for _ in {1..60}; do
+    missing=0
+    for topic in "${expected[@]}"; do
+        if ! grep -qF -- "${topic}" "${LOGDIR}/sub.log"; then
+            missing=1
+            break
+        fi
+    done
+    [[ "${missing}" -eq 0 ]] && break
+    sleep 0.05
+done
+
+# Final pass: report the first topic that never arrived, with both logs.
+for topic in "${expected[@]}"; do
+    if ! grep -qF -- "${topic}" "${LOGDIR}/sub.log"; then
+        echo "smoke-mutator: FAIL — missing ${topic}" >&2
+        echo "--- worker.log ---" >&2; cat "${LOGDIR}/worker.log" >&2
+        echo "--- sub.log ---"    >&2; cat "${LOGDIR}/sub.log"    >&2
+        exit 1
+    fi
+done
+
+echo "smoke-mutator: OK — all 4 mutator-family events delivered"
+grep -E 'mutation|status' "${LOGDIR}/sub.log" || true
--- a/scripts/bus/smoke.sh
+++ b/scripts/bus/smoke.sh
@@ -0,0 +1,57 @@
+#!/usr/bin/env bash
+# End-to-end bus smoke test: boots a worker, subscribes, publishes,
+# verifies the event lands, tears everything down. Exits non-zero if
+# anything misbehaves.
+#
+# Usage: scripts/bus/smoke.sh
+set -euo pipefail
+
+SOCK="$(mktemp -u -t decnet-bus-smoke.XXXXXX.sock)"
+export DECNET_BUS_SOCKET="${SOCK}"
+LOGDIR="$(mktemp -d -t decnet-bus-smoke.XXXXXX)"
+
+# One EXIT handler for every exit path. A PID is only signalled once it has
+# been recorded, so the handler can be installed before either background
+# process exists and never needs to be re-registered.
+cleanup() {
+    if [[ -n "${SUB_PID:-}" ]]; then
+        kill "${SUB_PID}" 2>/dev/null || true
+    fi
+    if [[ -n "${WORKER_PID:-}" ]]; then
+        kill "${WORKER_PID}" 2>/dev/null || true
+    fi
+    wait 2>/dev/null || true
+    rm -f "${SOCK}"
+    rm -rf "${LOGDIR}"
+}
+trap cleanup EXIT
+
+HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+echo "smoke: socket=${SOCK}"
+
+decnet bus --socket "${SOCK}" --group "" --heartbeat 1 \
+    > "${LOGDIR}/worker.log" 2>&1 &
+WORKER_PID=$!
+
+# Wait for the socket to exist.
+for _ in {1..40}; do
+    [[ -S "${SOCK}" ]] && break
+    sleep 0.05
+done
+if [[ ! -S "${SOCK}" ]]; then
+    echo "smoke: FAIL — worker never created ${SOCK}" >&2
+    cat "${LOGDIR}/worker.log" >&2
+    exit 1
+fi
+
+# Subscriber in the background, redirected to a file we can tail.
+python "${HERE}/sub.py" 'topology.>' > "${LOGDIR}/sub.log" 2>&1 &
+SUB_PID=$!
+
+# Give the SUB frame a tick to register.
+sleep 0.3
+
+python "${HERE}/pub.py" topology.abc.status '{"state": "active"}' >/dev/null
+
+# Wait up to 2s for the event to show up. -F matches the topic literally;
+# without it the dots would match any character.
+for _ in {1..40}; do
+    if grep -qF 'topology.abc.status' "${LOGDIR}/sub.log"; then
+        echo "smoke: OK — subscriber received event"
+        grep -F 'topology.abc.status' "${LOGDIR}/sub.log"
+        exit 0
+    fi
+    sleep 0.05
+done
+
+echo "smoke: FAIL — subscriber never saw the event" >&2
+echo "--- worker.log ---" >&2; cat "${LOGDIR}/worker.log" >&2
+echo "--- sub.log ---"    >&2; cat "${LOGDIR}/sub.log"    >&2
+exit 1
--- a/scripts/bus/start.sh
+++ b/scripts/bus/start.sh
@@ -0,0 +1,11 @@
+#!/usr/bin/env bash
+# Start a local `decnet bus` worker for manual smoke-testing.
+# Uses /tmp so it works without root and without the `decnet` POSIX group.
+# Usage: scripts/bus/start.sh [heartbeat-seconds]
+set -euo pipefail
+
+SOCK="${DECNET_BUS_SOCKET:-/tmp/decnet-bus.sock}"
+HEARTBEAT="${1:-3}"
+
+echo "bus: socket=${SOCK} heartbeat=${HEARTBEAT}s  (Ctrl-C to stop)"
+exec decnet bus --socket "${SOCK}" --group "" --heartbeat "${HEARTBEAT}"
--- a/scripts/bus/sub.py
+++ b/scripts/bus/sub.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python3
+"""Subscribe to a pattern on the local DECNET bus and print events.
+
+Usage: scripts/bus/sub.py 'topology.>'
+       scripts/bus/sub.py 'system.bus.health'
+       DECNET_BUS_SOCKET=/tmp/decnet-bus.sock scripts/bus/sub.py 'topology.*.status'
+"""
+from __future__ import annotations
+
+import asyncio
+import os
+import sys
+
+from decnet.bus.unix_client import UnixSocketBus
+
+
+async def main(pattern: str) -> None:
+    """Subscribe to ``pattern`` on the bus and print every event until stopped.
+
+    The socket path is read from ``$DECNET_BUS_SOCKET`` and falls back to
+    ``/tmp/decnet-bus.sock``. Each received event is printed on its own
+    line and flushed immediately so the output can be followed from a file.
+    """
+    sock = os.environ.get("DECNET_BUS_SOCKET", "/tmp/decnet-bus.sock")
+    client = UnixSocketBus(sock, client_name="scripts-sub")
+    await client.connect()
+    # Everything after a successful connect() sits inside the try so the
+    # client is closed even if subscribing itself fails.
+    try:
+        sub = client.subscribe(pattern)
+        print(f"sub: pattern={pattern!r} socket={sock}  (Ctrl-C to stop)", flush=True)
+        async with sub:
+            async for ev in sub:
+                print(f"{ev.topic}  type={ev.type!r}  payload={ev.payload}", flush=True)
+    finally:
+        await client.close()
+
+
+if __name__ == "__main__":
+    if len(sys.argv) != 2:
+        print("Usage: sub.py <pattern>", file=sys.stderr)
+        sys.exit(2)
+    try:
+        asyncio.run(main(sys.argv[1]))
+    except KeyboardInterrupt:
+        pass
--- a/scripts/decnet-init.sh
+++ b/scripts/decnet-init.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+sudo .311/bin/decnet init --force --install-dir "$PWD" --user anti --group anti
--- a/scripts/mock-webhook-receiver.py
+++ b/scripts/mock-webhook-receiver.py
@@ -0,0 +1,201 @@
+#!/usr/bin/env python3
+"""Mock webhook receiver for local DECNET testing.
+
+Listens on a local port, accepts POSTs from the `decnet webhook`
+worker (or the `/api/v1/webhooks/{uuid}/test` admin endpoint), and
+pretty-prints each delivery with HMAC verification status.
+
+Usage:
+    # Start a receiver on port 8765, skip HMAC verification (unverified badge)
+    scripts/mock-webhook-receiver.py
+
+    # Verify HMAC against a known secret — reads DECNET_MOCK_SECRET env or --secret
+    scripts/mock-webhook-receiver.py --secret deadbeefdeadbeef
+
+    # Bind a different port / host
+    scripts/mock-webhook-receiver.py --host 0.0.0.0 --port 9000
+
+    # Simulate SIEM downtime — return a failure status for every POST so the
+    # worker's retry/backoff path can be exercised end-to-end.
+    scripts/mock-webhook-receiver.py --fail 503
+
+Once running, create a webhook in DECNET pointing at the URL printed on
+startup (e.g. http://localhost:8765/). The receiver accepts any path
+— it's a catch-all — so the URL path after the host is yours to pick.
+
+Pure stdlib. No dependencies to install.
+"""
+from __future__ import annotations
+
+import argparse
+import hashlib
+import hmac
+import json
+import os
+import sys
+from datetime import datetime
+from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
+
+
+# ANSI colors — stripped when stdout isn't a TTY.
+_ISATTY = sys.stdout.isatty()
+
+
+def _c(code: str) -> str:
+    return code if _ISATTY else ""
+
+
+RESET = _c("\033[0m")
+DIM = _c("\033[2m")
+BOLD = _c("\033[1m")
+GREEN = _c("\033[32m")
+RED = _c("\033[31m")
+YELLOW = _c("\033[33m")
+CYAN = _c("\033[36m")
+MAGENTA = _c("\033[35m")
+GRAY = _c("\033[90m")
+
+
+def _verify_hmac(secret: str, body: bytes, sig_header: str) -> bool:
+    """Return True iff the received signature matches our recomputed HMAC.
+
+    ``sig_header`` must have the form ``sha256=<hexdigest>``; any other
+    prefix is rejected. The expected digest is HMAC-SHA256 over ``body``
+    keyed with the UTF-8 encoding of ``secret``. A header containing
+    non-ASCII characters simply fails verification.
+    """
+    prefix = "sha256="
+    if not sig_header.startswith(prefix):
+        return False
+    received = sig_header[len(prefix):]
+    expected = hmac.new(
+        secret.encode("utf-8"), body, hashlib.sha256
+    ).hexdigest()
+    # compare_digest() raises TypeError when given a non-ASCII str, so
+    # compare bytes instead. "replace" maps anything unencodable to "?",
+    # which can never equal a hex digit.
+    return hmac.compare_digest(
+        received.encode("utf-8", "replace"), expected.encode("ascii")
+    )
+
+
+class WebhookHandler(BaseHTTPRequestHandler):
+    """Catch-all handler that prints every POSTed webhook delivery.
+
+    GET answers with a plain-text health reply. POST prints the delivery
+    headers, an HMAC verification badge and the body, then responds with
+    200 or with the configured failure status.
+    """
+
+    # Class-level config injected by `main`.
+    # secret: shared webhook secret; None disables HMAC verification.
+    secret: str | None = None
+    # fail_status: when set, every POST is answered with this HTTP status.
+    fail_status: int | None = None
+
+    # Silence the default noisy per-request log line — we print our own.
+    def log_message(self, format, *args):  # noqa: A002,N802 — BaseHTTPRequestHandler API
+        return
+
+    def do_GET(self):  # noqa: N802 — BaseHTTPRequestHandler API
+        """Friendly health check so you can `curl http://localhost:8765/`."""
+        body = (
+            b"DECNET mock webhook receiver.\n"
+            b"POST to any path to test delivery.\n"
+        )
+        self.send_response(200)
+        self.send_header("Content-Type", "text/plain; charset=utf-8")
+        self.send_header("Content-Length", str(len(body)))
+        self.end_headers()
+        self.wfile.write(body)
+
+    def _read_body(self) -> bytes | None:
+        """Read the request body, or answer 400 and return None.
+
+        A missing or empty Content-Length means "no body". A value that is
+        not an integer, or is negative, is rejected: a negative length
+        would make ``rfile.read`` block until the client closes.
+        """
+        declared = self.headers.get("Content-Length")
+        if not declared:
+            return b""
+        try:
+            length = int(declared)
+        except ValueError:
+            length = -1
+        if length < 0:
+            print(
+                f"{RED}rejected POST {self.path}: "
+                f"invalid Content-Length {declared!r}{RESET}",
+                flush=True,
+            )
+            self.send_error(400, "invalid Content-Length")
+            return None
+        return self.rfile.read(length) if length else b""
+
+    def do_POST(self):  # noqa: N802 — BaseHTTPRequestHandler API
+        """Print one delivery and answer with 200 or the configured status."""
+        raw_body = self._read_body()
+        if raw_body is None:
+            # _read_body() has already sent the 400 response.
+            return
+
+        sig = self.headers.get("X-DECNET-Signature", "")
+        event_id = self.headers.get("X-DECNET-Event-Id", "—")
+        topic = self.headers.get("X-DECNET-Event-Topic", "—")
+        ts_hdr = self.headers.get("X-DECNET-Timestamp", "")
+
+        # Signature verification
+        if self.secret is None:
+            sig_badge = f"{YELLOW}UNVERIFIED{RESET}"
+        elif not sig:
+            sig_badge = f"{RED}NO SIGNATURE{RESET}"
+        elif _verify_hmac(self.secret, raw_body, sig):
+            sig_badge = f"{GREEN}HMAC OK{RESET}"
+        else:
+            sig_badge = f"{RED}HMAC MISMATCH{RESET}"
+
+        # Decode the body — print as JSON when possible, raw otherwise.
+        try:
+            payload = json.loads(raw_body.decode("utf-8") or "{}")
+            body_text = json.dumps(payload, indent=2, sort_keys=True)
+        except (ValueError, UnicodeDecodeError):
+            body_text = raw_body.decode("utf-8", errors="replace")
+
+        now = datetime.now().strftime("%H:%M:%S")
+        print(
+            f"{DIM}{now}{RESET} "
+            f"{BOLD}{MAGENTA}[POST {self.path}]{RESET} "
+            f"{sig_badge} "
+            f"{CYAN}topic={topic}{RESET} "
+            f"{GRAY}event_id={event_id}{RESET}"
+            f"{(' ' + GRAY + 'ts=' + ts_hdr + RESET) if ts_hdr else ''}",
+            flush=True,
+        )
+        for line in body_text.splitlines() or [""]:
+            print(f"  {line}", flush=True)
+        print("", flush=True)
+
+        # Response — success by default; configurable for retry-path testing.
+        if self.fail_status is not None:
+            status = self.fail_status
+            reason = f"mock failure (--fail {self.fail_status})"
+        else:
+            status = 200
+            reason = "ok"
+        resp = json.dumps({"received": True, "reason": reason}).encode()
+        self.send_response(status)
+        self.send_header("Content-Type", "application/json")
+        self.send_header("Content-Length", str(len(resp)))
+        self.end_headers()
+        self.wfile.write(resp)
+
+
+def main() -> None:
+    """Parse CLI options, configure `WebhookHandler`, and serve until Ctrl-C.
+
+    Options: ``--host`` / ``--port`` choose the bind address, ``--secret``
+    (default ``$DECNET_MOCK_SECRET``) enables HMAC verification, and
+    ``--fail STATUS`` makes every POST answer with that status.
+    """
+    ap = argparse.ArgumentParser(
+        description="Mock HTTP receiver for DECNET webhook testing.",
+    )
+    ap.add_argument("--host", default="127.0.0.1", help="Bind host (default: 127.0.0.1)")
+    ap.add_argument("--port", type=int, default=8765, help="Bind port (default: 8765)")
+    ap.add_argument(
+        "--secret",
+        default=os.environ.get("DECNET_MOCK_SECRET"),
+        help="Webhook secret — HMAC is verified against received body when provided. "
+             "Falls back to $DECNET_MOCK_SECRET. Omit to skip verification.",
+    )
+    ap.add_argument(
+        "--fail",
+        type=int,
+        metavar="STATUS",
+        help="Return this HTTP status for every POST instead of 200. "
+             "Useful for exercising the worker's retry backoff "
+             "(try --fail 503 or --fail 429).",
+    )
+    args = ap.parse_args()
+
+    # Treat an empty secret (e.g. DECNET_MOCK_SECRET="") as "no secret" so
+    # the handler and the banner below agree on whether verification is on.
+    WebhookHandler.secret = args.secret or None
+    WebhookHandler.fail_status = args.fail
+
+    verify_note = (
+        f"{GREEN}HMAC verification ENABLED{RESET}"
+        if args.secret
+        else f"{YELLOW}HMAC verification OFF (pass --secret to enable){RESET}"
+    )
+    fail_note = (
+        f"\n  {RED}RESPONSE MODE: failing every request with {args.fail}{RESET}"
+        if args.fail is not None
+        else ""
+    )
+
+    # Bind before printing the banner so a port conflict fails without first
+    # claiming to be listening. The context manager closes the listening
+    # socket on every exit path, not only on Ctrl-C.
+    with ThreadingHTTPServer((args.host, args.port), WebhookHandler) as server:
+        # Report the port actually bound (differs from --port when it is 0).
+        url = f"http://{args.host}:{server.server_port}/"
+        banner = (
+            f"\n{BOLD}{CYAN}DECNET mock webhook receiver{RESET}\n"
+            f"  listening on {BOLD}{url}{RESET}\n"
+            f"  {verify_note}{fail_note}\n"
+            f"  POST to any path; GET / for a health reply.\n"
+            f"  Ctrl-C to stop.\n"
+        )
+        print(banner, flush=True)
+
+        try:
+            server.serve_forever()
+        except KeyboardInterrupt:
+            print(f"\n{DIM}receiver stopped.{RESET}", flush=True)
+
+
+if __name__ == "__main__":
+    main()
--- a/scripts/profile/aggregate_requests.py
+++ b/scripts/profile/aggregate_requests.py
@@ -0,0 +1,192 @@
+#!/usr/bin/env python3
+"""
+Aggregate pyinstrument request profiles from ./profiles/*.html.
+
+The PyinstrumentMiddleware writes one HTML per request. After a Locust run
+there are hundreds of them — reading one by one is useless. This rolls
+everything up into two views:
+
+    1. Per-endpoint summary (count, mean/p50/p95/max wall-time)
+    2. Top hot functions by cumulative self-time across ALL requests
+
+Usage:
+    scripts/profile/aggregate_requests.py               # ./profiles/
+    scripts/profile/aggregate_requests.py --dir PATH
+    scripts/profile/aggregate_requests.py --top 30      # show top 30 funcs
+    scripts/profile/aggregate_requests.py --endpoint login   # filter
+
+Self-time of a frame = frame.time - sum(child.time) — i.e. time spent
+executing the function's own code, excluding descendants. That's the
+right signal for "where is the CPU actually going".
+"""
+from __future__ import annotations
+
+import argparse
+import json
+import re
+import statistics
+from collections import defaultdict
+from pathlib import Path
+
+
+_FILENAME_RE = re.compile(r"^(?P<ts>\d+)-(?P<method>[A-Z]+)-(?P<slug>.+)\.html$")
+_SESSION_RE = re.compile(r"const sessionData = (\{.*?\});\s*\n\s*pyinstrumentHTMLRenderer", re.DOTALL)
+
+
+def load_session(path: Path) -> tuple[dict, dict] | None:
+    """Return (session_summary, frame_tree_root) or None.
+
+    None is returned when the file cannot be read or decoded as text, when
+    the embedded ``sessionData`` object is not found, or when that object
+    is not valid JSON or lacks the ``session`` / ``frame_tree`` keys.
+    """
+    try:
+        text = path.read_text()
+    except (OSError, UnicodeDecodeError):
+        # A file that is not decodable is skipped like an unreadable one,
+        # rather than aborting the whole aggregation run.
+        return None
+    m = _SESSION_RE.search(text)
+    if not m:
+        return None
+    try:
+        payload = json.loads(m.group(1))
+        return payload["session"], payload["frame_tree"]
+    except (json.JSONDecodeError, KeyError):
+        return None
+
+
+_SYNTHETIC = {"[self]", "[await]"}
+
+
+def _is_synthetic(identifier: str) -> bool:
+    """Tell whether `identifier` is a `[self]` / `[await]` marker frame.
+
+    True for an exact marker name and for any identifier that begins with
+    one of the markers.
+    """
+    if identifier in _SYNTHETIC:
+        return True
+    return identifier.startswith(("[self]", "[await]"))
+
+
+def walk_self_time(frame: dict | None, acc: dict[str, float], parent_ident: str | None = None) -> None:
+    """
+    Add each frame's self-time into `acc`, keyed by frame identifier.
+
+    Self-time is the frame's `time` minus the summed `time` of its children;
+    it is recorded only when positive. Synthetic `[self]` / `[await]` frames
+    are not given their own bucket: their whole `time` is credited to
+    `parent_ident` (or to their own identifier when there is no parent) and
+    their children are not visited. Frames that are empty or have no
+    identifier are ignored.
+    """
+    if not frame:
+        return
+    ident = frame.get("identifier")
+    if not ident:
+        return
+
+    frame_time = frame.get("time", 0.0)
+    kids = frame.get("children") or []
+    own_time = frame_time - sum(kid.get("time", 0.0) for kid in kids)
+
+    if _is_synthetic(ident):
+        # Fold the marker's time into the enclosing real function.
+        bucket = parent_ident or ident
+        acc[bucket] = acc.get(bucket, 0.0) + frame_time
+        return
+
+    if own_time > 0:
+        acc[ident] = acc.get(ident, 0.0) + own_time
+    for kid in kids:
+        walk_self_time(kid, acc, parent_ident=ident)
+
+
+def short_ident(identifier: str) -> str:
+    """Render `func\\x00/abs/path.py\\x00LINE` as `func  path.py:LINE`.
+
+    The function name is padded to 30 columns and only the file's base name
+    is kept. Anything that does not split into exactly three NUL-separated
+    parts is returned truncated to 80 characters.
+    """
+    pieces = identifier.split("\x00")
+    if len(pieces) != 3:
+        return identifier[:80]
+    func, path, line = pieces
+    return f"{func:30s}  {Path(path).name}:{line}"
+
+
+def percentile(values: list[float], p: float) -> float:
+    """Return the `p` quantile (0..1) of `values`, linearly interpolated.
+
+    The input is sorted into a copy; an empty list yields 0.0.
+    """
+    if not values:
+        return 0.0
+    ordered = sorted(values)
+    last = len(ordered) - 1
+    rank = last * p
+    lower = int(rank)
+    upper = min(lower + 1, last)
+    if lower == upper:
+        return ordered[lower]
+    return ordered[lower] + (ordered[upper] - ordered[lower]) * (rank - lower)
+
+
+def main() -> None:
+    """Aggregate every profile HTML in --dir and print three report tables.
+
+    Tables: per-endpoint wall-time (count, mean, p50, p95, max), the --top
+    hottest functions by cumulative self-time across all parsed requests,
+    and the top three functions per endpoint. Exits with a message when the
+    directory contains no ``*.html`` files.
+    """
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--dir", default="profiles")
+    ap.add_argument("--top", type=int, default=20)
+    ap.add_argument("--endpoint", default=None, help="substring filter on endpoint slug")
+    args = ap.parse_args()
+
+    root = Path(args.dir)
+    files = sorted(root.glob("*.html"))
+    if not files:
+        raise SystemExit(f"no HTMLs in {root}/")
+
+    per_endpoint: dict[str, list[float]] = defaultdict(list)
+    global_self: dict[str, float] = {}
+    per_endpoint_self: dict[str, dict[str, float]] = defaultdict(dict)
+    parsed = 0
+    skipped = 0
+
+    for f in files:
+        m = _FILENAME_RE.match(f.name)
+        if not m:
+            skipped += 1
+            continue
+        # Rebuild a display path from the filename: every underscore in the
+        # slug becomes a slash.
+        endpoint = f"{m['method']} /{m['slug'].replace('_', '/')}"
+        # Files excluded by --endpoint are neither parsed nor counted as skipped.
+        if args.endpoint and args.endpoint not in endpoint:
+            continue
+
+        loaded = load_session(f)
+        if not loaded:
+            skipped += 1
+            continue
+        session, root_frame = loaded
+
+        duration = session.get("duration", 0.0)
+        per_endpoint[endpoint].append(duration)
+
+        # The same tree is walked twice: once into the global accumulator and
+        # once into this endpoint's own accumulator.
+        walk_self_time(root_frame, global_self)
+        walk_self_time(root_frame, per_endpoint_self[endpoint])
+
+        parsed += 1
+
+    print(f"parsed: {parsed}  skipped: {skipped}  from {root}/\n")
+
+    print("=" * 100)
+    print("PER-ENDPOINT WALL-TIME")
+    print("=" * 100)
+    print(f"{'endpoint':<55} {'n':>6} {'mean':>9} {'p50':>9} {'p95':>9} {'max':>9}")
+    print("-" * 100)
+    # mean * count is the endpoint's total wall-time; largest total first.
+    rows = sorted(per_endpoint.items(), key=lambda kv: -statistics.mean(kv[1]) * len(kv[1]))
+    for ep, durations in rows:
+        print(f"{ep[:55]:<55} {len(durations):>6} "
+              f"{statistics.mean(durations)*1000:>8.1f}ms "
+              f"{percentile(durations,0.50)*1000:>8.1f}ms "
+              f"{percentile(durations,0.95)*1000:>8.1f}ms "
+              f"{max(durations)*1000:>8.1f}ms")
+
+    print()
+    print("=" * 100)
+    print(f"TOP {args.top} HOT FUNCTIONS BY CUMULATIVE SELF-TIME (across {parsed} requests)")
+    print("=" * 100)
+    # `or 1.0` keeps the share computation from dividing by zero.
+    total_self = sum(global_self.values()) or 1.0
+    top = sorted(global_self.items(), key=lambda kv: -kv[1])[: args.top]
+    print(f"{'fn  file:line':<70} {'self':>10} {'share':>8}")
+    print("-" * 100)
+    for ident, t in top:
+        share = t / total_self * 100
+        print(f"{short_ident(ident):<70} {t*1000:>8.1f}ms {share:>6.1f}%")
+
+    print()
+    print("=" * 100)
+    print("TOP 3 HOT FUNCTIONS PER ENDPOINT")
+    print("=" * 100)
+    # Endpoints are listed by total accumulated self-time, largest first.
+    for ep in sorted(per_endpoint_self, key=lambda e: -sum(per_endpoint_self[e].values())):
+        acc = per_endpoint_self[ep]
+        ep_total = sum(acc.values()) or 1.0
+        print(f"\n{ep}   ({len(per_endpoint[ep])} samples, {ep_total*1000:.0f}ms total self)")
+        top3 = sorted(acc.items(), key=lambda kv: -kv[1])[:3]
+        for ident, t in top3:
+            print(f"  {short_ident(ident):<70} {t*1000:>7.1f}ms  ({t/ep_total*100:>4.1f}%)")
+
+
+if __name__ == "__main__":
+    main()
--- a/scripts/profile/classify_usage.py
+++ b/scripts/profile/classify_usage.py
@@ -0,0 +1,105 @@
+#!/usr/bin/env python3
+"""
+Classify the shape of a memray usage_over_time.csv as plateau, climb,
+or climb-and-drop. Operates on the `memory_size_bytes` column.
+
+Usage:
+    scripts/profile/classify_usage.py profiles/usage_over_time.csv
+    scripts/profile/classify_usage.py                  # newest *.csv in ./profiles/
+"""
+from __future__ import annotations
+
+import csv
+import statistics
+import sys
+from pathlib import Path
+
+
+def _mb(n: float) -> str:
+    """Format a byte count as mebibytes with one decimal, e.g. ``1.5 MB``."""
+    mebibytes = n / (1024 * 1024)
+    return f"{mebibytes:.1f} MB"
+
+
+def load(path: Path) -> list[tuple[int, int]]:
+    """Read ``(timestamp, memory_size_bytes)`` pairs from a CSV, sorted by time.
+
+    Rows that lack either column, are truncated, or hold non-integer values
+    are skipped. The process exits with a message when no usable row
+    remains.
+    """
+    out: list[tuple[int, int]] = []
+    # newline="" is what the csv module asks for when it is given a file object.
+    with path.open(newline="") as f:
+        for r in csv.DictReader(f):
+            try:
+                out.append((int(r["timestamp"]), int(r["memory_size_bytes"])))
+            except (KeyError, TypeError, ValueError):
+                # KeyError: column missing from the header.
+                # TypeError: short row — DictReader fills the gap with None.
+                # ValueError: value is not an integer.
+                continue
+    if not out:
+        sys.exit(f"no usable rows in {path}")
+    out.sort(key=lambda t: t[0])
+    return out
+
+
+def classify(series: list[tuple[int, int]]) -> None:
+    """Print summary statistics and a heuristic verdict for a memory series.
+
+    ``series`` holds ``(timestamp, memory)`` tuples; only the memory values
+    are used, in the order given. Baseline and plateau are the medians of
+    the first and last 10% of samples (at least one sample each). The
+    verdict depends only on how far the plateau sits below the peak:
+    >= 10% prints CLIMB-AND-DROP, >= 3% prints MOSTLY-RELEASED, anything
+    else prints SUSTAINED-AT-PEAK. An empty series makes ``max()`` raise
+    ``ValueError``.
+    """
+    mem = [v for _, v in series]
+    n = len(mem)
+    peak = max(mem)
+    # index() returns the first occurrence when the peak value repeats.
+    peak_idx = mem.index(peak)
+
+    # Pre-peak baseline = first 10% of samples.
+    baseline = statistics.median(mem[: max(1, n // 10)])
+
+    # Plateau = last 10% of samples (what we settle to).
+    plateau = statistics.median(mem[-max(1, n // 10) :])
+
+    # "Tail drop" — how much we released after the peak.
+    tail_drop = peak - plateau
+    tail_drop_pct = (tail_drop / peak * 100) if peak else 0.0
+
+    # "Growth during run" — end vs beginning.
+    net_growth = plateau - baseline
+    net_growth_pct = (net_growth / baseline * 100) if baseline else 0.0
+
+    # Where is the peak in the timeline?
+    peak_position = peak_idx / (n - 1) if n > 1 else 0.0
+
+    print(f"samples: {n}")
+    print(f"baseline (first 10%): {_mb(baseline)}")
+    print(f"peak:                 {_mb(peak)}  at {peak_position:.0%} of run")
+    print(f"plateau (last 10%):   {_mb(plateau)}")
+    print(f"tail drop:            {_mb(tail_drop)}  ({tail_drop_pct:+.1f}% vs peak)")
+    print(f"net growth:           {_mb(net_growth)}  ({net_growth_pct:+.1f}% vs baseline)")
+    print()
+
+    # Heuristic: the only reliable leak signal without a post-load rest
+    # period is how much memory was released AFTER the peak. Net-growth-vs-
+    # cold-start is not useful — an active workload always grows vs. a cold
+    # interpreter.
+    #
+    # Caveat: if the workload was still running when memray stopped,
+    # "sustained-at-peak" is inconclusive (not necessarily a leak). Re-run
+    # with a rest period after the scan for a definitive answer.
+    if tail_drop_pct >= 10:
+        print("verdict: CLIMB-AND-DROP — memory released after peak.")
+        print("         → no leak. Profile CPU next (pyinstrument).")
+    elif tail_drop_pct >= 3:
+        print("verdict: MOSTLY-RELEASED — partial release after peak.")
+        print("         → likely healthy; re-run with a rest period after load")
+        print("           to confirm (memray should capture post-workload idle).")
+    else:
+        print("verdict: SUSTAINED-AT-PEAK — memory held near peak at end of capture.")
+        print("         → AMBIGUOUS: could be a leak, or the workload was still")
+        print("           running when memray stopped. Re-run with a rest period")
+        print("           after load, then check: memray flamegraph --leaks <bin>")
+
+
+def main() -> None:
+    """Classify the CSV named on the command line, or the newest in ./profiles/.
+
+    With no argument, the most recently modified ``*.csv`` under
+    ``profiles`` is used; the process exits with a message when there is
+    none.
+    """
+    cli_args = sys.argv[1:]
+    if cli_args:
+        target = Path(cli_args[0])
+    else:
+        by_mtime = sorted(
+            Path("profiles").glob("*.csv"),
+            key=lambda p: p.stat().st_mtime,
+        )
+        if not by_mtime:
+            sys.exit("no CSV found; pass a path or put one in ./profiles/")
+        # Oldest first, so the last entry is the newest file.
+        target = by_mtime[-1]
+
+    print(f"analyzing {target}\n")
+    classify(load(target))
+
+
+if __name__ == "__main__":
+    main()
--- a/scripts/profile/cprofile-cli.sh
+++ b/scripts/profile/cprofile-cli.sh
@@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+# Run a `decnet` subcommand under cProfile and write a .prof file for snakeviz.
+# Usage: scripts/profile/cprofile-cli.sh services
+#        scripts/profile/cprofile-cli.sh status
+set -euo pipefail
+
+if [[ $# -lt 1 ]]; then
+    echo "Usage: $0 <decnet-subcommand> [args...]" >&2
+    exit 1
+fi
+
+OUT="${OUT:-profiles/cprofile-$(date +%s).prof}"
+mkdir -p "$(dirname "$OUT")"
+
+python -m cProfile -o "${OUT}" -m decnet.cli "$@"
+echo "Wrote ${OUT}"
+echo "View with: snakeviz ${OUT}"
--- a/scripts/profile/memray-api.sh
+++ b/scripts/profile/memray-api.sh
@@ -0,0 +1,16 @@
+#!/usr/bin/env bash
+# Run the DECNET API under memray to capture an allocation profile.
+# Stop with Ctrl-C; then render with `memray flamegraph <bin>`.
+set -euo pipefail
+
+HOST="${DECNET_API_HOST:-127.0.0.1}"
+PORT="${DECNET_API_PORT:-8000}"
+OUT="${OUT:-profiles/memray-$(date +%s).bin}"
+mkdir -p "$(dirname "$OUT")"
+
+echo "Starting uvicorn under memray -> ${OUT}"
+python -m memray run --trace-python-allocators --follow-fork \
+    -o "${OUT}" -m uvicorn decnet.web.api:app \
+    --host "${HOST}" --port "${PORT}" --log-level warning
+
+echo "Render with: memray flamegraph ${OUT}"
--- a/scripts/profile/pyspy-attach.sh
+++ b/scripts/profile/pyspy-attach.sh
@@ -0,0 +1,32 @@
+#!/usr/bin/env bash
+# Attach py-spy to the running DECNET uvicorn worker(s) and record a flamegraph.
+# Requires sudo on Linux because of kernel.yama.ptrace_scope=1 by default.
+set -euo pipefail
+
+DURATION="${DURATION:-30}"
+OUT="${OUT:-profiles/pyspy-$(date +%s).svg}"
+mkdir -p "$(dirname "$OUT")"
+
+PID="$(pgrep -f 'uvicorn decnet.web.api' | head -n 1 || true)"
+if [[ -z "${PID}" ]]; then
+    echo "No uvicorn worker found. Start the API first (e.g. 'decnet deploy ...')." >&2
+    exit 1
+fi
+
+PY_VER="$(python -c 'import sys; print(f"{sys.version_info[0]}.{sys.version_info[1]}")')"
+# Compare major/minor as integers. A string test such as
+# [[ "3.9" > "3.14" ]] is lexicographic and would wrongly treat 3.9 as
+# newer than 3.14.
+PY_MAJOR="${PY_VER%%.*}"
+PY_MINOR="${PY_VER#*.}"
+if (( PY_MAJOR > 3 || (PY_MAJOR == 3 && PY_MINOR >= 14) )); then
+    cat >&2 <<EOF
+WARNING: py-spy 0.4.1 (latest on PyPI) does not yet support Python ${PY_VER}.
+Attaching will fail with "No python processes found in process <pid>".
+Use one of the other lenses for now:
+    DECNET_PROFILE_REQUESTS=true   # pyinstrument, per-request flamegraphs
+    scripts/profile/memray-api.sh  # memory allocation profiling
+    scripts/profile/cprofile-cli.sh <cmd>  # deterministic CLI profiling
+Track upstream: https://github.com/benfred/py-spy/releases
+EOF
+    exit 2
+fi
+
+echo "Attaching py-spy to PID ${PID} for ${DURATION}s -> ${OUT}"
+sudo .venv/bin/py-spy record -o "${OUT}" -p "${PID}" -d "${DURATION}" --subprocesses
+echo "Wrote ${OUT}"
--- a/scripts/profile/view.sh
+++ b/scripts/profile/view.sh
@@ -0,0 +1,69 @@
+#!/usr/bin/env bash
+# Open the newest profile artifact in the right viewer.
+#
+# Usage:
+#   scripts/profile/view.sh                 # newest file in ./profiles/
+#   scripts/profile/view.sh <file>          # explicit path
+#   scripts/profile/view.sh cprofile        # newest .prof
+#   scripts/profile/view.sh memray          # newest memray .bin
+#   scripts/profile/view.sh pyspy           # newest .svg
+#   scripts/profile/view.sh pyinstrument    # newest pyinstrument .html
+#
+# Memray viewer override:
+#   VIEW=flamegraph|table|tree|stats|summary  (default: flamegraph)
+#   VIEW=leaks   (render flamegraph with --leaks filter)
+set -euo pipefail
+
+DIR="${DIR:-profiles}"
+VIEW="${VIEW:-flamegraph}"
+
+if [[ ! -d "${DIR}" ]]; then
+    echo "No ${DIR}/ directory yet — run one of the profile scripts first." >&2
+    exit 1
+fi
+
+pick_newest() {
+    local pattern="$1"
+    find "${DIR}" -maxdepth 1 -type f -name "${pattern}" -printf '%T@ %p\n' 2>/dev/null \
+        | sort -n | tail -n 1 | cut -d' ' -f2-
+}
+
+TARGET=""
+case "${1:-}" in
+    "")           TARGET="$(pick_newest '*')" ;;
+    cprofile)     TARGET="$(pick_newest '*.prof')" ;;
+    memray)       TARGET="$(pick_newest 'memray-*.bin')" ;;
+    pyspy)        TARGET="$(pick_newest 'pyspy-*.svg')" ;;
+    pyinstrument) TARGET="$(find "${DIR}" -maxdepth 1 -type f -name '*.html' \
+                       ! -name 'memray-*' -printf '%T@ %p\n' 2>/dev/null \
+                       | sort -n | tail -n 1 | cut -d' ' -f2-)" ;;
+    *)            TARGET="$1" ;;
+esac
+
+if [[ -z "${TARGET}" || ! -f "${TARGET}" ]]; then
+    echo "No matching profile artifact found." >&2
+    exit 1
+fi
+
+echo "Opening ${TARGET}"
+
+case "${TARGET}" in
+    *.prof)
+        exec snakeviz "${TARGET}"
+        ;;
+    *memray*.bin|*.bin)
+        case "${VIEW}" in
+            leaks)    exec memray flamegraph --leaks -f "${TARGET}" ;;
+            flamegraph|table) exec memray "${VIEW}" -f "${TARGET}" ;;
+            tree|stats|summary) exec memray "${VIEW}" "${TARGET}" ;;
+            *) echo "Unknown VIEW=${VIEW}" >&2; exit 1 ;;
+        esac
+        ;;
+    *.svg|*.html)
+        exec xdg-open "${TARGET}"
+        ;;
+    *)
+        echo "Don't know how to view ${TARGET}" >&2
+        exit 1
+        ;;
+esac
--- a/scripts/vulture_whitelist.py
+++ b/scripts/vulture_whitelist.py
@@ -0,0 +1,22 @@
+"""Vulture whitelist — names that look unused but aren't.
+
+Run via:
+
+    vulture decnet vulture_whitelist.py --min-confidence 80
+
+Each entry suppresses a known false positive. Add a comment with the
+file:line and the reason so future-you can revisit.
+"""
+
+# FastAPI auth dependencies — `Depends()` runs for the side effect
+# (auth/RBAC enforcement) even when the injected value is unused inside
+# the handler body. Vulture can't see that.
+viewer  # decnet/web/router/canary/api_tokens.py:176, 198, 284 — Depends(require_viewer)
+admin   # any handler with admin: dict = Depends(require_admin) where the body doesn't read it
+user    # any handler with user: dict = Depends(require_user) where the body doesn't read it
+
+# IMAP stub — UID SEARCH vs sequence SEARCH is a real protocol
+# differentiator, but in this honeypot stub UID == seq number (see the
+# "UID == sequence number" comment at the top of the email fixtures), so
+# the parameter is intentionally a no-op.
+uid_mode  # decnet/templates/imap/server.py:646