Schemathesis was failing CI on routes that returned status codes not
declared in their OpenAPI responses= dicts. Adds the missing codes
across swarm_updates, swarm_mgmt, swarm, fleet and attackers routers.
Also adds 400 to every POST/PUT/PATCH that accepts a JSON body —
Starlette returns 400 on malformed/non-UTF8 bodies before FastAPI's
422 validation runs, which schemathesis fuzzing trips every time.
No handler logic changed.
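For reference, the declaration-only shape of the change — the route, models,
and extra codes below are illustrative, not lifted from the actual routers:

```python
from fastapi import APIRouter
from pydantic import BaseModel

router = APIRouter()

class EnrollRequest(BaseModel):
    host_name: str

@router.post(
    "/swarm/enroll",
    status_code=201,
    responses={
        400: {"description": "Malformed / non-UTF8 body (Starlette, pre-validation)"},
        409: {"description": "Host already enrolled"},
        422: {"description": "Validation error"},
    },
)
async def enroll(req: EnrollRequest) -> dict:
    # Handler body is untouched; only the responses= declaration grows.
    return {"enrolled": req.host_name}
```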
- tests/**: update templates/ → decnet/templates/ paths after module move
- tests/mysql_spinup.sh: use root:root and asyncmy driver
- tests/test_auto_spawn.py: patch decnet.cli.utils._pid_dir (package split)
- tests/test_cli.py: set DECNET_MODE=master in api-command tests
- tests/stress/conftest.py: run locust out-of-process via its CLI + CSV
stats shim to avoid urllib3 RecursionError from late gevent monkey-patch;
raise uvicorn startup timeout to 60s, accept 401 from auth-gated health,
strip inherited DECNET_* env, surface stderr on 0-request runs
- tests/stress/test_stress.py: loosen baseline thresholds to match hw
The 1,878-line cli.py held every Typer command plus process/HTTP helpers
and mode-gating logic. Split into one module per command using a
register(app) pattern so submodules never import app at module scope,
eliminating circular-import risk.
- utils.py: process helpers, _http_request, _kill_all_services, console, log
- gating.py: MASTER_ONLY_* sets, _require_master_mode, _gate_commands_by_mode
- deploy.py: deploy + _deploy_swarm (tightly coupled)
- lifecycle.py: status, teardown, redeploy
- workers.py: probe, collect, mutate, correlate
- inventory.py, swarm.py, db.py, and one file per remaining command
__init__.py calls register(app) on each module then runs the mode gate
last, and re-exports the private symbols tests patch against
(_db_reset_mysql_async, _kill_all_services, _require_master_mode, etc.).
Test patches retargeted to the submodule where each name now resolves.
Enroll-bundle tarball test updated to assert decnet/cli/__init__.py.
No behavioral change.
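The pattern, sketched (two files shown together; command bodies are
placeholders and the exact registration order is an assumption):

```python
# decnet/cli/lifecycle.py (sketch) — no module-scope import of the app object
import typer

def register(app: typer.Typer) -> None:
    @app.command()
    def status() -> None:
        """Report per-service status."""
        ...

# decnet/cli/__init__.py (sketch)
app = typer.Typer()

def _register_all() -> None:
    from decnet.cli import deploy, lifecycle, workers  # submodules import utils, never __init__
    for module in (deploy, lifecycle, workers):
        module.register(app)
    # _gate_commands_by_mode(app) runs last, once every command is registered

_register_all()
```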
Uvicorn's h11/httptools HTTP protocols don't populate
scope['extensions']['tls'], so /swarm/heartbeat's per-request cert pinning
was 403ing every call despite CERT_REQUIRED validating the cert at
handshake. Patch RequestResponseCycle.__init__ on both protocol modules to
read the peer cert off the asyncio transport and write DER bytes into
scope['extensions']['tls']['client_cert_chain']. Importing the module from
swarm_api.py auto-installs the patch in the swarmctl uvicorn worker before
any request is served.
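Roughly what the patch does — a sketch assuming RequestResponseCycle keeps
scope and transport as attributes after __init__ (true for current uvicorn,
but worth re-checking on upgrades):

```python
from uvicorn.protocols.http import h11_impl, httptools_impl

def _patch(cls) -> None:
    original_init = cls.__init__

    def patched_init(self, *args, **kwargs):
        original_init(self, *args, **kwargs)
        transport = getattr(self, "transport", None)
        scope = getattr(self, "scope", None)
        if transport is None or scope is None:
            return
        ssl_obj = transport.get_extra_info("ssl_object")
        if ssl_obj is None:
            return                               # plaintext listener: nothing to record
        der = ssl_obj.getpeercert(binary_form=True)
        if der:
            tls = scope.setdefault("extensions", {}).setdefault("tls", {})
            tls["client_cert_chain"] = [der]     # DER bytes for the pinning check

    cls.__init__ = patched_init

for module in (h11_impl, httptools_impl):
    _patch(module.RequestResponseCycle)
```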
DeckyFleet now branches on /system/deployment-mode: in swarm mode it
pulls /swarm/deckies and normalises DeckyShardView into the shared
Decky shape so the same card grid renders either way. Swarm cards gain
a host badge (host_name @ address), a state pill (running/degraded/
tearing_down/failed/teardown_failed with matching colors), an inline
last_error snippet, and a two-click arm/commit Teardown button lifted
from the old SwarmDeckies component. Mutate + interval controls are
hidden in swarm mode since the worker /mutate endpoint still 501s —
swarm-side rotation is a separate ticket.
Drops the standalone /swarm/deckies route + nav entry; SwarmDeckies.tsx
is deleted. The SWARM nav group keeps SwarmHosts, Remote Updates, and
Agent Enrollment.
New decnet.agent.heartbeat asyncio loop wired into the agent FastAPI
lifespan. Every 30 s the worker POSTs executor.status() to the master's
/swarm/heartbeat with its DECNET_HOST_UUID for self-identity; the
existing agent mTLS bundle provides the client cert the master pins
against SwarmHost.client_cert_fingerprint.
start() is a silent no-op when identity env (HOST_UUID, MASTER_HOST) is
unset or the worker bundle is missing, so dev runs and un-enrolled hosts
don't crash the agent app. On non-204 responses the loop logs loudly but
keeps ticking — an operator may re-enrol mid-session, and fail-closed
pinning shouldn't be self-silencing.
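Condensed sketch of the loop, assuming httpx for the mTLS client and the
enroll-bundle file names used by the swarm CLI (ca.crt / worker.crt /
worker.key); the heartbeat body shape and bundle path are assumptions:

```python
import asyncio
import logging
import os
from pathlib import Path

import httpx

log = logging.getLogger("decnet.agent.heartbeat")
BUNDLE = Path.home() / ".decnet" / "agent"           # enroll-bundle location (assumption)

async def heartbeat_loop(executor, interval: float = 30.0) -> None:
    host_uuid = os.environ.get("DECNET_HOST_UUID")
    master = os.environ.get("DECNET_MASTER_HOST")
    port = os.environ.get("DECNET_SWARMCTL_PORT", "8770")
    if not (host_uuid and master and (BUNDLE / "worker.crt").exists()):
        return                                        # silent no-op: dev run or un-enrolled host
    async with httpx.AsyncClient(
        verify=str(BUNDLE / "ca.crt"),
        cert=(str(BUNDLE / "worker.crt"), str(BUNDLE / "worker.key")),
    ) as client:
        while True:
            try:
                resp = await client.post(
                    f"https://{master}:{port}/swarm/heartbeat",
                    json={"host_uuid": host_uuid, "status": executor.status()},
                )
                if resp.status_code != 204:
                    log.error("heartbeat rejected: %s %s", resp.status_code, resp.text)
            except httpx.HTTPError as exc:            # keep ticking; operator may re-enrol
                log.error("heartbeat failed: %s", exc)
            await asyncio.sleep(interval)
```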
swarmctl CLI gains --tls/--cert/--key/--client-ca flags. With --tls the
controller runs uvicorn under HTTPS + mTLS (CERT_REQUIRED) so worker
heartbeats can reach it cross-host. Default is still 127.0.0.1 plaintext
for backwards compat with the master-CLI enrollment flow.
Auto-issue path (no --cert/--key given): a server cert signed by the
existing DECNET CA is issued once and parked under ~/.decnet/swarmctl/.
Workers already ship that CA's ca.crt from the enroll bundle, so they
verify the endpoint with no extra trust config. BYOC via --cert/--key
when the operator wants a publicly-trusted or externally-managed cert.
The auto-cert path is idempotent across restarts to keep a stable
fingerprint for any long-lived mTLS sessions.
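Roughly how the flags map onto uvicorn's TLS options — a sketch; the app
import path and port default are assumptions:

```python
import ssl
import uvicorn

def run_swarmctl(tls: bool, cert: str | None, key: str | None,
                 client_ca: str | None, host: str, port: int = 8770) -> None:
    ssl_kwargs: dict = {}
    if tls:
        ssl_kwargs = {
            "ssl_certfile": cert,                 # auto-issued under ~/.decnet/swarmctl/ when omitted
            "ssl_keyfile": key,
            "ssl_ca_certs": client_ca,            # the DECNET CA the worker certs chain to
            "ssl_cert_reqs": ssl.CERT_REQUIRED,   # mTLS: no client cert, no connection
        }
    uvicorn.run("decnet.web.swarm_api:app", host=host, port=port, **ssl_kwargs)
```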
The rendered /etc/decnet/decnet.ini now carries host-uuid and
swarmctl-port in [agent], which config_ini seeds into DECNET_HOST_UUID
and DECNET_SWARMCTL_PORT. Gives the worker a stable self-identity for
the heartbeat loop — the INI never has to be rewritten because cert
pinning is the real gate (a rotated UUID with a matching CA-signed
cert would still be blocked by SHA-256 fingerprint mismatch against
the stored SwarmHost row).
Also adds DECNET_MASTER_HOST so the agent can find the swarmctl URL
via the INI's existing master-host key.
New POST /swarm/heartbeat on the swarm controller. Workers post every
~30s with the output of executor.status(); the master bumps
SwarmHost.last_heartbeat and re-upserts each DeckyShard with a fresh
DeckyConfig snapshot and runtime-derived state (running/degraded).
Security: CA-signed mTLS alone is not sufficient — a decommissioned
worker's still-valid cert could resurrect ghost shards. The endpoint
extracts the presented peer cert (primary: scope["extensions"]["tls"],
fallback: transport.get_extra_info("ssl_object")) and SHA-256-pins it
to the SwarmHost.client_cert_fingerprint stored for the claimed
host_uuid. Extraction is factored into _extract_peer_fingerprint so
tests can exercise both uvicorn scope shapes and the fail-closed path
(neither source available) without mocking uvicorn's TLS pipeline.
Adds get_swarm_host_by_fingerprint to the repo interface (SQLModel
impl reuses the indexed client_cert_fingerprint column).
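A sketch of the extraction order; how the transport is reached in the
fallback, and the exact chain shape, are assumptions:

```python
import hashlib

from fastapi import HTTPException, Request

def _extract_peer_fingerprint(request: Request) -> str:
    # Primary: ASGI TLS extension (populated by the uvicorn patch).
    tls = request.scope.get("extensions", {}).get("tls", {})
    chain = tls.get("client_cert_chain") or []
    der = chain[0] if chain else None
    if der is None:
        # Fallback: read the cert straight off the transport's SSL object.
        transport = request.scope.get("transport")   # assumption: how the transport is reached
        ssl_obj = transport.get_extra_info("ssl_object") if transport else None
        der = ssl_obj.getpeercert(binary_form=True) if ssl_obj else None
    if not der:
        # Both sources unavailable: fail closed, never trust the claimed host_uuid.
        raise HTTPException(status_code=403, detail="client certificate required")
    return hashlib.sha256(der).hexdigest()
```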
Dispatch now writes the full serialised DeckyConfig into
DeckyShard.decky_config (plus decky_ip as a cheap extract), so the
master can render the same rich per-decky card the local-fleet view
uses — hostname, distro, archetype, service_config, mutate_interval,
last_mutated — without round-tripping to the worker on every page
render. DeckyShardView gains the corresponding fields; the repository
flattens the snapshot at read time. Pre-migration rows keep working
(fields fall through as None/defaults).
Columns are additive + nullable so SQLModel.metadata.create_all handles
the change on both SQLite and MySQL. Backfill happens organically on
the next dispatch or (in a follow-up) agent heartbeat.
The reaper was being SIGTERM'd mid-rm because `start_new_session=True`
only forks a new POSIX session — it does not escape decnet-agent.service's
cgroup. When the reaper ran `systemctl stop decnet-agent`, systemd
tore down the whole cgroup (reaper included) before `rm -rf /opt/decnet*`
finished, leaving the install on disk.
Spawn the reaper via `systemd-run --collect --unit decnet-reaper-<pid>`
so it runs in a fresh transient scope, outside the agent unit. Falls
back to bare Popen for non-systemd hosts.
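Roughly, as a sketch — the reaper script path is illustrative and the unit
name follows the template above:

```python
import os
import shutil
import subprocess

def spawn_reaper(script_path: str) -> None:
    if shutil.which("systemd-run"):
        # Transient scope outside decnet-agent.service's cgroup, so
        # `systemctl stop decnet-agent` cannot take the reaper down mid-rm.
        argv = ["systemd-run", "--collect",
                f"--unit=decnet-reaper-{os.getpid()}",
                "/bin/bash", script_path]
    else:
        # Non-systemd hosts: plain detached Popen, as before.
        argv = ["/bin/bash", script_path]
    subprocess.Popen(argv, start_new_session=True,
                     stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
```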
Decommissioning a worker from the dashboard (or swarm controller) now
asks the agent to wipe its own install before the master forgets it.
The agent stops decky containers + every decnet-* systemd unit, then
deletes /opt/decnet*, /etc/systemd/system/decnet-*, /var/lib/decnet/*,
and /usr/local/bin/decnet*. Logs under /var/log are preserved.
The reaper runs as a detached /tmp script (start_new_session=True) so
it survives the agent process being killed. Self-destruct dispatch is
best-effort — a dead worker doesn't block master-side cleanup.
Teardowns were synchronous all the way through: POST blocked on the
worker's docker-compose-down cycle (seconds to minutes), the frontend
locked tearingDown to a single string so only one button could be armed
at a time, and operators couldn't queue a second teardown until the
first returned. On a flaky worker that meant staring at a spinner for
the whole RTT.
Backend: POST /swarm/hosts/{uuid}/teardown returns 202 the instant the
request is validated. Affected shards flip to state='tearing_down'
synchronously before the response so the UI reflects progress
immediately, then the actual AgentClient call + DB cleanup run in an
asyncio.create_task (tracked in a module-level set to survive GC and
to be drainable by tests). On failure the shard flips to
'teardown_failed' with the error recorded — nothing is re-raised,
since there's no caller to catch it.
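The shape of that flow, sketched — `repo` and `agent_client` below are
stand-ins for the real repository and mTLS client, and the method names on
them are assumptions:

```python
import asyncio
from fastapi import APIRouter

router = APIRouter()
repo = ...            # stand-in for the repository singleton
agent_client = ...    # stand-in for the mTLS AgentClient
_teardown_tasks: set[asyncio.Task] = set()   # module-level: survives GC, drainable in tests

@router.post("/swarm/hosts/{host_uuid}/teardown", status_code=202)
async def teardown_host(host_uuid: str) -> dict:
    shards = await repo.shards_for_host(host_uuid)
    for shard in shards:
        await repo.set_shard_state(shard, "tearing_down")    # visible before the 202 returns

    async def _run() -> None:
        try:
            await agent_client.teardown(host_uuid)            # the slow worker round-trip
            await repo.delete_shards_for_host(host_uuid)
        except Exception as exc:                              # no caller left to re-raise to
            for shard in shards:
                await repo.set_shard_state(shard, "teardown_failed", error=str(exc))

    task = asyncio.create_task(_run())
    _teardown_tasks.add(task)
    task.add_done_callback(_teardown_tasks.discard)
    return {"status": "accepted"}
```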
Frontend: swap tearingDown / decommissioning from 'string | null' to
'Set<string>'. Each button tracks its own in-flight state; the poll
loop picks up the final shard state from the backend. Multiple
teardowns can now be queued without blocking each other.
Submitting an INI with a single [decky1] was silently redeploying the
deckies from the *previous* deploy too. POST /deckies/deploy merged the
new INI into the stored DecnetConfig by name, so a 1-decky INI on top of
a prior 3-decky run still pushed 3 deckies to the worker. Those stale
decky2/decky3 kept their old IPs, collided on the parent NIC, and the
agent failed with 'Address already in use' — the deploy the operator
never asked for.
The INI is the source of truth for which deckies exist this deploy.
Full replace: config.deckies = list(new_decky_configs). Operators who
want to add more deckies should list them all in the INI.
Update the deploy-limit test to reflect the new replace semantics, and
add a regression test asserting prior state is dropped.
Teardown and Decommission buttons were silently dead in the browser.
Root cause: every handler started with 'if (!window.confirm(...)) return;'
and browsers permanently disable confirm() for a tab once the user ticks
'Prevent this page from creating additional dialogs'. That returns false
with no UI, the handler early-exits, and no request is ever fired — no
network traffic, no console error, no backend activity.
Swap to an inline two-click pattern: first click arms the button (label
flips to 'Click again to confirm', resets after 4s); second click within
the window commits. Same safety against misclicks, zero dependency on
browser-native dialog primitives.
docker compose up is partial-success-friendly — a build failure on one
service doesn't roll back the others. But the master was catching the
agent's 500 and tagging every decky in the shard as 'failed' with the
same error message. From the UI that looked like all three deckies died
even though two were live on the worker.
On dispatch exception, probe the agent's /status to learn which deckies
actually have running containers, and upsert per-decky state accordingly.
Only fall back to marking the whole shard failed if the status probe
itself is unreachable.
Enhance agent.executor.status() to include a 'runtime' map keyed by
decky name with per-service container state, so the master has something
concrete to consult.
Two compounding root causes produced the recurring 'Address already in use'
error on redeploy:
1. _ensure_network only compared driver+name; if a prior deploy's IPAM
pool drifted (different subnet/gateway/range), Docker kept handing out
addresses from the old pool and raced the real LAN. Now also compares
Subnet/Gateway/IPRange and rebuilds on drift.
2. A prior half-failed 'up' could leave containers still holding the IPs
and ports the new run wants. Run 'compose down --remove-orphans' as a
best-effort pre-up cleanup so IPAM starts from a clean state.
Also surface docker compose stderr to the structured log on failure so
the agent's journal captures Docker's actual message (which IP, which
port) instead of just the exit code.
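A sketch of the drift check from point 1, via the docker SDK; the function
name and the rebuild step are simplified stand-ins:

```python
import docker
from docker.errors import NotFound

def _network_matches(name: str, driver: str, subnet: str,
                     gateway: str, ip_range: str) -> bool:
    client = docker.from_env()
    try:
        net = client.networks.get(name)
    except NotFound:
        return False
    if net.attrs.get("Driver") != driver:
        return False
    ipam = ((net.attrs.get("IPAM") or {}).get("Config") or [{}])[0]
    # Any pool drift means Docker may still hand out addresses that race the real LAN.
    return (ipam.get("Subnet") == subnet
            and ipam.get("Gateway") == gateway
            and ipam.get("IPRange") == ip_range)

# Caller sketch: rebuild on mismatch instead of trusting the existing network.
# if not _network_matches("decnet_lan", "macvlan", subnet, gateway, ip_range):
#     tear_down_and_recreate()
```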
Operators want to know what address to poke when triaging a swarm decky;
the compose-hash column was debug scaffolding that never paid off.
DeckyShard has no IP column (the deploy-time IP lives on DecnetConfig),
so the list endpoint resolves it at read time by joining shards against
the stored deployment state by decky_name. Missing lookups render as "—"
rather than erroring — the list stays useful even after a master restart
that hasn't persisted a config yet.
The nested list-comp `[f"{id}-{svc}" for svc in [d.services for d ...]]`
iterated over a list of lists, so `svc` was the whole services list and
the f-string stringified it -> `decky3-['sip']`. docker compose saw "no
such service" and the per-decky teardown failed with a 500.
Flatten: find the matching decky once, then iterate its services. Noop
early on unknown decky_id and on empty service lists. Regression test
asserts the emitted compose args have no '[' or quote characters.
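Sketch of the before/after; field names beyond `services` and the name
match are illustrative:

```python
def compose_service_names(deckies, decky_id: str) -> list[str]:
    # Buggy shape: `svc` iterated over a list-of-lists, so the f-string
    # rendered the whole list -> "decky3-['sip']".
    #   [f"{decky_id}-{svc}" for svc in [d.services for d in deckies if d.name == decky_id]]
    decky = next((d for d in deckies if d.name == decky_id), None)
    if decky is None or not decky.services:
        return []   # noop early: unknown decky_id or empty service list
    return [f"{decky_id}-{svc}" for svc in decky.services]
```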
Agents already exposed POST /teardown; the master was missing the plumbing
to reach it. Add:
- POST /api/v1/swarm/hosts/{uuid}/teardown — admin-gated. Body
{decky_id: str|null}: null tears the whole host, a value tears one decky.
On worker failure the master returns 502 and leaves DB shards intact so
master and agent stay aligned.
- BaseRepository.delete_decky_shard(name) + sqlmodel impl for per-decky
cleanup after a single-decky teardown.
- SwarmHosts page: "Teardown all" button (keeps host enrolled).
- SwarmDeckies page: per-row "Teardown" button.
Also exclude setuptools' build/ staging dir from the enrollment tarball —
`pip install -e` on the master generates build/lib/decnet_web/node_modules
and the bundle walker was leaking it to agents. Align pyproject's bandit
exclude with the git-hook invocation so both skip decnet/templates/.
The docker build contexts and syslog_bridge.py lived at repo root, which
meant setuptools (include = ["decnet*"]) never shipped them. Agents
installed via `pip install $RELEASE_DIR` got site-packages/decnet/** but no
templates/, so every deploy blew up in deployer._sync_logging_helper with
FileNotFoundError on templates/syslog_bridge.py.
Move templates/ -> decnet/templates/ and declare it as setuptools
package-data. Path resolutions in services/*.py and engine/deployer.py drop
one .parent since templates now lives beside the code. Test fixtures,
bandit exclude path, and coverage omit glob updated to match.
Agents now ship with collector/prober/sniffer as systemd services; mutator,
profiler, web, and API stay master-only (profiler rebuilds attacker profiles
against the master DB — no per-host DB exists). Expand _EXCLUDES to drop the
full decnet/web, decnet/mutator, decnet/profiler, and decnet_web trees from
the enrollment bundle.
Updater now calls _heal_path_symlink + _sync_systemd_units after rotation so
fleets pick up new unit files and /usr/local/bin/decnet tracks the shared venv
without a manual reinstall. daemon-reload runs once per update when any unit
changed.
Fix _service_registry matchers to accept systemd-style /usr/local/bin/decnet
cmdlines (psutil returns a list — join to string before substring-checking)
so agent-mode `decnet status` reports collector/prober/sniffer correctly.
The bootstrap installer copies etc/systemd/system/*.service into
/etc/systemd/system at enrollment time, but the updater was skipping
that step — a code push could not ship a new unit (e.g. the four
per-host microservices added this session) or change ExecStart on an
existing one. systemctl alone doesn't re-read unit files; daemon-reload
is required.
run_update / run_update_self now call _sync_systemd_units after
rotation: diff each .service file against the live copy, atomically
replace changed ones, then issue a single `systemctl daemon-reload`.
No-op on legacy tarballs that don't ship etc/systemd/system/.
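A sketch of the sync step, assuming the release tree carries
etc/systemd/system/ as described; atomicity is approximated with a
same-directory rename:

```python
import filecmp
import shutil
import subprocess
from pathlib import Path

def _sync_systemd_units(release_dir: Path) -> None:
    src_dir = release_dir / "etc" / "systemd" / "system"
    if not src_dir.is_dir():
        return                     # legacy tarballs without unit files: no-op
    changed = False
    for unit in sorted(src_dir.glob("*.service")):
        live = Path("/etc/systemd/system") / unit.name
        if live.exists() and filecmp.cmp(unit, live, shallow=False):
            continue
        tmp = live.with_name(live.name + ".tmp")
        shutil.copy2(unit, tmp)
        tmp.replace(live)          # atomic swap on the same filesystem
        changed = True
    if changed:
        subprocess.run(["systemctl", "daemon-reload"], check=False)
```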
Previously `decnet status` on an agent showed every microservice as DOWN
because deploy's auto-spawn was unihost-scoped and the agent CLI gate
hid the per-host commands. Now:
- collect, probe, profiler, sniffer drop out of MASTER_ONLY_COMMANDS
(they run per-host; master-side work stays master-gated).
- mutate stays master-only (it orchestrates swarm-wide respawns).
- decnet/mutator/ excluded from agent tarballs — never invoked there.
- decnet/web exclusion tightened: ship db/ + auth.py + dependencies.py
(profiler needs the repo singleton), drop api.py, swarm_api.py,
ingester.py, router/, templates/.
- Four new systemd unit templates (decnet-collector/prober/profiler/
sniffer) shipped in every enrollment tarball.
- enroll_bootstrap.sh enables + starts all four alongside agent and
forwarder at install time.
- updater restarts the aux units on code push so they pick up the new
release (best-effort — legacy enrollments without the units won't
fail the update).
- status table hides Mutator + API rows in agent mode.
Agents never run the FastAPI master app (decnet/web/) or serve the React
frontend (decnet_web/) — they run decnet.agent, decnet.updater, and
decnet.forwarder, none of which import decnet.web. Shipping the master
tree bloats every enrollment payload and needlessly widens the worker's
attack surface.
Excluded paths are unreachable on the worker (all cli.py imports of
decnet.web are inside master-only command bodies that the agent-mode
gate strips). Tests assert neither tree leaks into the tarball.
The bootstrap was installing into /opt/decnet/.venv with an editable
`pip install -e .`, and /usr/local/bin/decnet pointed there. The updater
writes releases to /opt/decnet/releases/active/ with a shared venv at
/opt/decnet/venv — a parallel tree nothing on the box actually runs.
Result: updates appeared to succeed (release dir rotated, SHA changed)
but systemd kept executing the untouched bootstrap code.
Changes:
- Bootstrap now installs directly into /opt/decnet/releases/active
with the shared venv at /opt/decnet/venv and /opt/decnet/current
symlinked. Same layout the updater rotates in and out of.
- /usr/local/bin/decnet -> /opt/decnet/venv/bin/decnet.
- run_update / run_update_self heal /usr/local/bin/decnet on every
push so already-enrolled hosts recover on the next update instead
of needing a re-enroll.
- run_update / run_update_self now log each phase (receive, extract,
pip install, rotate, restart, probe) so the updater log actually
shows what happened.
Agents run deckies locally and need to inspect their own state. Removed
`status` from MASTER_ONLY_COMMANDS so it survives the agent-mode gate.
Useful for validating remote updater pushes from the master.
Three holes in the systemd integration:
1. _spawn_agent_via_systemd only restarted decnet-agent.service, leaving
decnet-forwarder.service running the pre-update code (same /opt/decnet
tree, stale import cache).
2. run_update_self used os.execv regardless of environment — the re-execed
process kept the updater's existing cgroup/capability inheritance but
systemd would notice MainPID change and mark the unit degraded.
3. No path to surface a failed forwarder restart (legacy enrollments have
no forwarder unit).
Now: agent restart first, forwarder restart as best-effort (logged but
non-fatal so legacy workers still update), MainPID still read from the
agent unit. For update-self under systemd, spawn a detached sleep+
systemctl restart so the HTTP response flushes before the unit cycles.
Bootstrap used to end with `decnet updater --daemon` which forks and
detaches — invisible to systemctl, no auto-restart, dies on reboot.
Ships a decnet-updater.service template matching the pattern of the
other units (Restart=on-failure, log to /var/log/decnet/decnet.updater.log,
certs from /etc/decnet/updater, install tree at /opt/decnet), bundles
it alongside agent/forwarder/engine units, and the installer now
`systemctl enable --now`s it when --with-updater is set.
The create helpers short-circuited on name alone, so a prior macvlan
deploy left Docker's decnet_lan network in place. A subsequent ipvlan
deploy would no-op the network create, then container attach would try
to add a macvlan port on enp0s3 that already had an ipvlan slave —
EBUSY, agent 500, docker ps empty.
Now: when the existing network's driver disagrees with the requested
one, disconnect any live containers and DROP it before recreating.
Parent-NIC can host one driver at a time.
Also: setup_host_{macvlan,ipvlan} opportunistically delete the opposite
host-side helper so we don't leave cruft across driver swaps.
_DB_RESET_TABLES was missing the swarm tables, so drop-tables mode left
them intact. create_all doesn't alter columns on existing tables, so any
schema change to SwarmHost (like use_ipvlan) never took effect after a
reset. Drop order is child FK first (decky_shards -> swarm_hosts).
Wi-Fi APs bind one MAC per associated station, so VirtualBox/VMware
guests bridged over Wi-Fi rotate the VM's DHCP lease when Docker's
macvlan starts emitting container-MAC frames through the vNIC. Adds a
`use_ipvlan` toggle on the Agent Enrollment tab (mirrors the updater
daemon checkbox): flips the flag on SwarmHost, bakes `ipvlan=true` into
the agent's decnet.ini, and `_worker_config` forces ipvlan=True on the
per-host shard at dispatch. Safe no-op on wired/bare-metal agents.
Deckies merged in from a prior deployment's saved state kept their
original host_uuid — which dispatch_decnet_config then 404'd on if that
host had since been decommissioned or re-enrolled at a different uuid.
Before round-robin assignment, drop any host_uuid that isn't in the live
swarm_hosts set so orphaned entries get reassigned instead of exploding
with 'unknown host_uuid'.
tar_working_tree (walks repo + gzips several MB) and detect_git_sha
(shells out) were called directly on the event loop, so /swarm-updates/push
and /swarm-updates/push-self froze every other request until the tarball
was ready. Wrap both in asyncio.to_thread.
systemd daemons run with WorkingDirectory=/ by default; docker compose
derives the project name from basename(cwd), which is empty at '/', and
aborts with 'project name must not be empty'. Pass -p decnet explicitly
so the project name is independent of cwd, and set WorkingDirectory=/opt/decnet
on the three DECNET units so compose artifacts (decnet-compose.yml,
build contexts) also land in the install dir.
POST /deckies/deploy now branches on DECNET_MODE + enrolled host presence:
when the caller is a master with at least one reachable swarm host, round-
robin host_uuids are assigned over new deckies and the config is dispatched
via AgentClient. Falls back to local docker-compose otherwise.
Extracts the dispatch loop from api_deploy_swarm into dispatch_decnet_config
so both endpoints share the same shard/dispatch/persist path. Adds
GET /system/deployment-mode for the UI to show 'will shard across N hosts'
vs 'will deploy locally' before the operator clicks deploy.
Stateless /api/v1/deckies/deploy previously instantiated DecnetConfig with
deckies=[] so it could merge entries later — but DecnetConfig.deckies is
min_length=1, so Pydantic raised and the global handler mapped it to 422
'Internal data consistency error'. Construct the config after
build_deckies_from_ini returns at least one DeckyConfig.
Rename log-file-path -> log-directory (maps to DECNET_LOG_DIRECTORY). Bundle
now ships three systemd units rendered with agent_name/master_host and installs
them into /etc/systemd/system/. Bootstrap replaces direct 'decnet X --daemon'
calls with systemctl enable --now. Each unit pins DECNET_SYSTEM_LOGS so agent,
forwarder, and deckies logs land at decnet.{agent,forwarder}.log and decnet.log
under /var/log/decnet.
Mirrors the agent→forwarder pattern: `decnet swarmctl` now fires the
syslog-TLS listener as a detached Popen sibling so a single master
invocation brings the full receive pipeline online. --no-listener opts
out for operators who want to run the listener on a different host (or
under their own systemd unit).
Listener bind host / port come from DECNET_LISTENER_HOST and
DECNET_SWARM_SYSLOG_PORT — both seedable from /etc/decnet/decnet.ini.
PID at $(pid_dir)/listener.pid so operators can kill/restart manually.
decnet.ini.example ships alongside env.config.example as the
documented surface for the new role-scoped config. Mode, forwarder
targets, listener bind, and master ports all live there — no more
memorizing flag trees.
Extends tests/test_auto_spawn.py with two swarmctl cases: the listener is
spawned with the expected argv + PID file, and --no-listener suppresses the
spawn.
New _spawn_detached(argv, pid_file) helper uses Popen with
start_new_session=True + close_fds=True + DEVNULL stdio to launch a
DECNET subcommand as a fully independent process. The parent does NOT
wait(); if it dies the child survives under init. This is deliberately
not a supervisor — if the child dies the operator restarts it manually.
_pid_dir() picks /opt/decnet when writable else ~/.decnet, so both
root-run production and non-root dev work without ceremony.
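Roughly, as a sketch — the real helpers live in decnet.cli.utils and may
differ in detail:

```python
import os
import subprocess
from pathlib import Path

def _pid_dir() -> Path:
    opt = Path("/opt/decnet")
    return opt if os.access(opt, os.W_OK) else Path.home() / ".decnet"

def _spawn_detached(argv: list[str], pid_file: Path) -> int:
    proc = subprocess.Popen(
        argv,
        start_new_session=True,    # child keeps running if the parent dies
        close_fds=True,
        stdin=subprocess.DEVNULL,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    pid_file.parent.mkdir(parents=True, exist_ok=True)
    pid_file.write_text(str(proc.pid))
    return proc.pid                # deliberately no wait(): this is not a supervisor
```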
`decnet agent` now auto-spawns `decnet forwarder --daemon ...` as
that detached sibling, pulling master host + syslog port from
DECNET_SWARM_MASTER_HOST / DECNET_SWARM_SYSLOG_PORT. --no-forwarder
opts out. If DECNET_SWARM_MASTER_HOST is unset the auto-spawn is
silently skipped (single-host dev or operator wants to start the
forwarder separately).
tests/test_auto_spawn.py monkeypatches subprocess.Popen and verifies:
the detach kwargs are passed, the PID file exists and contains a
valid positive integer (PID-file corruption is a real operational
headache — catching bad writes at the test level is free), the
--no-forwarder flag suppresses the spawn, and the unset-master-host
path silently skips.
- MASTER_ONLY_COMMANDS / MASTER_ONLY_GROUPS frozensets enumerate every
command a worker host must not see. Comment block at the declaration
puts the maintenance obligation in front of anyone touching command
registration.
- _gate_commands_by_mode() filters both app.registered_commands (for
@app.command() registrations) and app.registered_groups (for
add_typer sub-apps) so the 'swarm' group disappears along with
'api', 'swarmctl', 'deploy', etc. on agent hosts.
- _require_master_mode() is the belt-and-braces in-function guard,
added to the four highest-risk commands (api, swarmctl, deploy,
teardown). Protects against direct function imports that would
bypass Typer.
- DECNET_DISALLOW_MASTER=false is the escape hatch for hybrid dev
hosts that legitimately play both roles.
tests/test_mode_gating.py exercises help-text listings via subprocess
and the defence-in-depth guard via direct import.
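A sketch of the gate itself, assuming Typer's registered_commands /
registered_groups internals keep their current shape; the frozenset
contents are abbreviated from the real declarations:

```python
import os
import typer

MASTER_ONLY_COMMANDS = frozenset({"api", "swarmctl", "deploy", "teardown", "mutate"})
MASTER_ONLY_GROUPS = frozenset({"swarm"})

def _command_name(info) -> str:
    # Typer leaves .name as None when it is derived from the function name.
    return info.name or info.callback.__name__.replace("_", "-")

def _gate_commands_by_mode(app: typer.Typer) -> None:
    if os.environ.get("DECNET_MODE", "master") == "master":
        return
    if os.environ.get("DECNET_DISALLOW_MASTER", "true").lower() == "false":
        return                     # hybrid dev host: escape hatch
    app.registered_commands = [
        c for c in app.registered_commands
        if _command_name(c) not in MASTER_ONLY_COMMANDS
    ]
    app.registered_groups = [
        g for g in app.registered_groups
        if (g.name or "") not in MASTER_ONLY_GROUPS
    ]
```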
- decnet/__init__.py now calls load_ini_config() on first import of any
decnet.* module, seeding os.environ via setdefault() so env.py's
module-level reads pick up INI values without the shell having to export
them. Real env vars still win.
- env.py exposes DECNET_MODE (default 'master') and
DECNET_DISALLOW_MASTER (default true), consumed by the upcoming
master-command gating in cli.py.
Back-compat: missing /etc/decnet/decnet.ini is a no-op. Existing
.env.local + flag-based launches behave identically.
- decnet/agent/app.py /health: drop leftover 'push-test-2' canary
planted during live VM push verification and never cleaned up;
test_health_endpoint asserts the exact dict shape.
- tests/test_factory.py: switch the lazy-engine check from
mysql+aiomysql (not in pyproject) to mysql+asyncmy (the driver the
project actually ships). The test does not hit the wire so the
dialect swap is safe.
Both were red on `pytest tests/` before any config/auto-spawn work
began; fixing them here so the upcoming commits land on a green
full-suite baseline.
New decnet/config_ini.py parses a role-scoped INI file via stdlib
configparser and seeds os.environ via setdefault — real env vars still
win, keeping full back-compat with .env.local flows.
[decnet] holds role-agnostic keys (mode, disallow-master, log-file-path);
the role section matching `mode` is loaded, the other is ignored
silently so a worker never reads master-only keys (and vice versa).
Loader is standalone in this commit — not wired into cli.py yet.
The module-level _require_env('DECNET_JWT_SECRET') call blocked
`decnet agent` and `decnet updater` from starting on workers that
legitimately have no business knowing the master's JWT signing key.
Move the resolution into a module `__getattr__`: only consumers that
actually read `decnet.env.DECNET_JWT_SECRET` trigger the validation,
which in practice means only decnet.web.auth (master-side).
Adds tests/test_env_lazy_jwt.py covering both the in-process lazy path
and an out-of-process `decnet agent --help` subprocess check with a
fully sanitized environment.
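The mechanism, sketched with PEP 562's module-level __getattr__;
_require_env here is a stand-in for the existing helper:

```python
import os

def _require_env(name: str) -> str:      # stand-in for the existing helper
    value = os.environ.get(name)
    if not value:
        raise RuntimeError(f"{name} must be set")
    return value

def __getattr__(name: str):
    # PEP 562: only triggered when the attribute isn't found normally, i.e.
    # only when someone actually reads decnet.env.DECNET_JWT_SECRET.
    if name == "DECNET_JWT_SECRET":
        return _require_env("DECNET_JWT_SECRET")
    raise AttributeError(name)
```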
React component for /swarm-updates: per-host table polled every 10s,
row actions for Push Update / Update Updater / Rollback, a fleet-wide
'Push to All' modal with the include_self toggle, and toast feedback
per result.
Admin-only (both server-gated and UI-gated). Unreachable hosts surface
as an explicit state; actions are disabled on them. Rollback is
disabled when the worker has no previous release slot (previous_sha
null from /hosts).
Adds /api/v1/swarm-updates/{hosts,push,push-self,rollback} behind
require_admin. Reuses the existing UpdaterClient + tar_working_tree + the
per-host asyncio.gather pattern from api_deploy_swarm.py; tarball is
built exactly once per /push request and fanned out to every selected
worker. /hosts filters out decommissioned hosts and agent-only
enrollments (no updater bundle = not a target).
Connection drops during /update-self are treated as success — the
updater re-execs itself mid-response, so httpx always raises.
Pydantic models live in decnet/web/db/models.py (single source of
truth). 24 tests cover happy paths, rollback, transport failures,
include_self ordering (skip on rolled-back agents), validation, and
RBAC gating.
Add deploy/ unit files for every DECNET daemon (agent, updater, api, web,
swarmctl, listener, forwarder). All run as User=decnet with NoNewPrivileges,
ProtectSystem, PrivateTmp, LockPersonality; AmbientCapabilities=CAP_NET_ADMIN
CAP_NET_RAW only on the agent (MACVLAN/scapy). Existing api/web units migrated
to /opt/decnet layout and the same hardening stanza.
Make the updater's _spawn_agent systemd-aware: under systemd (detected via
INVOCATION_ID + systemctl on PATH), `systemctl restart decnet-agent.service`
replaces the Popen path so the new agent inherits the unit's ambient caps
instead of the updater's empty set. _stop_agent becomes a no-op in that mode
to avoid racing systemctl's own stop phase.
Tests cover the dispatcher branch selection, MainPID parsing, and the
systemd no-op stop.
- _run_pip: on first venv use, install decnet with its full dep tree so the
bootstrapped environment actually has typer/fastapi/uvicorn. Subsequent
updates keep --no-deps for a near-no-op refresh.
- run_update_self: do not reuse sys.argv to re-exec the updater. Inside the
live process, sys.argv is the uvicorn subprocess invocation (--ssl-keyfile
etc.), which the 'decnet updater' CLI rejects. Reconstruct the operator-visible
command from env vars set by updater.server.run.
If the agent was started outside the updater (manually, during dev,
or from a prior systemd unit), there is no agent.pid for _stop_agent
to target, so a successful code install leaves the old in-memory
agent process still serving requests. Scan /proc for any decnet agent
command and SIGTERM all matches so restart is reliable regardless of
how the agent was originally launched.
Adds a separate `decnet updater` daemon on each worker that owns the
agent's release directory and installs tarball pushes from the master
over mTLS. A normal `/update` never touches the updater itself, so the
updater is always a known-good rescuer if a bad agent push breaks
/health — the rotation is reversed and the agent restarted against the
previous release. `POST /update-self` handles updater upgrades
explicitly (no auto-rollback).
- decnet/updater/: executor, FastAPI app, uvicorn launcher
- decnet/swarm/updater_client.py, tar_tree.py: master-side push
- cli: `decnet updater`, `decnet swarm update [--host|--all]
[--include-self] [--dry-run]`, `--updater` on `swarm enroll`
- enrollment API issues a second cert (CN=updater@<host>) signed by the
same CA; SwarmHost records updater_cert_fingerprint
- tests: executor, app, CLI, tar tree, enroll-with-updater (37 new)
- wiki: Remote-Updates page + sidebar + SWARM-Mode cross-link
`swarm list` only shows enrolled workers — there was no way to see which
deckies are running and where. Adds GET /swarm/deckies on the controller
(joins DeckyShard with SwarmHost for name/address/status) plus the CLI
wrapper with --host / --state filters and --json.
deploy --mode swarm was failing on every heterogeneous fleet: the master
populates config.interface from its own box (detect_interface() → its
default NIC), then ships that verbatim. The worker's deployer then calls
get_host_ip(config.interface), hits 'ip addr show wlp6s0' on a VM whose
NIC is enp0s3, and 500s.
Fix: agent.executor._relocalize() runs on every swarm-mode deploy.
Re-detects the worker's interface/subnet/gateway/host_ip locally and
swaps them into the config before calling deployer.deploy(). When the
worker's subnet doesn't match the master's, decky IPs are re-allocated
from the worker's subnet via allocate_ips() so they're reachable.
Unihost-mode configs are left untouched — they're already built against
the local box and second-guessing them would be wrong.
Validated against anti@192.168.1.13: master dispatched interface=wlp6s0,
agent logged 'relocalized interface=enp0s3', deployer ran successfully,
dry-run returned ok=deployed.
4 new tests cover both branches (matching-subnet preserves decky IPs;
mismatch re-allocates), the end-to-end executor.deploy() path, and the
unihost short-circuit.
The swarmctl API already exposes POST /swarm/check — an active mTLS
probe that refreshes SwarmHost.status + last_heartbeat for every
enrolled worker. The CLI was missing a wrapper, so operators had to
curl the endpoint directly (which is how the VM validation run did it,
and how the wiki Deployment-Modes / SWARM-Mode pages ended up doc'ing
a command that didn't exist yet).
Matches the existing list/enroll/decommission pattern: typer subcommand
under swarm_app, --url override, Rich table output plus --json for
scripting. Three tests: populated table, empty-swarm path, and --json
emission.
New `decnet listener` command runs the master-side RFC 5425 syslog-TLS
receiver as a standalone process (mirrors `decnet api` / `decnet swarmctl`
pattern, SIGTERM/SIGINT handlers, --daemon support).
`decnet agent` now accepts --agent-dir so operators running the worker
agent under sudo/root can point at a bundle outside /root/.decnet/agent
(the HOME under sudo propagation).
Both flags were needed to stand up the full SWARM pipeline end-to-end on
a throwaway VM: mTLS control plane reachable, syslog-over-TLS wire
confirmed via tcpdump, master-crash/resume proved with zero loss and
zero duplication across 10 forwarded lines.
pyproject: bump asyncmy floor to 0.2.11 (resolver already pulled this in).
Covers failure modes the happy-path tests miss:
- log rotation (copytruncate): st_size shrinks under the forwarder, it
resets offset=0 and reships the new contents instead of getting wedged
past EOF;
- listener restart: forwarder retries, resumes from the persisted offset,
and the previously-acked lines are NOT duplicated on the master;
- listener tolerates a well-authenticated client that sends a partial
octet-count frame and drops — the server must stay up and accept
follow-on connections;
- peer_cn / fingerprint_from_ssl degrade to 'unknown' / None when no
peer cert is available (defensive path that otherwise rarely fires).
New sub-app talks HTTP to the local swarm controller (127.0.0.1:8770 by
default; override with --url or $DECNET_SWARMCTL_URL).
- enroll: POSTs /swarm/enroll, prints fingerprint, optionally writes
ca.crt/worker.crt/worker.key to --out-dir for scp to the worker.
- list: renders enrolled workers as a rich table (with --status filter).
- decommission: looks up uuid by --name, confirms, DELETEs.
deploy --mode swarm now:
1. fetches enrolled+active workers from the controller,
2. round-robin-assigns host_uuid to each decky,
3. POSTs the sharded DecnetConfig to /swarm/deploy,
4. renders per-worker pass/fail in a results table.
Exits non-zero if no workers exist or any worker's dispatch failed.
The forwarder module existed but had no runner — closes that gap so the
worker-side process can actually be launched and runs isolated from the
agent (asyncio.run + SIGTERM/SIGINT → stop_event).
Guards: refuses to start without a worker cert bundle or a resolvable
master host ($DECNET_SWARM_MASTER_HOST or --master-host).
Worker-side log_forwarder tails the local RFC 5424 log file and ships
each line as an octet-counted frame to the master over mTLS. Offset is
persisted in a tiny local SQLite so master outages never cause loss or
duplication — reconnect resumes from the exact byte where the previous
session left off. Impostor workers (cert not signed by DECNET CA) are
rejected at TLS handshake.
Master-side log_listener terminates mTLS on 0.0.0.0:6514, validates the
client cert, extracts the peer CN as authoritative worker provenance,
and appends each frame to the master's ingest log files. Attacker-
controlled syslog HOSTNAME field is ignored — the CA-controlled CN is
the only source of provenance.
7 tests added covering framing codec, offset persistence across
reopens, end-to-end mTLS delivery, crash-resilience (offset survives
restart, no duplicate shipping), and impostor-CA rejection.
DECNET_SWARM_SYSLOG_PORT / DECNET_SWARM_MASTER_HOST env bindings
added.
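For reference, the octet-counting frame shape (RFC 5425: length, space,
message) looks roughly like this — a sketch; the real codec also threads
the persisted offset through:

```python
def encode_frame(line: str) -> bytes:
    msg = line.encode("utf-8")
    return f"{len(msg)} ".encode("ascii") + msg       # "<len> <msg>", per RFC 5425

def decode_frames(buffer: bytes) -> tuple[list[str], bytes]:
    """Return complete messages plus whatever partial frame remains."""
    messages: list[str] = []
    while True:
        space = buffer.find(b" ")
        if space == -1:
            break
        length = int(buffer[:space])
        start, end = space + 1, space + 1 + length
        if len(buffer) < end:
            break                                     # partial frame: wait for more bytes
        messages.append(buffer[start:end].decode("utf-8"))
        buffer = buffer[end:]
    return messages, buffer
```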
_schemas.py was a local exception to the codebase convention. The rest
of the app keeps all API request/response DTOs in decnet/web/db/models.py
alongside UserResponse, DeployIniRequest, etc. — the swarm endpoints now
follow the same convention (SwarmEnrollRequest, SwarmHostView, etc).
Deletes decnet/web/router/swarm/_schemas.py.
Splits the three grouped router files into eight api_<verb>_<resource>.py
modules under decnet/web/router/swarm/ to match the convention used by
router/fleet/ and router/config/. Shared request/response models live in
_schemas.py. Keeps each endpoint easy to locate and modify without
stepping on siblings.
Adds decnet/web/swarm_api.py as an independent FastAPI app with routers
for host enrollment, deployment dispatch (sharding DecnetConfig across
enrolled workers via AgentClient), and active health probing. Runs as
its own uvicorn subprocess via 'decnet swarmctl', mirroring the isolation
pattern used by 'decnet api'. Also wires up 'decnet agent' CLI entry for
the worker side.
29 tests added under tests/swarm/test_swarm_api.py cover enrollment
(including bundle generation + duplicate rejection), host CRUD, sharding
correctness, non-swarm-mode rejection, teardown, and health probes with
a stubbed AgentClient.
- decnet.models.DeckyConfig grows an optional 'host_uuid' (the SwarmHost
that runs this decky). Defaults to None so legacy unihost state files
deserialize unchanged.
- decnet.agent.executor: replace non-existent config.name references
with config.mode / config.interface in logs and status payload.
- tests/swarm/test_state_schema.py covers legacy-dict roundtrip, field
default, and swarm-mode assignments.
decnet.swarm.client exposes:
- MasterIdentity / ensure_master_identity(): the master's own CA-signed
client bundle, issued once into ~/.decnet/ca/master/.
- AgentClient: async-context httpx wrapper that talks to a worker agent
over mTLS. health/status/deploy/teardown methods mirror the agent API.
SSL context is built from a bare ssl.SSLContext(PROTOCOL_TLS_CLIENT)
instead of httpx.create_ssl_context — the latter layers on default-CA
and purpose logic that broke private-CA mTLS. Server cert is pinned by
CA + chain, not DNS (workers enroll with arbitrary SANs).
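The context construction, sketched (paths are illustrative):

```python
import ssl

def _client_context(ca_path: str, cert_path: str, key_path: str) -> ssl.SSLContext:
    ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    ctx.load_verify_locations(ca_path)        # trust only the DECNET CA
    ctx.load_cert_chain(cert_path, key_path)  # the master's client identity
    ctx.check_hostname = False                # workers enroll with arbitrary SANs
    ctx.verify_mode = ssl.CERT_REQUIRED       # still demand a CA-signed server cert
    return ctx
```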
tests/swarm/test_client_agent_roundtrip.py spins uvicorn in-process
with real certs on disk and verifies:
- A CA-signed master client passes health + status calls.
- An impostor whose cert comes from a different CA cannot connect.
Worker agent (decnet.agent):
- mTLS FastAPI service exposing /deploy, /teardown, /status, /health,
/mutate. uvicorn enforces CERT_REQUIRED with the DECNET CA pinned.
- executor.py offloads the blocking deployer onto asyncio.to_thread so
the event loop stays responsive.
- server.py refuses to start without an enrolled bundle in
~/.decnet/agent/ — unauthenticated agents are not a supported mode.
- docs/openapi disabled on the agent — narrow attack surface.
tests/test_base_repo.py: DummyRepo was missing get_attacker_artifacts
(pre-existing abstractmethod) and so could not be instantiated. Added
the stub + coverage for the new swarm CRUD surface on BaseRepository.
decnet.swarm.pki provides:
- generate_ca() / ensure_ca() — self-signed root, PKCS8 PEM, 4096-bit.
- issue_worker_cert() — per-worker keypair + cert signed by the CA with
serverAuth + clientAuth EKU so the same identity backs the agent's
HTTPS endpoint AND the syslog-over-TLS upstream.
- write_worker_bundle() / load_worker_bundle() — persist with 0600 on
private keys.
- fingerprint() — SHA-256 DER hex for master-side pinning.
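fingerprint() is small enough to sketch inline — SHA-256 over the DER
encoding, assuming the cryptography library backs the PKI module:

```python
import hashlib
from cryptography import x509
from cryptography.hazmat.primitives.serialization import Encoding

def fingerprint(cert: x509.Certificate) -> str:
    return hashlib.sha256(cert.public_bytes(Encoding.DER)).hexdigest()
```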
tests/swarm/test_pki.py covers:
- CA idempotency on disk.
- Signed chain validates against CA subject.
- SAN population (DNS + IP).
- Bundle roundtrip with 0600 key perms.
- End-to-end mTLS handshake between two CA-issued peers.
- Cross-CA client rejection (handshake fails).
Introduces the master-side persistence layer for swarm mode:
- SwarmHost: enrolled worker metadata, cert fingerprint, heartbeat.
- DeckyShard: per-decky host assignment, state, last error.
Repo methods are added as default-raising on BaseRepository so unihost
deployments are untouched; SQLModelRepository implements them (shared
between the sqlite and mysql subclasses per the existing pattern).
decnet.collector.log / decnet.system.log and the *.db-shm / *.db-wal
sidecars produced by the sqlite WAL journal were slipping through the
existing rules. Extend the patterns so runtime state doesn't show up
in git status.
Reference template for .env / .env.local showing every variable that
decnet/env.py consumes, with short rationale per section (system
logging, embedded workers, profiling, API server, …). Copy to .env
and fill in secrets; .env itself stays gitignored.
Exercises the JSON → syslog formatter end to end: flat fields ride as
SD params, bulky nested metadata collapses into the meta_json_b64 blob,
and the event_type / hostname / service mapping lands in the right
RFC 5424 header slots.
Frontend now handles syslog lines from producers that don't use
structured-data (notably the SSH PROMPT_COMMAND hook, which emits
'CMD uid=0 user=root src=IP pwd=… cmd=…' as a plain logger message).
A new parseEventBody utility splits the body into head + key/value
pairs and preserves the final value verbatim so commands stay intact.
Dashboard and LiveLogs use this parser to render consistent pills
whether the structure came from SD params or from the MSG body.
The host-side sniffer interface depends on the deploy's driver choice
(--ipvlan flag). Instead of hardcoding HOST_MACVLAN_IFACE, probe both
names and pick whichever exists; warn and disable cleanly if neither
is present. Explicit DECNET_SNIFFER_IFACE still wins.
- Relaxed RFC 5424 regex to accept either NILVALUE or a numeric PROCID;
sshd / sudo go through rsyslog with their real PID, while
syslog_bridge emitters keep using '-'.
- Added a fallback pass that scans the MSG body for IP-shaped
key=value tokens. This rescues attacker attribution for plain logger
callers like the SSH PROMPT_COMMAND shim, which emits
'CMD … src=IP …' without SD-element params.
Each honeypot container now carries its own copy of the shared RFC 5424
formatter. Services that previously rolled their own ad-hoc syslog
lines can now import syslog_line / write_syslog_file for a consistent
SD-element format that the collector already knows how to parse.
Adds the server-side wiring and frontend UI to surface files captured
by the SSH honeypot for a given attacker.
- New repository method get_attacker_artifacts (abstract + SQLModel
impl) that joins the attacker's IP to `file_captured` log rows.
- New route GET /attackers/{uuid}/artifacts.
- New router /artifacts/{decky}/{service}/{stored_as} that streams a
quarantined file back to an authenticated viewer.
- AttackerDetail grows an ArtifactDrawer panel with per-file metadata
(sha256, size, orig_path) and a download action.
- ssh service fragment now sets NODE_NAME=decky_name so logs and the
host-side artifacts bind-mount share the same decky identifier.
The /opt/emit_capture.py, /opt/syslog_bridge.py, and
/usr/libexec/udev/journal-relay files were plaintext and world-readable
to any attacker root-shelled into the SSH honeypot — revealing the full
capture logic on a single cat.
Pack all three into /entrypoint.sh as XOR+gzip+base64 blobs at build
time (_build_stealth.py), then decode in-memory at container start and
exec the capture loop from a bash -c string. No .py files under /opt,
no journal-relay file under /usr/libexec/udev, no argv_zap name
anywhere. The LD_PRELOAD shim is installed as
/usr/lib/x86_64-linux-gnu/libudev-shared.so.1 — sits next to the real
libudev.so.1 and blends into the multiarch layout.
A 1-byte random XOR key is chosen at image build so a bare
'base64 -d | gunzip' probe on the visible entrypoint returns binary
noise instead of readable Python.
Docker-dependent tests live under tests/docker/ behind a new 'docker'
pytest marker (excluded from the default run, same pattern as fuzz /
live / bench).
The named pipe at /run/systemd/journal/syslog-relay had two problems
beyond its argv leak: any root-in-container process could (a) `cat`
the pipe and watch the live SIEM feed, and (b) write to it and inject
forged log lines. Since an attacker with a shell is already root
inside the honeypot, file permissions can't fix it.
Point rsyslog's auth/user actions directly at /proc/1/fd/1 — the
container-stdout fd Docker attached to PID 1 — and delete the
mkfifo + cat relay from the entrypoint. No pipe on disk, nothing to
read, nothing to inject, and one fewer cloaked process in `ps`.
Three loose ends remained after the inotifywait argv fix:
1. The bash running journal-relay showed its argv[1] (the script path)
in /proc/PID/cmdline, producing a line like
'journal-relay /usr/libexec/udev/journal-relay'
Apply argv_zap.so to that bash too.
2. argv_zap previously hardcoded PR_SET_NAME to 'kmsg-watch', which was
wrong for any caller other than inotifywait. The comm name now comes
from ARGV_ZAP_COMM so each caller can pick its own (kmsg-watch for
inotifywait, journal-relay for the watcher bash).
3. The capture.sh header started with 'SSH honeypot file-catcher' —
fatal if an attacker runs 'cat' on it. Rewritten as a plausible
systemd-journal relay helper; stray 'attacker' / 'honeypot' words
in mid-script comments stripped too.
A lived-in Linux box ships with iputils-ping, ca-certificates, and nmap
available. Their absence is a cheap tell, and they're handy for letting
the attacker move laterally in ways we want to observe. iproute2 (ip a)
was already installed for attribution — noted here for completeness.
The kmsg-watch (inotifywait) process was the last honest giveaway in
`ps aux` — its watch paths and event flags betrayed the honeypot. The
argv_zap.so shim hooks __libc_start_main, heap-copies argv for the real
main, then memsets the contiguous argv[1..] region to NUL so the kernel's
cmdline reader returns just argv[0].
gcc is installed and purged in the same Docker layer to keep the image
slim. The shim also calls prctl(PR_SET_NAME) so /proc/self/comm mirrors
the argv[0] disguise.
exec -a replaces argv[0] so ps shows 'journal-relay /usr/libexec/udev/journal-relay'
instead of '/bin/bash /usr/libexec/udev/journal-relay' — no interpreter
hint on the watcher process.
The `inotifywait | while` pipeline spawns a subshell for its tail, so
two bash processes (the script itself and the while-loop subshell)
showed up under /usr/libexec/udev/journal-relay in ps aux. Enable
lastpipe so the while loop runs in the main shell — ps now shows
one bash plus the inotify child, matching a simple udev helper.
Rename the container-side logging module decnet_logging → syslog_bridge
(canonical at templates/syslog_bridge.py, synced into each template by
the deployer). Drop the stale per-template copies; setuptools find was
picking them up anyway. Swap useradd/USER/chown "decnet" for "logrelay"
so no obvious token appears in the rendered container image.
Apply the same cloaking pattern to the telnet template that SSH got:
syslog pipe moves to /run/systemd/journal/syslog-relay and the relay
is cat'd via exec -a "systemd-journal-fwd". rsyslog.d conf rename
99-decnet.conf → 50-journal-forward.conf. SSH capture script:
/var/decnet/captured → /var/lib/systemd/coredump (real systemd path),
logger tag decnet-capture → systemd-journal. Compose volume updated
to match the new in-container quarantine path.
SD element ID shifts decnet@55555 → relay@55555; synced across
collector, parser, sniffer, prober, formatter, tests, and docs so the
host-side pipeline still matches what containers emit.
Rename the rsyslog→stdout pipe from /var/run/decnet-logs (dead giveaway)
to /run/systemd/journal/syslog-relay, and launch the relay via
exec -a "systemd-journal-fwd" so ps shows a plausible systemd forwarder
instead of a bare cat. Casual ps/ls inspection now shows nothing
with "decnet" in the name.
Old ps output was a dead giveaway: two "decnet-capture" bash procs
and a raw "inotifywait". Install script at /usr/libexec/udev/journal-relay
and invoke inotifywait through a /usr/libexec/udev/kmsg-watch symlink so
both now render as plausible udev/journal helpers under casual inspection.
fuser and /proc fd walks race scp/wget/sftp — by close_write the writer
has already closed the fd, so pid-chain attribution always resolved to
unknown for non-interactive drops. Fall back to the ss snapshot: one
established session → ss-only, multiple → ss-ambiguous (still record
src_ip from the first, analysts cross-check concurrent_sessions).
inotifywait watches writable paths in the SSH decky and mirrors any
file close_write/moved_to into a per-decky host-mounted quarantine dir.
Each artifact carries a .meta.json with attacker attribution resolved
by walking the writer PID's PPid chain to the sshd session leader,
then cross-referencing ss and utmp for source IP/user/login time.
Also emits an RFC 5424 syslog line per capture for SIEM correlation.
Commit-by-commit evidence of the perf work: each CSV is the raw
Locust output for the commit hash in its filename, plus the four
fb69a06 variants (single worker, tracing on/off, single-core pinned,
12 workers) referenced in the README baseline table.
Some pyinstrument frame trees contain branches where an identifier is
missing (typically at the very top or with certain async boundaries),
which crashed the aggregator with a KeyError mid-run. Short-circuit
on None frames and missing identifiers so a single malformed HTML profile
no longer kills the summary of the other few hundred.
asyncmy needs cryptography for caching_sha2_password (the MySQL 8
default auth plugin). Without it, connection handshake fails the
moment the server negotiates the modern plugin.
Capture Locust numbers from the fb69a06 branch across five
configurations so future regressions have something to measure against.
- 500u tracing-on single-worker: ~960 RPS / p99 2.9 s
- 1500u tracing-on single-worker: ~880 RPS / p99 9.5 s
- 1500u tracing-off single-worker: ~990 RPS / p99 8.4 s
- 1500u tracing-off pinned to one core: ~46 RPS / p99 122 s
- 1500u tracing-off 12 workers: ~1585 RPS / p99 4.2 s
Also note the MySQL max_connections math ((pool_size + max_overflow) *
workers = 720) to explain why the default 151 needs bumping, and the
Python 3.14 GC segfault so nobody repeats that mistake.
Previous attempt (shield + sync invalidate fallback) didn't work
because shield only protects against cancellation from *other* tasks.
When the caller task itself is cancelled mid-query, its next await
re-raises CancelledError as soon as the shielded coroutine yields —
rollback inside session.close() never completes, the aiomysql
connection is orphaned, and the pool logs 'non-checked-in connection'
when GC finally reaches it.
Hand exception-path cleanup to loop.create_task() so the new task
isn't subject to the caller's pending cancellation. close() (and the
invalidate() fallback for a dead connection) runs to completion.
Success path is unchanged — still awaits close() inline so callers
see commit visibility and pool release before proceeding.
Under high-concurrency MySQL load, uvicorn cancels request tasks when
clients disconnect. If cancellation lands mid-query, session.close()
tries to ROLLBACK on a connection that aiomysql has already marked as
closed — raising InterfaceError("Cancelled during execution") and
leaving the connection checked-out until GC, which the pool then
warns about as a 'non-checked-in connection'.
The old fallback tried sync.rollback() + sync.close(), but those still
go through the async driver and fail the same way on a dead connection.
Replace them with session.sync_session.invalidate(), which just flips
the pool's internal record — no I/O, so it can't be cancelled — and
tells the pool to drop the connection immediately instead of waiting
for garbage collection.
Locust @task(2) hammers /auth/login in steady state on top of the
on_start burst. After caching the uuid-keyed user lookup and every
other read endpoint, login alone accounted for 47% of total
_execute at 500c/u — pure DB queueing on SELECT users WHERE
username=?.
5s TTL, positive hits only (misses bypass so a freshly-created
user can log in immediately). Password verify still runs against
the cached hash, so security is unchanged — the only staleness
window is: a changed password accepts the old password for up to
5s until invalidate_user_cache fires (it's called on every write).
The per-request SELECT users WHERE uuid=? in require_role was the
hidden tax behind every authed endpoint — it kept _execute at ~60%
across the profile even after the page caches landed. Even /health
(with its DB and Docker probes cached) was still 52% _execute from
this one query.
- dependencies.py: 10s TTL cache on get_user_by_uuid, well below JWT
expiry. invalidate_user_cache(uuid) is called on password change,
role change, and user delete.
- api_get_config.py: 5s TTL cache on the admin branch's list_users()
(previously fetched every /config call). Invalidated on user
create/update/delete.
- api_change_pass.py + api_manage_users.py: invalidation hooks on
all user-mutating endpoints.
Round-2 follow-up: profile at 500c/u showed _execute still dominating
the uncached read endpoints (/bounty 76%, /logs/histogram 73%,
/deckies 56%). Same router-level TTL pattern as /stats — 5s window,
asyncio.Lock to collapse concurrent calls into one DB hit.
- /bounty: cache default unfiltered page (limit=50, offset=0,
bounty_type=None, search=None). Filtered requests bypass.
- /logs/histogram: cache default (interval_minutes=15, no filters).
Filtered / non-default interval requests bypass.
- /deckies: cache full response (endpoint takes no params).
- /config: bump _STATE_TTL from 1.0 to 5.0 — admin writes are rare,
1s was too short for bursts to coalesce at high concurrency.
SQLite is a local file — a SELECT 1 per session checkout is pure
overhead. Env var DECNET_DB_POOL_PRE_PING stays for anyone running
on a network-mounted volume. MySQL backend keeps its current default.
Popen moved inside the try so a missing uvicorn falls through to the
existing error message instead of crashing the CLI. test_cli was still
patching the old subprocess.run entrypoint; switched both api command
tests to patch subprocess.Popen / os.killpg to match the current path.
Every /stats call ran SELECT count(*) FROM logs + SELECT count(DISTINCT
attacker_ip) FROM logs; every /logs and /attackers call ran an
unfiltered count for the paginator. At 500 concurrent users these
serialize through aiosqlite's worker threads and dominate wall time.
Cache at the router layer (repo stays dialect-agnostic):
- /stats response: 5s TTL
- /logs total (only when no filters): 2s TTL
- /attackers total (only when no filters): 2s TTL
Filtered paths bypass the cache. Pattern reused from api_get_config
and api_get_health (asyncio.Lock + time.monotonic window + lazy lock).
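The shared shape of those router-level caches, sketched; TTLs and names are
illustrative, and the lock is created lazily so it binds to the running
loop (see the asyncio.Lock note further down):

```python
import asyncio
import time

_cache: dict = {"value": None, "expires": 0.0}
_lock: asyncio.Lock | None = None      # created lazily so it binds to the live loop

async def cached(fetch, ttl: float = 5.0):
    global _lock
    now = time.monotonic()
    if _cache["value"] is not None and now < _cache["expires"]:
        return _cache["value"]
    if _lock is None:
        _lock = asyncio.Lock()
    async with _lock:                  # collapse concurrent misses into one DB hit
        now = time.monotonic()
        if _cache["value"] is not None and now < _cache["expires"]:
            return _cache["value"]
        _cache["value"] = await fetch()
        _cache["expires"] = now + ttl
        return _cache["value"]
```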
require_role._check previously chained from get_current_user, which
already loaded the user — then looked it up again. Inline the decode +
single user fetch + must_change_password + role check so every
authenticated request costs one SELECT users WHERE uuid=? instead of
two.
Only database, docker, and ingestion_worker now count as critical
(→ 503 unhealthy). attacker/sniffer/collector failures drop overall
status to degraded (still 200) so the dashboard doesn't panic when a
non-essential worker isn't running.
The ingester now accumulates up to DECNET_BATCH_SIZE rows (default 100)
or DECNET_BATCH_MAX_WAIT_MS (default 250ms) before flushing through
repo.add_logs — one transaction, one COMMIT per batch instead of per
row. Under attacker traffic this collapses N commits into ⌈N/100⌉ and
takes most of the SQLite writer-lock contention off the hot path.
Flush semantics are cancel-safe: _position only advances after a batch
commits successfully, and the flush helper bails without touching the
DB if the enclosing task is being cancelled (lifespan teardown).
Un-flushed lines stay in the file and are re-read on next startup.
Tests updated to assert on add_logs (bulk) instead of the per-row
add_log that the ingester no longer uses, plus a new test that 250
lines flush in ≤5 calls.
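Roughly the shape of the flush loop, as a sketch: read_lines/add_logs/save_position are hypothetical callables standing in for the real ingester internals, and only the env var names are taken from this note.

```python
import asyncio
import os

BATCH_SIZE = int(os.getenv("DECNET_BATCH_SIZE", "100"))
MAX_WAIT = int(os.getenv("DECNET_BATCH_MAX_WAIT_MS", "250")) / 1000.0

async def run_ingester(read_lines, add_logs, save_position):
    batch: list[dict] = []
    last_flush = asyncio.get_running_loop().time()
    position = 0
    async for position, record in read_lines():
        batch.append(record)
        now = asyncio.get_running_loop().time()
        if len(batch) >= BATCH_SIZE or (now - last_flush) >= MAX_WAIT:
            await add_logs(batch)             # one transaction, one COMMIT per batch
            # cancellation during teardown never reaches the save, so un-flushed
            # lines stay in the file and are re-read on the next startup
            await save_position(position)     # position only advances after the commit
            batch, last_flush = [], now
    if batch:
        await add_logs(batch)
        await save_position(position)
```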
Adds BaseRepository.add_logs (default: loops add_log for backwards
compatibility) and a real single-session/single-commit implementation
on SQLModelRepository. Introduces DECNET_BATCH_SIZE (default 100) and
DECNET_BATCH_MAX_WAIT_MS (default 250) so the ingester can flush on
either a size or a time bound when it adopts the new method.
Ingester wiring is deferred to a later pass — the single-log path was
deadlocking tests when flushed during lifespan teardown, so this change
ships the DB primitive alone.
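The backwards-compatible default is presumably little more than this (a sketch; only the method names come from this note):

```python
class BaseRepository:
    async def add_log(self, log: dict) -> None:
        raise NotImplementedError

    async def add_logs(self, logs: list[dict]) -> None:
        # default: loop the per-row path so existing backends keep working;
        # SQLModelRepository overrides this with one session and one commit
        for log in logs:
            await self.add_log(log)
```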
A module-level asyncio.Lock binds to the loop it was first awaited on.
Under pytest-anyio (and xdist) each test spins up a new loop; any later
test that hit /health or /config would wait on a lock owned by a dead
loop and the whole worker would hang.
Create the lock on first use and drop it in the test-reset helpers so a
fresh loop always gets a fresh lock.
Under CPU saturation the sync docker.from_env()/ping() calls could miss
their socket timeout, cache _docker_healthy=False, and return 503 for
the full 5s TTL window. Both calls now run on a thread so the event
loop keeps serving other requests while Docker is being probed.
With --workers > 1, SIGINT from the terminal raced uvicorn's supervisor:
some workers got signaled directly, the supervisor respawned them, and
the result behaved like a forkbomb. Start uvicorn in its own session and
signal the whole process group (SIGTERM → 10s grace → SIGKILL) when we
catch KeyboardInterrupt.
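A sketch of that shutdown path using the standard subprocess/os/signal APIs (the uvicorn invocation and app path are illustrative):

```python
import os
import signal
import subprocess
import sys

proc = subprocess.Popen(
    [sys.executable, "-m", "uvicorn", "decnet.web.api:app", "--workers", "4"],
    start_new_session=True,              # own session, so its own process group
)
try:
    proc.wait()
except KeyboardInterrupt:
    pgid = os.getpgid(proc.pid)
    os.killpg(pgid, signal.SIGTERM)      # hit supervisor and workers together
    try:
        proc.wait(timeout=10)            # grace period
    except subprocess.TimeoutExpired:
        os.killpg(pgid, signal.SIGKILL)
```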
Forwards straight to uvicorn's --workers. Default stays at 1 so the
single-worker efficiency direction is preserved; raising it is available
for threat-actor load scenarios where the honeypot needs to soak real
attack traffic without queueing on one event loop.
Previously every user did login → change-pass → re-login in on_start
regardless of whether the server actually required a password change.
With bcrypt at ~250ms/call that's 3 bcrypt-bound requests per user.
At 2500 users the on_start queue was ~10k bcrypt ops — users never
escaped warmup, so @task endpoints never fired.
Login already returns must_change_password; only run the change-pass
+ re-login dance when the server says we have to. Cuts on_start from
3 requests to 1 for every user after the first DB initialization.
stdlib json was FastAPI's default. Every response body, every SSE frame,
and every add_log/state/payload write paid the stdlib encode cost.
- pyproject.toml: add orjson>=3.10 as a core dep.
- decnet/web/api.py: default_response_class=ORJSONResponse on the
FastAPI app, so every endpoint return goes through orjson without
touching call sites. Explicit JSONResponse sites in the validation
exception handlers migrated to ORJSONResponse for consistency.
- health endpoint's explicit JSONResponse → ORJSONResponse.
- SSE stream (api_stream_events.py): 6 json.dumps call sites →
orjson.dumps(...).decode() — the per-event frames that fire on every
sse tick.
- sqlmodel_repo.py: encode sites on the log-insert path switched to
orjson (fields, payload, state value). Parser sites (json.loads)
left as-is for now — not on the measured hot path.
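The app-level switch and an SSE frame encode look roughly like this (a sketch; the function name and frame layout are illustrative):

```python
import orjson
from fastapi import FastAPI
from fastapi.responses import ORJSONResponse

# every endpoint return value is serialized by orjson, no call-site changes
app = FastAPI(default_response_class=ORJSONResponse)

def sse_frame(event: dict) -> str:
    # orjson.dumps returns bytes, so decode before building the SSE frame
    return f"data: {orjson.dumps(event).decode()}\n\n"
```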
Locust hit /health and /config on every @task(3), so each request was
firing repo.get_total_logs() and two repo.get_state() calls against
aiosqlite — filling the driver queue for data that changes on the order
of seconds, not milliseconds.
Both caches follow the shape already used by the existing Docker cache:
- asyncio.Lock with double-checked TTL so concurrent callers collapse
into one DB hit per 1s window.
- _reset_* helpers called from tests/api/conftest.py::setup_db so the
module-level cache can't leak across tests.
tests/test_health_config_cache.py asserts 50 concurrent callers
produce exactly 1 repo call, and the cache expires after TTL.
Creating a new docker.from_env() client per /health request opened a
fresh unix-socket connection each time. Under load that's wasteful and
hammers dockerd.
Keep a module-level client + last-check timestamp; actually ping every
5 seconds, return cached state in between. Reset helper provided for
tests.
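Sketch of the cached probe, with the blocking ping pushed onto a thread as described two notes up (helper names are illustrative):

```python
import asyncio
import time

import docker

_client = None
_healthy = False
_checked_at = 0.0
_TTL = 5.0

async def docker_healthy() -> bool:
    global _healthy, _checked_at
    if time.monotonic() - _checked_at < _TTL:
        return _healthy                          # cached verdict inside the window

    def _probe() -> bool:
        global _client
        if _client is None:
            _client = docker.from_env()          # reuse one unix-socket client
        _client.ping()
        return True

    try:
        _healthy = await asyncio.to_thread(_probe)   # keep the event loop free
    except Exception:
        _healthy = False
    _checked_at = time.monotonic()
    return _healthy

def _reset_docker_cache():
    global _client, _healthy, _checked_at
    _client, _healthy, _checked_at = None, False, 0.0
```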
- aiomysql → asyncmy on both sides of the URL/import (faster, maintained).
- Pool sizing now reads DECNET_DB_POOL_SIZE / MAX_OVERFLOW / RECYCLE /
PRE_PING for both SQLite and MySQL engines so stress runs can bump
without code edits.
- MySQL initialize() now wraps schema DDL in a GET_LOCK advisory lock so
concurrent uvicorn workers racing create_all() don't hit 'Table was
skipped since its definition is being modified by concurrent DDL'.
- sqlite & mysql repo get_log_histogram use the shared _session() helper
instead of session_factory() for consistency with the rest of the repo.
- SSE stream_events docstring updated to asyncmy.
verify_password / get_password_hash are CPU-bound and take ~250ms each
at rounds=12. Called directly from async endpoints, they stall every
other coroutine for that window — the single biggest single-worker
bottleneck on the login path.
Adds averify_password / ahash_password that wrap the sync versions in
asyncio.to_thread. Sync versions stay put because _ensure_admin_user and
tests still use them.
5 call sites updated: login, change-password, create-user, reset-password.
tests/test_auth_async.py asserts parallel averify runs concurrently (~1x
of a single verify, not 2x).
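The wrappers are presumably little more than this (sketch; the sync helpers below are stand-ins for the real bcrypt-backed functions):

```python
import asyncio

def verify_password(plain: str, hashed: str) -> bool:
    ...   # CPU-bound bcrypt verify, ~250ms at rounds=12 (stand-in)

def get_password_hash(plain: str) -> str:
    ...   # CPU-bound bcrypt hash (stand-in)

async def averify_password(plain: str, hashed: str) -> bool:
    # runs on a worker thread so other coroutines keep making progress
    return await asyncio.to_thread(verify_password, plain, hashed)

async def ahash_password(plain: str) -> str:
    return await asyncio.to_thread(get_password_hash, plain)
```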
_ensure_admin_user was strict insert-if-missing: once a stale hash landed
in decnet.db (e.g. from a deploy that used a different DECNET_ADMIN_PASSWORD),
login silently 401'd because changing the env var later had no effect.
Now on startup: if the admin still has must_change_password=True (they
never finalized their own password), re-sync the hash from the current
env var. Once the admin sets a real password, we leave it alone.
Found via locustfile.py login storm — see tests/test_admin_seed.py.
Note: this commit also bundles uncommitted pool-management work already
present in sqlmodel_repo.py from prior sessions.
Parses every HTML in profiles/, reattributes [self]/[await] synthetic
leaves to their parent function, and reports per-endpoint wall-time
(mean/p50/p95/max) plus top hot functions by cumulative self-time.
Makes post-locust profile dirs actually readable — otherwise they're
just a pile of hundred-plus HTML files.
When decnet.system.log is root-owned (e.g. created by a pre-fix 'sudo
decnet deploy') and a subsequent non-root process tries to log, the
InodeAwareRotatingFileHandler raised PermissionError out of emit(),
which propagated up through logger.debug/info and killed the collector's
log stream loop ('log stream ended ... reason=[Errno 13]').
Now matches stdlib behaviour: wrap _open() in try/except OSError and
defer to handleError() on failure. Adds a regression test.
Also: scripts/profile/view.sh 'pyinstrument' keyword was matching
memray-flamegraph-*.html files. Exclude the memray-* prefix.
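The handler fix above amounts to guarding the reopen, roughly (sketch; the real InodeAwareRotatingFileHandler carries more state):

```python
import logging
from logging.handlers import RotatingFileHandler

class GuardedRotatingFileHandler(RotatingFileHandler):
    def emit(self, record: logging.LogRecord) -> None:
        try:
            if self.stream is None:
                self.stream = self._open()   # may raise PermissionError on root-owned files
        except OSError:
            self.handleError(record)         # stdlib behaviour: report, don't propagate
            return
        super().emit(record)
```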
Reads the memray usage CSV and emits a verdict based on tail-drop-from-
peak: CLIMB-AND-DROP, MOSTLY-RELEASED, or SUSTAINED-AT-PEAK. Deliberately
ignores net-growth-vs-baseline since any active workload grows vs. a cold
interpreter — that metric is misleading as a leak signal.
Mirrors the inode-check fix from 935a9a5 (collector worker) for the
stdlib-handler-based log paths. Both decnet.system.log (config.py) and
decnet.log (logging/file_handler.py) now use a subclass that stats the
target path before each emit and reopens on inode/device mismatch —
matching the behavior of stdlib WatchedFileHandler while preserving
size-based rotation.
Previously: rm decnet.system.log → handler kept writing to the orphaned
inode until maxBytes triggered; all lines between were lost.
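A minimal version of the stat-and-reopen check, modelled on stdlib WatchedFileHandler (the project's subclass additionally keeps size-based rotation):

```python
import logging
import os

class InodeAwareFileHandler(logging.FileHandler):
    def __init__(self, filename: str):
        super().__init__(filename)
        self._sig = self._stat_sig()

    def _stat_sig(self):
        try:
            st = os.stat(self.baseFilename)
            return (st.st_dev, st.st_ino)
        except FileNotFoundError:
            return None

    def emit(self, record: logging.LogRecord) -> None:
        if self._stat_sig() != self._sig:
            # target was deleted or rotated: drop the orphaned inode, recreate
            if self.stream is not None:
                self.stream.flush()
                self.stream.close()
            self.stream = self._open()
            self._sig = self._stat_sig()
        super().emit(record)
```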
'sudo decnet deploy' needs root for MACVLAN, but the log files it creates
(decnet.log and decnet.system.log) end up owned by root. A subsequent
non-root 'decnet api' then crashes on PermissionError appending to them.
New decnet.privdrop helper reads SUDO_UID/SUDO_GID and chowns files/dirs
back to the invoking user. Best-effort: no-op when not root, not under
sudo, path missing, or chown fails. Applied at both log-file creation
sites (config.py system log, logging/file_handler.py syslog file).
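Best-effort chown-back, roughly (sketch; the helper name and exact guards are assumptions):

```python
import os

def chown_to_invoker(path: str) -> None:
    if os.geteuid() != 0:
        return                                   # no-op when not running as root
    uid, gid = os.environ.get("SUDO_UID"), os.environ.get("SUDO_GID")
    if not uid or not gid or not os.path.exists(path):
        return                                   # not under sudo, or path missing
    try:
        os.chown(path, int(uid), int(gid))
    except OSError:
        pass                                     # best-effort: never fail the caller
```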
The API's lifespan unconditionally spawned a MACVLAN sniffer task, which
duplicated the standalone 'decnet sniffer --daemon' process that
'decnet deploy' always starts — causing two workers to sniff the same
interface, double events, and wasted CPU.
Mirror the existing DECNET_EMBED_PROFILER pattern: sniffer is OFF by
default, opt in explicitly. Static regression tests guard against
accidental removal of the gate.
Without it, 'Total number of frames seen: 0' in memray stats and flamegraphs
render empty / C-only. Also added --follow-fork so uvicorn workers spawned
as child processes are tracked.
Dispatches by extension: .prof -> snakeviz, memray .bin -> memray flamegraph
(overridable via VIEW=table|tree|stats|summary|leaks), .svg/.html -> xdg-open.
Positional arg can be a file path or a type keyword (cprofile, memray, pyspy,
pyinstrument).
Root cause of 'No python processes found in process <pid>': py-spy needs
per-release ABI knowledge and 0.4.1 (latest PyPI) predates 3.14. Wrapper
now detects the interpreter and points users at pyinstrument/memray/cProfile.
The builder in decnet/web/db/mysql/database.py emits 'mysql+asyncmy://' URLs
(asyncmy is the declared dep in pyproject.toml). Tests were stale from a
prior aiomysql era.
New `profile` optional-deps group, opt-in Pyinstrument ASGI middleware
gated by DECNET_PROFILE_REQUESTS, bench marker + tests/perf/ micro-benchmarks
for repository hot paths, and scripts/profile/ helpers for py-spy/cProfile/memray.
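The opt-in middleware is roughly the standard pyinstrument-in-ASGI recipe (sketch; only the env var name comes from this note, and the output path is illustrative):

```python
import os

from fastapi import FastAPI, Request
from pyinstrument import Profiler

app = FastAPI()

if os.getenv("DECNET_PROFILE_REQUESTS", "false").lower() == "true":

    @app.middleware("http")
    async def profile_request(request: Request, call_next):
        profiler = Profiler(async_mode="enabled")
        profiler.start()
        response = await call_next(request)
        profiler.stop()
        name = request.url.path.strip("/").replace("/", "_") or "root"
        with open(f"profiles/{name}.html", "w") as fh:   # profiles/ must exist
            fh.write(profiler.output_html())
        return response
```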
Root cause: test_schemathesis.py mutates decnet.web.auth.SECRET_KEY at
module-level import time, poisoning JWT verification for all other tests
in the same process — even when fuzz tests are deselected.
- Add pytest_ignore_collect hook in tests/api/conftest.py to skip
collecting test_schemathesis.py unless -m fuzz is selected
- Add --dist loadscope to addopts so xdist groups by module (protects
module-scoped fixtures in live tests)
- Remove now-unnecessary xdist_group markers from live test classes
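The collection hook is roughly this (sketch; pytest >= 7.2 hook signature, and the marker check may be looser in the real conftest):

```python
def pytest_ignore_collect(collection_path, config):
    if collection_path.name != "test_schemathesis.py":
        return None                      # let pytest decide for everything else
    markexpr = (config.getoption("-m", default="") or "").strip()
    # only collect the fuzz module when it was explicitly selected
    return markexpr != "fuzz"
```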
- Add 403 response to all RBAC-gated endpoints (schemathesis UndefinedStatusCode)
- Add 400 response to all endpoints accepting JSON bodies (malformed input)
- Add required 'title' field to schemathesis.toml for schemathesis 4.15+
- Add xdist_group markers to live tests with module-scoped fixtures to
prevent xdist from distributing them across workers (fixture isolation)
Extends tracing to every remaining module: all 23 API route handlers,
correlation engine, sniffer (fingerprint/p0f/syslog), prober (jarm/hassh/tcpfp),
profiler behavioral analysis, logging subsystem, engine, and mutator.
Bridges the ingester→SSE trace gap by persisting trace_id/span_id columns on
the logs table and creating OTEL span links in the SSE endpoint. Adds log-trace
correlation via _TraceContextFilter injecting otel_trace_id into Python LogRecords.
Includes development/docs/TRACING.md with full span reference (76 spans),
pipeline propagation architecture, quick start guide, and troubleshooting.
Collector now creates a span per event and injects W3C trace context
into JSON records. Ingester extracts that context and creates child
spans, connecting the full event journey: collector -> ingester ->
db.add_log + extract_bounty -> db.add_bounty.
Profiler now creates per-IP spans inside update_profiles with rich
attributes (event_count, is_traversal, bounty_count, command_count).
Traces in Jaeger now show the complete execution map from capture
through ingestion and profiling.
Replace brittle explicit method-by-method proxy with __getattr__-based
dynamic proxy that forwards all args/kwargs to the inner repo. Fixes
TypeError on get_logs_after_id() where concrete repo accepts extra
kwargs beyond the ABC signature.
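The proxy core is essentially this (sketch; span creation elided, and it assumes all repository methods are coroutines):

```python
class TracedRepository:
    def __init__(self, inner):
        self._inner = inner

    def __getattr__(self, name):
        attr = getattr(self._inner, name)
        if not callable(attr):
            return attr

        async def wrapper(*args, **kwargs):
            # a span named after the repo method would be opened here;
            # every positional and keyword argument is forwarded unchanged
            return await attr(*args, **kwargs)

        return wrapper
```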
Pin DECNET_DEVELOPER_TRACING=false in conftest.py so .env.local
settings don't leak into the test suite.
Gated by DECNET_DEVELOPER_TRACING env var (default off, zero overhead).
When enabled, traces flow through FastAPI routes, background workers
(collector, ingester, profiler, sniffer, prober), engine/mutator
operations, and all DB calls via TracedRepository proxy.
Includes Jaeger docker-compose for local dev and 18 unit tests.
resp.read(4096) blocks until 4096 bytes accumulate, which stalls SSE
events (~100-500 bytes each) in the proxy buffer indefinitely. Switch
to read1() which returns bytes immediately available without waiting
for more. Also disable the 120s socket timeout for SSE connections.
The collector spawned one permanent thread per Docker container via
asyncio.to_thread(), saturating the default asyncio executor. This
starved short-lived to_thread(load_state) calls in get_deckies() and
get_stats_summary(), causing the SSE stream and deckies endpoints to
hang indefinitely while other DB-only endpoints worked fine.
Give the collector and sniffer their own ThreadPoolExecutor so they
never compete with the default pool.
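Sketch of the dedicated pool (names and sizes are illustrative; the docker SDK call is the real blocking work being isolated):

```python
import asyncio
from concurrent.futures import ThreadPoolExecutor

_collector_pool = ThreadPoolExecutor(max_workers=32, thread_name_prefix="collector")

async def stream_container_logs(container, handle_line):
    loop = asyncio.get_running_loop()

    def _blocking_stream():
        for line in container.logs(stream=True, follow=True):
            handle_line(line)

    # runs on the dedicated pool, never the default executor that to_thread() uses
    await loop.run_in_executor(_collector_pool, _blocking_stream)
```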
decnet deploy spawns a standalone profiler daemon AND api.py was also starting
attacker_profile_worker as an asyncio task inside the web server. Both instances
shared the same attacker_worker_cursor key in the state table, causing a race
where one instance could skip events already claimed by the other or overwrite
the cursor mid-batch.
Default is now OFF (embedded profiler disabled). The standalone daemon started
by decnet deploy is the single authoritative instance. Set DECNET_EMBED_PROFILER=true
only when running decnet api in isolation without a full deploy.
The active prober emits tcpfp_fingerprint events with TTL, window, MSS etc.
from the attacker's SYN-ACK. These were invisible to the behavioral profiler
for two reasons:
1. target_ip (prober's field name for attacker IP) was not in _IP_FIELDS in
collector/worker.py or correlation/parser.py, so the profiler re-parsed
raw_lines and got attacker_ip=None, never attributing prober events to
the attacker profile.
2. sniffer_rollup only handled tcp_syn_fingerprint (passive sniffer) and
ignored tcpfp_fingerprint (active prober). Prober events use different
field names: window_size/window_scale/sack_ok vs window/wscale/has_sack.
Changes:
- Add target_ip to _IP_FIELDS in collector and parser
- Add _PROBER_TCPFP_EVENT and _INITIAL_TTL table to behavioral.py
- sniffer_rollup now processes tcpfp_fingerprint: maps field names, derives
OS from TTL via _os_from_ttl, computes hop_distance = initial_ttl - observed
- Expand prober DEFAULT_TCPFP_PORTS to [22,80,443,8080,8443,445,3389] for
better SYN-ACK coverage on attacker machines
- Add 4 tests covering prober OS detection, hop distance, and field mapping
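The TTL mapping and hop-distance derivation are roughly this (sketch; the real _INITIAL_TTL table and field map in behavioral.py may differ):

```python
_INITIAL_TTL = {64: "linux/bsd", 128: "windows", 255: "network gear"}

def _os_from_ttl(observed_ttl: int) -> str:
    # assume the nearest initial TTL at or above the observed value
    for initial in sorted(_INITIAL_TTL):
        if observed_ttl <= initial:
            return _INITIAL_TTL[initial]
    return "unknown"

def hop_distance(observed_ttl: int) -> int:
    initial = next((i for i in sorted(_INITIAL_TTL) if observed_ttl <= i), observed_ttl)
    return initial - observed_ttl

# prober field names -> the names sniffer_rollup already understands
_TCPFP_FIELD_MAP = {"window_size": "window", "window_scale": "wscale", "sack_ok": "has_sack"}
```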
Templates for http, https, k8s, and docker_api log the client IP as
remote_addr (Flask's request.remote_addr) instead of src_ip. The collector
and correlation parser only checked src_ip/src/client_ip/remote_ip/ip, so
every request event from those services was stored with attacker_ip="Unknown"
and never associated with any attacker profile.
Adding remote_addr to _IP_FIELDS in both collector/worker.py and
correlation/parser.py fixes attribution. The profiler cursor was also reset
to 0 so the worker performs a cold rebuild and re-ingests existing events with
the corrected field mapping.
templates/decnet_logging.py calls str(v) on all SD-PARAM values, turning a
headers dict into Python repr ("{'User-Agent': ...}") rather than JSON.
detect_tools_from_headers() called json.loads() on that string and silently
swallowed the error, returning [] for every HTTP event. Same bug prevented
the ingester from extracting User-Agent bounty fingerprints.
- templates/http/server.py: wrap headers dict in json.dumps() before passing
to syslog_line so the value is a valid JSON string in the syslog record
- behavioral.py: add ast.literal_eval fallback for existing DB rows that were
stored with the old Python repr format
- ingester.py: parse headers as JSON string in _extract_bounty so User-Agent
fingerprints are stored correctly going forward
- tests: add test_json_string_headers and test_python_repr_headers_fallback
to exercise both formats in detect_tools_from_headers
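The tolerant parse used on read amounts to JSON-first with a literal_eval fallback (sketch; the helper name is illustrative):

```python
import ast
import json

def parse_headers(raw: str) -> dict:
    try:
        return json.loads(raw)
    except (TypeError, ValueError):
        pass
    try:
        value = ast.literal_eval(raw)    # old rows stored str(dict), i.e. Python repr
        return value if isinstance(value, dict) else {}
    except (ValueError, SyntaxError):
        return {}
```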
Replaces the single persistent open() with inode-based reopen logic.
If decnet.log or decnet.json is deleted or renamed by logrotate, the
next write detects the stale inode, closes the old handle, and creates
a fresh file — preventing silent data loss to orphaned inodes.
- Ingester now loads byte-offset from DB on startup (key: ingest_worker_position)
and saves it after each batch — prevents full re-read on every API restart
- On file truncation/rotation the saved offset is reset to 0
- Profiler worker now loads last_log_id from DB on startup — every restart
becomes an incremental update instead of a full cold rebuild
- Updated all affected tests to mock get_state/set_state; added new tests
covering position restore, set_state call, truncation reset, and cursor
restore/cold-start paths
Cold start fetched all logs in one bulk query then processed them in a tight
synchronous loop with no yields, blocking the asyncio event loop for seconds
on datasets of 30K+ rows. This stalled every concurrent await — including the
SSE stream generator's initial DB calls — causing the dashboard to show
INITIALIZING SENSORS indefinitely.
Changes:
- Drop _cold_start() and get_all_logs_raw(); uninitialized state now runs the
same cursor loop as incremental, starting from last_log_id=0
- Yield to the event loop after every _BATCH_SIZE rows (asyncio.sleep(0))
- Add SSE keepalive comment as first yield so the connection flushes before
any DB work begins
- Add Cache-Control/X-Accel-Buffering headers to StreamingResponse
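The cursor loop with its per-batch yield looks roughly like this (sketch; batch size, row shape, and update_profile are placeholders, get_logs_after_id is the repo method named elsewhere in these notes):

```python
import asyncio

_BATCH_SIZE = 500

async def rebuild_profiles(repo, update_profile, last_log_id: int = 0) -> int:
    while True:
        rows = await repo.get_logs_after_id(last_log_id, limit=_BATCH_SIZE)
        if not rows:
            return last_log_id
        for row in rows:
            update_profile(row)          # synchronous per-row work
        last_log_id = rows[-1].id
        await asyncio.sleep(0)           # yield so SSE and other awaits can run
```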
Existing MySQL databases hit a DataError when the commands/fingerprints
JSON blobs exceed 64 KiB (TEXT limit). _BIG_TEXT emits MEDIUMTEXT only
at CREATE TABLE time; create_all() is a no-op on existing columns.
Add MySQLRepository._migrate_column_types() that queries
information_schema and issues ALTER TABLE … MODIFY COLUMN … MEDIUMTEXT
for the five affected columns (commands, fingerprints, services, deckies,
state.value) whenever they are still TEXT. Called from an overridden
initialize() after _migrate_attackers_table() and before create_all().
Add tests/test_mysql_migration.py covering: ALTER issued for TEXT columns,
no-op for already-MEDIUMTEXT, idempotency, DEFAULT clause correctness,
and initialize() call order.
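The migration likely reduces to an information_schema check plus a MODIFY per column, along these lines (sketch; table names, SQL text, and connection handling are assumptions):

```python
from sqlalchemy import text

_MEDIUMTEXT_COLUMNS = [
    ("attackers", "commands"),
    ("attackers", "fingerprints"),
    ("attackers", "services"),
    ("attackers", "deckies"),
    ("state", "value"),
]

async def migrate_column_types(conn, schema: str) -> None:
    for table, column in _MEDIUMTEXT_COLUMNS:
        result = await conn.execute(
            text(
                "SELECT DATA_TYPE FROM information_schema.COLUMNS "
                "WHERE TABLE_SCHEMA = :s AND TABLE_NAME = :t AND COLUMN_NAME = :c"
            ),
            {"s": schema, "t": table, "c": column},
        )
        if (result.scalar() or "").lower() == "text":
            # identifiers come from the fixed list above, so f-string DDL is safe here
            await conn.execute(text(f"ALTER TABLE {table} MODIFY COLUMN {column} MEDIUMTEXT"))
```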
- test_mysql_backend_live.py: live integration tests for MySQL connections
- test_mysql_histogram_sql.py: dialect-specific histogram query tests
- test_mysql_url_builder.py: MySQL connection string construction
- mysql_spinup.sh: Docker spinup script for local MySQL testing
- templates/sniffer/decnet_logging.py: add logging configuration for sniffer integration
- templates/ssh/decnet_logging.py: add SSH service logging template
- development/DEVELOPMENT.md: document new MySQL backend, p0f, profiler, config API features
- pyproject.toml: update dependencies for MySQL, p0f, profiler functionality
- decnet/profiler/: analyze attacker behavior timings, command sequences, service probing patterns
- Enables detection of coordinated attacks vs random scanning
- Feeds into attacker scoring and risk assessment
- Implement MySQLRepository extending BaseRepository
- Add SQLAlchemy/SQLModel ORM abstraction layer (sqlmodel_repo.py)
- Support connection pooling and tuning via DECNET_DB_URL env var
- Cross-compatible with SQLite backend via factory pattern
- Prepared for production deployment with MySQL SIEM/ELK integration
- Add @require_role() decorators to all GET/POST/PUT endpoints
- Centralize role-based access control per memory: RBAC null-role bug required server-side gating
- Admin (manage_admins), Editor (write ops), Viewer (read ops), Public endpoints
- Removes client-side role checks as per memory: server-side UI gating is mandatory
- Refactor deploy command to support service randomization and selective service deployment
- Add --services flag to filter deployed services by name
- Improve status and teardown command output formatting
- Update help text for clarity
- Extract dialect-agnostic methods to BaseRepository
- Keep only SQLite-specific SQL and initialization in SQLiteRepository
- Reduces duplication for upcoming MySQL backend
- Maintains 100% backward compatibility
- Add `get_repository()` factory function to select DB implementation at runtime via DECNET_DB_TYPE env var
- Extract BaseRepository abstract interface from SQLiteRepository
- Update dependencies to use factory-based repository injection
- Add DECNET_DB_TYPE env var support (defaults to sqlite)
- Refactor models and repository base class for cross-dialect compatibility
Connection-lifecycle events (connect, disconnect, accept, close) fire once
per TCP connection. During a portscan or credential-stuffing run this
firehoses the SQLite ingester with tiny WAL writes and starves all reads
until the queue drains.
The collector now deduplicates these events by
(attacker_ip, decky, service, event_type) over a 1-second window before
writing to the .json ingestion stream. The raw .log file is untouched, so
rsyslog/SIEM still see every event for forensic fidelity.
Tunable via DECNET_COLLECTOR_RL_WINDOW_SEC and DECNET_COLLECTOR_RL_EVENT_TYPES.
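The window itself is just a keyed timestamp check (sketch; the collector's real data structures and defaults may differ):

```python
import time

_RL_WINDOW = 1.0
_RL_EVENT_TYPES = {"connect", "disconnect", "accept", "close"}
_last_seen: dict[tuple, float] = {}

def should_write(event: dict) -> bool:
    if event.get("event_type") not in _RL_EVENT_TYPES:
        return True                              # only lifecycle events are deduplicated
    key = (event.get("attacker_ip"), event.get("decky"),
           event.get("service"), event.get("event_type"))
    now = time.monotonic()
    if now - _last_seen.get(key, 0.0) < _RL_WINDOW:
        return False                             # duplicate within the window: drop
    _last_seen[key] = now
    return True
```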
Remove unused imports (ruff F401), suppress B324 false positives on
spec-mandated MD5 in HASSH/JA3/JA3S fingerprinting, drop unused
record_version assignment in JARM parser, and pin pip>=26.0 in dev
deps to address CVE-2025-8869 and CVE-2026-1703.
The live test modules set DECNET_CONTRACT_TEST=true at module level,
which persisted across xdist workers and caused the mutate endpoint
to short-circuit before the mock was reached. Clear the env var in
affected tests with monkeypatch.delenv.
21 live tests covering all background workers against real resources:
collector (real Docker daemon), ingester (real filesystem + DB),
attacker worker (real DB profiles), sniffer (real network interfaces),
API lifespan (real health endpoint), and cross-service cascade isolation.
9 tests covering auth enforcement, component reporting, status
transitions, degraded mode, and real DB/Docker state validation.
Runs with -m live alongside other live service tests.
23 tests verifying that each background worker degrades gracefully
when its dependencies are unavailable, and that failures don't cascade:
- Collector: Docker unavailable, no state file, empty fleet
- Ingester: missing log file, unset env var, malformed JSON, fatal DB
- Attacker: DB errors, empty database
- Sniffer: missing interface, no state, scapy crash, non-decky traffic
- API lifespan: all workers failing, DB init failure, sniffer import fail
- Cascade: collector→ingester, ingester→attacker, sniffer→collector, DB→sniffer
Replace per-decky sniffer containers with a single host-side sniffer
that monitors all traffic on the MACVLAN interface. Runs as a background
task in the FastAPI lifespan alongside the collector, fully fault-isolated
so failures never crash the API.
- Add fleet_singleton flag to BaseService; sniffer marked as singleton
- Composer skips fleet_singleton services in compose generation
- Fleet builder excludes singletons from random service assignment
- Extract TLS fingerprinting engine from templates/sniffer/server.py
into decnet/sniffer/ package (parameterized for fleet-wide use)
- Sniffer worker maps packets to deckies via IP→name state mapping
- Original templates/sniffer/server.py preserved for future use
All info sections (Timeline, Services, Deckies, Commands, Fingerprints)
now have clickable headers with a chevron toggle to expand/collapse
content. Pagination controls in Commands stay clickable without
triggering the collapse. All sections default to open.
Replace flat fingerprint card list with a structured section that
groups fingerprints by type under two categories: Active Probes
(JARM, HASSH, TCP/IP) and Passive Fingerprints (TLS, certificates,
latency, etc.). Each group shows its icon, label, and count.
AttackerDetail: dedicated render components for JARM (hash + target),
HASSHServer (hash, banner, expandable KEX/encryption algorithms), and
TCP/IP stack (TTL, window, MSS as bold stats, DF/SACK/TS as tags,
options order string).
Bounty: add fingerprint field labels and priority keys so prober
bounties display structured rows instead of raw JSON. Add FINGERPRINTS
filter option to the type dropdown.
Extends the prober with two new active probe types alongside JARM:
- HASSHServer: SSH server fingerprinting via KEX_INIT algorithm ordering
(MD5 hash of kex;enc_s2c;mac_s2c;comp_s2c, pure stdlib)
- TCP/IP stack: OS/tool fingerprinting via SYN-ACK analysis using scapy
(TTL, window size, DF bit, MSS, TCP options ordering, SHA256 hash)
Worker probe cycle now runs three phases per IP with independent
per-type port tracking. Ingester extracts bounties for all three
fingerprint types.
Reverts commits 8c249f6, a6c7cfd, 7ff5703. The SSH log relay approach
requires container redeployment and doesn't retroactively fix existing
attacker profiles. Rolling back to reassess the approach.
New log_relay.py replaces raw 'cat' on the rsyslog pipe. Intercepts
sshd and bash lines and re-emits them as structured RFC 5424 events:
login_success, session_opened, disconnect, connection_closed, command.
Parsers updated to accept non-nil PROCID (sshd uses PID).
The SSH honeypot logs commands via PROMPT_COMMAND logger as:
<14>1 ... bash - - - CMD uid=0 pwd=/root cmd=ls
These lines had service=bash and event_type=-, so the attacker worker
never recognized them as commands. Both the collector and correlation
parsers now detect the CMD pattern and normalize to service=ssh,
event_type=command, with uid/pwd/command in fields.
New GET /attackers/{uuid}/commands?limit=&offset=&service= endpoint
serves commands with server-side pagination and optional service filter.
AttackerDetail frontend fetches commands from this endpoint with
page controls. Service badge filter now drives both the API query
and the local fingerprint filter.
Clicking a service badge in the attacker detail view now filters the
commands and fingerprints sections on that page instead of navigating
away. Click again to clear. Header shows filtered/total counts.
API now accepts ?service=https to filter attackers by targeted service.
Service badges are clickable in both the attacker list and detail views,
navigating to a filtered view. Active filter shows as a dismissable tag.
Same (src_ip, event_type, fingerprint) tuple is now suppressed within a
5-minute window (configurable via DEDUP_TTL env var). Prevents the bounty
vault from filling up with identical JA3/JA4 rows from repeated connections.
TLS-wrapped variant of the HTTP honeypot. Auto-generates a self-signed
certificate on startup if none is provided. Supports all the same persona
options (fake_app, server_header, custom_body, etc.) plus TLS_CERT,
TLS_KEY, and TLS_CN configuration.
EHLO/HELO require a domain or address-literal argument. Previously
the server accepted bare EHLO with no argument and responded 250,
which deviates from the spec and makes the honeypot easier to
fingerprint.
The collector kept streaming stale container IDs after a redeploy,
causing new service logs to never reach decnet.log. Now _kill_api()
also matches and SIGTERMs any running decnet.cli collect process.
Every service's _log() called print() then write_syslog_file() which also
calls print(), causing every log line to appear twice in Docker logs. The
collector streamed both copies, doubling ingested events. Removed the
redundant print() from all 22 service server.py files.
Two bugs fixed:
- data_received only split on CRLF, so clients sending bare LF (telnet, nc,
some libraries) got no responses at all. Now splits on LF and strips
trailing CR, matching real Postfix behavior.
- AUTH PLAIN without inline credentials set state to "await_plain" but no
handler existed for that state, causing the next line to be dispatched as
a normal command. Added the missing state handler.
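The splitting fix is essentially this (sketch):

```python
def split_lines(buffer: bytes) -> tuple[list[bytes], bytes]:
    # split on LF, strip a trailing CR, keep the unterminated tail in the buffer
    *lines, rest = buffer.split(b"\n")
    return [line.rstrip(b"\r") for line in lines], rest
```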
Migrate Attacker model from IP-based to UUID-based primary key with
auto-migration for old schema. Add GET /attackers (paginated, search,
sort) and GET /attackers/{uuid} API routes. Rewrite Attackers.tsx as
a card grid with full threat info and create AttackerDetail.tsx as a
dedicated detail page with back navigation, stats, commands table,
and fingerprints.
- Modify Rfc5424Formatter to read decnet_component from LogRecord
and use it as RFC 5424 APP-NAME field (falls back to 'decnet')
- Add get_logger(component) factory in decnet/logging/__init__.py
with _ComponentFilter that injects decnet_component on each record
- Wire all five layers to their component tag:
cli -> 'cli', engine -> 'engine', api -> 'api' (api.py, ingester,
routers), mutator -> 'mutator', collector -> 'collector'
- Add structured INFO/DEBUG/WARNING/ERROR log calls throughout each
layer per the defined vocabulary; DEBUG calls are suppressed unless
DECNET_DEVELOPER=true
- Add tests/test_logging.py covering factory, filter, formatter
component-awareness, fallback behaviour, and level gating
- Fixed CLI tests by patching local imports at source (psutil, os, Path).
- Fixed Collector tests by globalizing docker.from_env mock.
- Stabilized SSE stream tests via AsyncMock and immediate generator termination to prevent hangs.
- Achieved >80% coverage on CLI (84%), Collector (97%), and DB Repository (100%).
- Implemented SMTP Relay service tests (100%).
- Add merge-to-testing job: after all CI checks pass on dev, auto-merge
into testing with --no-ff for clear merge history
- Move open-pr job to trigger on testing branch instead of dev
- PR now opens testing → main instead of dev → main
- Add bandit and pip-audit jobs to pr.yml PR gate for full suite coverage
- PR gate test job now installs dev dependencies consistently
Spins up each service's server.py in a real subprocess via a free ephemeral
port (PORT env var), connects with real protocol clients, and asserts both
correct protocol behavior and RFC 5424 log output.
- 44 live tests across 10 services: http, ftp, smtp, redis, mqtt,
mysql, postgres, mongodb, pop3, imap
- Shared conftest.py: _ServiceProcess (bg reader thread + queue),
free_port, live_service fixture, assert_rfc5424 helper
- PORT env var added to all 10 targeted server.py templates
- New pytest marker `live`; excluded from default addopts run
- requirements-live-tests.txt: flask, twisted + protocol clients
MongoDB had the same infinite-loop bug as MSSQL (msg_len=0 → buffer never
shrinks in while loop). Postgres, MySQL, and MQTT had related length-field
issues (stuck state, resource exhaustion, overlong remaining-length).
Also fixes an existing MongoDB _op_reply struct.pack format bug (extra 'q'
specifier caused struct.error on any OP_QUERY response).
Adds 53 regression + protocol boundary tests across MSSQL, MongoDB,
Postgres, MySQL, and MQTT, including a _run_with_timeout threading harness
to catch infinite loops and @pytest.mark.fuzz hypothesis tests for each.
Cowrie was exposing an SSH daemon on port 22 alongside the telnet service
even when COWRIE_SSH_ENABLED=false, contaminating deployments that did not
request an SSH service.
New implementation mirrors the SSH service pattern:
- busybox telnetd in foreground mode on port 23
- /bin/login for real PAM authentication (brute-force attempts logged)
- rsyslog RFC 5424 bridge piped to stdout for Docker log capture
- Configurable root password and hostname via env vars
- No Cowrie dependency
real_ssh was a separate service name pointing to the same template and
behaviour as ssh. Merged them: ssh is now the single real-OpenSSH service.
- Rename templates/real_ssh/ → templates/ssh/
- Remove decnet/services/real_ssh.py
- Deaddeck archetype updated: services=["ssh"]
- Merge test_real_ssh.py into test_ssh.py (includes deaddeck + logging tests)
- Drop decnet.services.real_ssh from test_build module list
The collector subprocess was spawned via 'python3 -m decnet.cli collect'
but cli.py had no 'if __name__ == "__main__": app()' guard. Python executed
the module, defined all functions, then exited cleanly with code 0 without
ever calling the collect command. No output, no log file, exit 0 — silent
non-start every time.
Also route collector stderr to <log_file>.collector.log so future crashes
are visible instead of disappearing into DEVNULL.
Collector and mutator watcher subprocesses were spawned without
start_new_session=True, leaving them in the parent's process group.
SIGHUP (sent when the controlling terminal closes) killed both
processes silently — stdout/stderr were DEVNULL so the crash was
invisible.
Also update test_services and test_composer to reflect the ssh plugin
no longer using Cowrie env vars (replaced with SSH_ROOT_PASSWORD /
SSH_HOSTNAME matching the real_ssh plugin).
Scraps the Cowrie emulation layer. The real_ssh template now runs a
genuine sshd backed by a three-layer logging stack forwarded to stdout
as RFC 5424 for the DECNET collector:
auth,authpriv.* → rsyslogd → named pipe → stdout (logins/failures)
user.* → rsyslogd → named pipe → stdout (PROMPT_COMMAND cmds)
sudo syslog=auth → rsyslogd → named pipe → stdout (privilege escalation)
sudo logfile → /var/log/sudo.log (local backup with I/O)
The ssh.py service plugin now points to templates/real_ssh and drops all
COWRIE_* / NODE_NAME env vars, sharing the same compose fragment shape as
real_ssh.py.
_load_service_container_names() reads decnet-state.json and builds the
exact set of expected container names ({decky}-{service}). is_service_container()
and is_service_event() do a direct set lookup — no regex, no label
inspection, no heuristics.
Two bugs caused the log file to never be written:
1. is_service_container() used regex '^decky-\d+-\w' which only matched
the old decky-01-smtp naming style. Actual containers are named
omega-decky-smtp, relay-decky-smtp, etc. Fixed by using Docker Compose
labels instead: com.docker.compose.project=decnet + non-empty
depends_on discriminates service containers from base (sleep infinity)
containers reliably regardless of decky naming convention.
Added is_service_event() for the Docker events path.
2. The collector was only started when --api was used. Added a 'collect'
CLI subcommand (decnet collect --log-file <path>) and wired it into
deploy as an auto-started background process when --api is not in use.
Default log path: /var/log/decnet/decnet.log
When --parallel is set:
- DOCKER_BUILDKIT=1 is injected into the subprocess environment to
ensure BuildKit is active regardless of host daemon config
- docker compose build runs first (all images built concurrently)
- docker compose up -d follows without --build (no redundant checks)
Without --parallel the original up --build path is preserved.
--parallel and --no-cache compose correctly (build --no-cache).
Conpot is a third-party app with its own Python logger — it never calls
decnet_logging. Added entrypoint.py as a subprocess wrapper that:
- Launches conpot and captures its stdout/stderr
- Classifies each line (startup/request/warning/error/log)
- Extracts source IPs via regex
- Emits RFC 5424 syslog lines to stdout for Docker/collector pickup
Entrypoint is self-contained (no import of shared decnet_logging.py)
because the conpot base image runs Python 3.6, which cannot parse the
dict[str, Any] / str | None type syntax used in the canonical file.
The BASE_IMAGE build arg was being unconditionally overwritten by
composer.py with the decky's distro build_base (debian:bookworm-slim),
turning the conpot container into a bare Debian image with no conpot
installation — hence the silent restart loop.
Two fixes:
1. composer.py: use args.setdefault() so services that pre-declare
BASE_IMAGE in their compose_fragment() win over the distro default.
2. conpot.py: pre-declare BASE_IMAGE=honeynet/conpot:latest in build
args so it always uses the upstream image regardless of decky distro.
Also removed the USER decnet switch from the conpot Dockerfile. The
upstream image already runs as the non-root 'conpot' user; switching to
'decnet' broke pkg_resources because conpot's eggs live under
/home/conpot/.local and are only on sys.path for that user.
Windows: both 0 (no ICMP rate limiting — matches real Windows behavior)
Linux: 1000ms / mask 6168 (kernel defaults)
BSD: 250ms / mask 6168 (FreeBSD default is faster than Linux)
Embedded/Cisco: both 0 (most firmware doesn't rate-limit ICMP)
These affect nmap's IE and U1 probe groups which measure ICMP error
response timing to closed UDP ports. Windows responds to all probes
instantly while Linux throttles to ~1/sec.
Tests: 10 new cases (5 per sysctl). Suite: 822 passed.
Phase 1 is complete. Live testing revealed:
- Window size (64240) is already correct — Phase 2 window mangling unnecessary
- TI=Z (IP ID = 0) is the single remaining blocker for Windows spoofing
- ip_no_pmtu_disc does NOT fix TI=Z (tested and confirmed)
Revised phase plan:
- Phase 2: ICMP tuning (icmp_ratelimit + icmp_ratemask sysctls)
- Phase 3: NFQUEUE daemon for IP ID rewriting (fixes TI=Z)
- Phase 4: diminishing returns, not recommended
Added detailed NFQUEUE architecture, TCPOPTSTRIP notes, and
a note clarifying the P= field in nmap output.
ip_no_pmtu_disc controls PMTU discovery for UDP/ICMP paths only.
TI=Z originates from ip_select_ident() in the kernel TCP stack setting
IP ID=0 for DF=1 TCP packets — a namespace-scoped sysctl cannot change this.
The previous commit was based on incorrect root-cause analysis.
When ip_no_pmtu_disc=0 the Linux kernel sets DF=1 on TCP packets and uses
IP ID=0 (RFC 6864). nmap's TI=Z fingerprint has no Windows match in its DB,
causing 91% confidence guesses of 'Linux 2.4/2.6 embedded' regardless of
TTL being 128. Setting ip_no_pmtu_disc=1 allows non-zero IP ID generation.
Trade-off: DF bit is not set on outgoing packets (slightly wrong for Windows)
but TI=Z is far more damaging to the spoof than losing DF accuracy.
The entrypoint.sh was present in the build context but never COPYed into
the image, causing 'stat /entrypoint.sh: no such file or directory' at
container start. Added COPY+chmod before the USER decnet instruction so
the script is installed as root and is executable by all users.
Add tcp_timestamps, tcp_window_scaling, tcp_sack, tcp_ecn, ip_no_pmtu_disc,
and tcp_fin_timeout to every OS profile in OS_SYSCTLS.
All 6 are network-namespace-scoped and safe to set per-container without
--privileged. They directly influence nmap's OPS, WIN, ECN, and T2-T6
probe groups, making OS family detection significantly more convincing.
Key changes:
- tcp_timestamps=0 for windows/embedded/cisco (strongest Windows discriminator)
- tcp_ecn=2 for linux (ECN offer), 0 for all others
- tcp_sack=0 / tcp_window_scaling=0 for embedded/cisco
- ip_no_pmtu_disc=1 for embedded/cisco (DF bit ICMP behaviour)
- Expose _REQUIRED_SYSCTLS frozenset for completeness assertions
Tests: 88 new test cases across all OS families and composer integration.
Total suite: 812 passed.
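An illustrative slice of such a per-OS sysctl table, using the values quoted in these notes (the real OS_SYSCTLS covers more keys and profiles):

```python
OS_SYSCTLS = {
    "windows": {
        "net.ipv4.tcp_timestamps": 0,     # strongest Windows discriminator
        "net.ipv4.icmp_ratelimit": 0,     # Windows does not rate-limit ICMP errors
        "net.ipv4.icmp_ratemask": 0,
    },
    "linux": {
        "net.ipv4.tcp_ecn": 2,
        "net.ipv4.icmp_ratelimit": 1000,  # kernel defaults
        "net.ipv4.icmp_ratemask": 6168,
    },
    "bsd": {
        "net.ipv4.icmp_ratelimit": 250,   # FreeBSD is faster than Linux here
        "net.ipv4.icmp_ratemask": 6168,
    },
}
```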
- Add dynamic challenge nonces to Postgres, VNC, and SIP.
- Add basic keyspace lookup and mock data to Redis.
- Correct MSSQL TDS pre-login offset bounds.
- Support MongoDB OP_MSG handshake version checking.
- Suppress Werkzeug HTTP server headers and normalize FTPAnonymousShell response.
- Add tracking for Dynamic Bait Store (DEBT-027) via DEBT.md.
- decnet/services/smtp_relay.py: open relay variant of smtp, same template
with SMTP_OPEN_RELAY=1 baked into the environment
- tests/service_testing/__init__.py: init so pytest discovers the subdirectory
- Buffer DATA body until CRLF.CRLF terminator — fixes 502-on-every-body-line bug
- SMTP_OPEN_RELAY=1: AUTH accepted (235), RCPT TO accepted for any domain,
full DATA pipeline with queued-as message ID
- Default (SMTP_OPEN_RELAY=0): credential harvester — AUTH rejected (535)
but connection stays open, RCPT TO returns 554 relay denied
- SASL PLAIN and LOGIN multi-step AUTH both decoded and logged
- RSET clears all per-transaction state
- Add development/SMTP_RELAY.md, IMAP_BAIT.md, ICS_SCADA.md, BUG_FIXES.md
(live-tested service realism plans)
- Add # nosec B104 to all intentional 0.0.0.0 binds in honeypot servers
(hardcoded_bind_all_interfaces is by design — deckies must accept attacker connections)
- Add # nosec B101 to assert statements used for protocol validation in ldap/snmp
- Add # nosec B105 to fake SASL placeholder in ldap
- Add # nosec B108 to /tmp usage in smb template
- Exclude root-owned auto-generated decnet_logging.py copies from bandit scan
via pyproject.toml [tool.bandit] config (synced by _sync_logging_helper at deploy)
Services now print RFC 5424 to stdout; Docker captures via json-file driver.
A new host-side collector (decnet.web.collector) streams docker logs from all
running decky service containers and writes RFC 5424 + parsed JSON to the host
log file. The existing ingester continues to tail the .json file unchanged.
rsyslog can consume the .log file independently — no DECNET involvement needed.
Removes: bind-mount volume injection, _LOG_NETWORK bridge, log_target config
field and --log-target CLI flag, TCP syslog forwarding from service templates.
- Rebuild repo.engine and repo.session_factory per-test using unique
in-memory SQLite URIs — fixes KeyError: 'access_token' caused by
stale session_factory pointing at production DB
- Add @pytest.mark.fuzz to all Hypothesis and Schemathesis tests;
default run excludes them (addopts = -m 'not fuzz')
- Add missing fuzz tests to bounty, fleet, histogram, and repository
- Use tmp_path for state file in patch_state_file/mock_state_file to
eliminate file-path race conditions under xdist parallelism
- Set default addopts: -v -q -x -n logical (26 tests in ~7s)
- decnet/env.py: DECNET_JWT_SECRET and DECNET_ADMIN_PASSWORD are now
required env vars; startup raises ValueError if unset or set to a
known-bad default ("admin", "password", etc.)
- decnet/env.py: add DECNET_CORS_ORIGINS (comma-separated, defaults to
http://localhost:8080) replacing the previous allow_origins=["*"]
- decnet/web/api.py: use DECNET_CORS_ORIGINS and tighten allow_methods
and allow_headers to explicit lists
- tests/conftest.py: set required env vars at module level so test
collection works without real credentials
- tests/test_web_api.py, test_web_api_fuzz.py: use DECNET_ADMIN_PASSWORD
from env instead of hardcoded "admin"
Closes DEBT-001, DEBT-002, DEBT-004
DECNET is a honeypot/deception network framework. It deploys fake machines (called **deckies**) with realistic services (RDP, SMB, SSH, FTP, etc.) to lure and profile attackers. All attacker interactions are aggregated to an isolated logging network (ELK stack / SIEM).
## Deployment Models
**UNIHOST** — one real host spins up _n_ deckies via a container orchestrator. Simpler, single-machine deployment.
**SWARM (MULTIHOST)** — _n_ real hosts each running deckies. Orchestrated via Ansible/sshpass or similar tooling.
## Core Technology Choices
- **Containers**: Docker Compose is the starting point but other orchestration frameworks should be evaluated if they serve the project better. `debian:bookworm-slim` is the default base image; mixing in Ubuntu, CentOS, or other distros is encouraged to make the decoy network look heterogeneous.
- **Networking**: Deckies need to appear as real machines on the LAN (own MACs/IPs). MACVLAN and IPVLAN are candidates; the right driver depends on the host environment. WSL has known limitations — bare metal or a VM is preferred for testing.
- **Log pipeline**: Logstash → ELK stack → SIEM (isolated network, not reachable from decoy network)
## Architecture Constraints
- The decoy network must be reachable from the outside (attacker-facing).
- The logging/aggregation network must be isolated from the decoy network.
- A publicly accessible real server acts as the bridge between the two networks.
- Deckies should differ in exposed services and OS fingerprints to appear as a heterogeneous network.
## Development and testing
- For every new feature, pytests must be made.
- Pytest is the main testing framework in use.
- NEVER pass broken code to the user.
- Broken means: not running, not passing 100% tests, etc.
- After tests pass with 100%, always git commit your changes.
- NEVER add "Co-Authored-By" or any Claude attribution lines to git commit messages.
See [`test-full.ini`](test-full.ini) — covers all 25 services across 10 role-t
---
## Environment Configuration (.env)
DECNET supports loading configuration from `.env.local` and `.env` files located in the project root. This is useful for securing secrets like the JWT key and configuring default ports without passing flags every time.
An example `.env.example` is provided:
```ini
# API Options
DECNET_API_HOST=0.0.0.0
DECNET_API_PORT=8000
DECNET_JWT_SECRET=supersecretkey12345
DECNET_INGEST_LOG_FILE=/var/log/decnet/decnet.log
# Web Dashboard Options
DECNET_WEB_HOST=0.0.0.0
DECNET_WEB_PORT=8080
DECNET_ADMIN_USER=admin
DECNET_ADMIN_PASSWORD=admin
# Database pool tuning (applies to both SQLite and MySQL)
DECNET_DB_POOL_SIZE=20 # base pool connections (default: 20)
DECNET_DB_MAX_OVERFLOW=40 # extra connections under burst (default: 40)
```
Copy `.env.example` to `.env.local` and modify it to suit your environment.
---
## Logging
All attacker interactions are forwarded off the decoy network to an isolated logging sink. The log pipeline lives on a separate internal Docker bridge (`decnet_logs`) that is not reachable from the fake LAN.
The test suite covers:
Every new feature requires passing tests before merging.
### Stress Testing
A [Locust](https://locust.io)-based stress test suite lives in `tests/stress/`. It hammers every API endpoint with realistic traffic patterns to find throughput ceilings and latency degradation.
`/deckies`, `/config`) collapse concurrent duplicate work onto a
single DB hit per window — essential to reach this RPS on one worker.
- Turning off request tracing (`DECNET_TRACING=false`) is the next
free headroom: tracing was still on during the run above.
- On SQLite, `DECNET_DB_POOL_PRE_PING=false` skips the per-checkout
`SELECT 1`. On MySQL, keep it `true` — network disconnects are real.
#### System tuning: open file limit
Under heavy load (500+ concurrent users), the server will exhaust the default Linux open file limit (`ulimit -n`), causing `OSError: [Errno 24] Too many open files`. Most distros default to **1024**, which is far too low for stress testing or production use.
**Before running stress tests:**
```bash
# Check current limit
ulimit -n
# Bump for this shell session
ulimit -n 65536
```
**Permanent fix** — add to `/etc/security/limits.conf`:
```
* soft nofile 65536
* hard nofile 65536
```
Or for systemd-managed services, add `LimitNOFILE=65536` to the unit file.
> This applies to production deployments too — any server handling hundreds of concurrent connections needs a raised file descriptor limit.
# AI Disclosure
This project has been made with lots, and I mean lots, of help from AIs. While most of the design was made by me, most of the coding was done by AI models.